Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.

This brings a huge number of changes; I'll enumerate only the user-visible ones: - Delegated Administration Allows regular users to perform ZFS operations, like file system creation, snapshot creation, etc. - L2ARC Level 2 cache for ZFS - allows using additional disks as cache. Huge performance improvements, mostly for random reads of mostly static content. - slog Allows using additional disks for the ZFS Intent Log to speed up operations like fsync(2). - vfs.zfs.super_owner Allows regular users to perform privileged operations on files stored on ZFS file systems that they own. Be very careful with this one. - chflags(2) Not all the flags are supported. This still needs work. - ZFSBoot Support for booting off of a ZFS pool. Not finished, AFAIK. Submitted by: dfr - Snapshot properties - New failure modes Previously, if a write request failed, the system panicked. Now one can select one of three failure modes: - panic - panic on write error - wait - wait for disk to reappear - continue - serve read requests if possible, block write requests - Refquota, refreservation properties Like the quota and reservation properties, but they don't count space consumed by child file systems, clones and snapshots. - Sparse volumes ZVOLs that don't reserve space in the pool. - External attributes Compatible with extattr(2). - NFSv4-ACLs Not sure about the status, might not be complete yet. Submitted by: trasz - Creation-time properties - Regression tests for zpool(8) command. Obtained from: OpenSolaris
svn path=/head/; revision=185029
2024-07-22 02:37:15 +00:00 · 2008-11-17 20:49:29 +00:00 · 2008-11-17 20:49:29 +00:00 · 1ba4a712dd · 2020-12-20 02:59:44 +00:00
parent 8fc061164d
commit 1ba4a712dd
341 changed files with 105264 additions and 16604 deletions
--- a/cddl/compat/opensolaris/include/libshare.h
+++ b/cddl/compat/opensolaris/include/libshare.h
@ -0,0 +1,36 @@
+/*-
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_LIBSHARE_H_
+#define _OPENSOLARIS_LIBSHARE_H_
+
+/*
+ * NOTE(review): presumably stand-ins for constants that the OpenSolaris
+ * libshare header provides; only the two names below are defined here.
+ */
+
+/* Defined as 0; presumably the libshare success status -- TODO confirm. */
+#define	SA_OK	0
+
+/* Defined as 0; presumably a libshare init flag -- TODO confirm. */
+#define	SA_INIT_CONTROL_API	0
+
+#endif	/* !_OPENSOLARIS_LIBSHARE_H_ */
--- a/cddl/compat/opensolaris/include/mnttab.h
+++ b/cddl/compat/opensolaris/include/mnttab.h
@ -9,6 +9,8 @@
 #define	MNTTAB		_PATH_DEVNULL
 #define	MNT_LINE_MAX	1024

+#define	umount2(p, f)	unmount(p, f)
+
 struct mnttab {
 	char	*mnt_special;
 	char	*mnt_mountp;
--- a/cddl/compat/opensolaris/misc/mnttab.c
+++ b/cddl/compat/opensolaris/misc/mnttab.c
@ -37,6 +37,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/mntent.h>
 #include <sys/mnttab.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>

 static char *
 mntopt(char **p)
--- a/cddl/compat/opensolaris/misc/zmount.c
+++ b/cddl/compat/opensolaris/misc/zmount.c
@ -35,9 +35,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/mount.h>
 #include <sys/uio.h>
 #include <sys/mntent.h>
+#include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <assert.h>
+#include <string.h>

 static void
 build_iovec(struct iovec **iov, int *iovlen, const char *name, void *val,
@ -86,7 +87,7 @@ zmount(const char *spec, const char *dir, int mflag, char *fstype,
 	assert(optlen > 0);

 	optstr = strdup(optptr);
-	assert(optptr != NULL);
+	assert(optstr != NULL);

 	iov = NULL;
 	iovlen = 0;
--- a/cddl/compat/opensolaris/misc/zone.c
+++ b/cddl/compat/opensolaris/misc/zone.c
@ -32,7 +32,7 @@
 #include <sys/sysctl.h>
 #include <sys/zone.h>

-int
+zoneid_t
 getzoneid(void)
 {
 	size_t size;
@ -42,5 +42,5 @@ getzoneid(void)
 	size = sizeof(jailid);
 	if (sysctlbyname("security.jail.jailed", &jailid, &size, NULL, 0) == -1)
 		assert(!"No security.jail.jailed sysctl!");
-	return (jailid);
+	return ((zoneid_t)jailid);
 }
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.8
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.8
@ -28,13 +28,17 @@ zdb \- ZFS debugger
 .fi

 .SH DESCRIPTION
+.sp
 .LP
 The \fBzdb\fR command is used by support engineers to diagnose failures and gather statistics. Since the \fBZFS\fR file system is always consistent on disk and is self-repairing, \fBzdb\fR should only be run under the direction of a support engineer.
+.sp
 .LP
 If no arguments are specified, \fBzdb\fR performs basic consistency checks on the pool and associated datasets, and reports any problems detected.
+.sp
 .LP
 Any options supported by this command are internal to Sun and subject to change at any time.
 .SH EXIT STATUS
+.sp
 .LP
 The following exit values are returned:
 .sp
@ -71,6 +75,7 @@ Invalid command line options were specified.
 .RE

 .SH ATTRIBUTES
+.sp
 .LP
 See \fBattributes\fR(5) for descriptions of the following attributes:
 .sp
@ -89,5 +94,6 @@ Interface StabilityUnstable
 .TE

 .SH SEE ALSO
+.sp
 .LP
 \fBzfs\fR(1M), \fBzpool\fR(1M), \fBattributes\fR(5)
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
@ -19,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -233,19 +233,26 @@ typedef struct zil_rec_info {
 } zil_rec_info_t;

 static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
-	{	NULL,			"Total      "	},
-	{	zil_prt_rec_create,	"TX_CREATE  "	},
-	{	zil_prt_rec_create,	"TX_MKDIR   "	},
-	{	zil_prt_rec_create,	"TX_MKXATTR "	},
-	{	zil_prt_rec_create,	"TX_SYMLINK "	},
-	{	zil_prt_rec_remove,	"TX_REMOVE  "	},
-	{	zil_prt_rec_remove,	"TX_RMDIR   "	},
-	{	zil_prt_rec_link,	"TX_LINK    "	},
-	{	zil_prt_rec_rename,	"TX_RENAME  "	},
-	{	zil_prt_rec_write,	"TX_WRITE   "	},
-	{	zil_prt_rec_truncate,	"TX_TRUNCATE"	},
-	{	zil_prt_rec_setattr,	"TX_SETATTR "	},
-	{	zil_prt_rec_acl,	"TX_ACL     "	},
+	{	NULL,			"Total              " },
+	{	zil_prt_rec_create,	"TX_CREATE          " },
+	{	zil_prt_rec_create,	"TX_MKDIR           " },
+	{	zil_prt_rec_create,	"TX_MKXATTR         " },
+	{	zil_prt_rec_create,	"TX_SYMLINK         " },
+	{	zil_prt_rec_remove,	"TX_REMOVE          " },
+	{	zil_prt_rec_remove,	"TX_RMDIR           " },
+	{	zil_prt_rec_link,	"TX_LINK            " },
+	{	zil_prt_rec_rename,	"TX_RENAME          " },
+	{	zil_prt_rec_write,	"TX_WRITE           " },
+	{	zil_prt_rec_truncate,	"TX_TRUNCATE        " },
+	{	zil_prt_rec_setattr,	"TX_SETATTR         " },
+	{	zil_prt_rec_acl,	"TX_ACL_V0          " },
+	{	zil_prt_rec_acl,	"TX_ACL_ACL         " },
+	{	zil_prt_rec_create,	"TX_CREATE_ACL      " },
+	{	zil_prt_rec_create,	"TX_CREATE_ATTR     " },
+	{	zil_prt_rec_create,	"TX_CREATE_ACL_ATTR " },
+	{	zil_prt_rec_create,	"TX_MKDIR_ACL       " },
+	{	zil_prt_rec_create,	"TX_MKDIR_ATTR      " },
+	{	zil_prt_rec_create,	"TX_MKDIR_ACL_ATTR  " },
 };

 /* ARGSUSED */
@ -255,12 +262,14 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
 	int txtype;
 	int verbose = MAX(dump_opt['d'], dump_opt['i']);

+	/* reduce size of txtype to strip off TX_CI bit */
 	txtype = lr->lrc_txtype;

 	ASSERT(txtype != 0 && (uint_t)txtype < TX_MAX_TYPE);
 	ASSERT(lr->lrc_txg);

-	(void) printf("\t\t%s len %6llu, txg %llu, seq %llu\n",
+	(void) printf("\t\t%s%s len %6llu, txg %llu, seq %llu\n",
+	    (lr->lrc_txtype & TX_CI) ? "CI-" : "",
 	    zil_rec_info[txtype].zri_name,
 	    (u_longlong_t)lr->lrc_reclen,
 	    (u_longlong_t)lr->lrc_txg,
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
@ -19,12 +19,10 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <libintl.h>
 #include <libuutil.h>
 #include <stddef.h>
@ -56,28 +54,43 @@ typedef struct zfs_node {

 typedef struct callback_data {
 	uu_avl_t	*cb_avl;
-	int		cb_recurse;
+	int		cb_flags;
 	zfs_type_t	cb_types;
 	zfs_sort_column_t *cb_sortcol;
-	zfs_proplist_t	**cb_proplist;
+	zprop_list_t	**cb_proplist;
 } callback_data_t;

 uu_avl_pool_t *avl_pool;

 /*
- * Called for each dataset.  If the object the object is of an appropriate type,
+ * Include snaps if they were requested or if this is a zfs list where types
+ * were not specified and the "listsnapshots" property is set on this pool.
+ */
+static int
+zfs_include_snapshots(zfs_handle_t *zhp, callback_data_t *cb)
+{
+	/*
+	 * When the iteration flags ask us to honor the pool's
+	 * "listsnapshots" property, the answer comes from the pool that
+	 * this dataset handle belongs to.
+	 */
+	if (cb->cb_flags & ZFS_ITER_PROP_LISTSNAPS) {
+		zpool_handle_t *pool = zfs_get_pool_handle(zhp);
+
+		return (zpool_get_prop_int(pool, ZPOOL_PROP_LISTSNAPS, NULL));
+	}
+
+	/* Otherwise snapshots are included only if they were requested. */
+	return (cb->cb_types & ZFS_TYPE_SNAPSHOT);
+}
+
+/*
+ * Called for each dataset.  If the object is of an appropriate type,
 * add it to the avl tree and recurse over any children as necessary.
 */
-int
+static int
 zfs_callback(zfs_handle_t *zhp, void *data)
 {
 	callback_data_t *cb = data;
 	int dontclose = 0;
+	int include_snaps = zfs_include_snapshots(zhp, cb);

-	/*
-	 * If this object is of the appropriate type, add it to the AVL tree.
-	 */
-	if (zfs_get_type(zhp) & cb->cb_types) {
+	if ((zfs_get_type(zhp) & cb->cb_types) ||
+	    ((zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) && include_snaps)) {
 		uu_avl_index_t idx;
 		zfs_node_t *node = safe_malloc(sizeof (zfs_node_t));

@ -100,10 +113,12 @@ zfs_callback(zfs_handle_t *zhp, void *data)
 	/*
 	 * Recurse if necessary.
 	 */
-	if (cb->cb_recurse && (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM ||
-	    (zfs_get_type(zhp) == ZFS_TYPE_VOLUME && (cb->cb_types &
-	    ZFS_TYPE_SNAPSHOT))))
-		(void) zfs_iter_children(zhp, zfs_callback, data);
+	if (cb->cb_flags & ZFS_ITER_RECURSE) {
+		if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM)
+			(void) zfs_iter_filesystems(zhp, zfs_callback, data);
+		if ((zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) && include_snaps)
+			(void) zfs_iter_snapshots(zhp, zfs_callback, data);
+	}

 	if (!dontclose)
 		zfs_close(zhp);
@ -118,7 +133,7 @@ zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
 	zfs_sort_column_t *col;
 	zfs_prop_t prop;

-	if ((prop = zfs_name_to_prop(name)) == ZFS_PROP_INVAL &&
+	if ((prop = zfs_name_to_prop(name)) == ZPROP_INVAL &&
 	    !zfs_prop_user(name))
 		return (-1);

@ -126,7 +141,7 @@ zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,

 	col->sc_prop = prop;
 	col->sc_reverse = reverse;
-	if (prop == ZFS_PROP_INVAL) {
+	if (prop == ZPROP_INVAL) {
 		col->sc_user_prop = safe_malloc(strlen(name) + 1);
 		(void) strcpy(col->sc_user_prop, name);
 	}
@ -243,7 +258,7 @@ zfs_sort(const void *larg, const void *rarg, void *data)
 		 * Otherwise, we compare 'lnum' and 'rnum'.
 		 */
 		lstr = rstr = NULL;
-		if (psc->sc_prop == ZFS_PROP_INVAL) {
+		if (psc->sc_prop == ZPROP_INVAL) {
 			nvlist_t *luser, *ruser;
 			nvlist_t *lval, *rval;

@ -257,10 +272,10 @@ zfs_sort(const void *larg, const void *rarg, void *data)

 			if (lvalid)
 				verify(nvlist_lookup_string(lval,
-				    ZFS_PROP_VALUE, &lstr) == 0);
+				    ZPROP_VALUE, &lstr) == 0);
 			if (rvalid)
 				verify(nvlist_lookup_string(rval,
-				    ZFS_PROP_VALUE, &rstr) == 0);
+				    ZPROP_VALUE, &rstr) == 0);

 		} else if (zfs_prop_is_string(psc->sc_prop)) {
 			lvalid = (zfs_prop_get(l, psc->sc_prop, lbuf,
@ -293,7 +308,7 @@ zfs_sort(const void *larg, const void *rarg, void *data)

 		if (lstr)
 			ret = strcmp(lstr, rstr);
-		if (lnum < rnum)
+		else if (lnum < rnum)
 			ret = -1;
 		else if (lnum > rnum)
 			ret = 1;
@ -309,9 +324,9 @@ zfs_sort(const void *larg, const void *rarg, void *data)
 }

 int
-zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
-    zfs_sort_column_t *sortcol, zfs_proplist_t **proplist, zfs_iter_f callback,
-    void *data, boolean_t args_can_be_paths)
+zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
+    zfs_sort_column_t *sortcol, zprop_list_t **proplist,
+    zfs_iter_f callback, void *data)
 {
 	callback_data_t cb;
 	int ret = 0;
@ -328,7 +343,7 @@ zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
 	}

 	cb.cb_sortcol = sortcol;
-	cb.cb_recurse = recurse;
+	cb.cb_flags = flags;
 	cb.cb_proplist = proplist;
 	cb.cb_types = types;
 	if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) {
@ -341,7 +356,7 @@ zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
 		/*
 		 * If given no arguments, iterate over all datasets.
 		 */
-		cb.cb_recurse = 1;
+		cb.cb_flags |= ZFS_ITER_RECURSE;
 		ret = zfs_iter_root(g_zfs, zfs_callback, &cb);
 	} else {
 		int i;
@ -354,14 +369,14 @@ zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
 		 * can take volumes as well.
 		 */
 		argtype = types;
-		if (recurse) {
+		if (flags & ZFS_ITER_RECURSE) {
 			argtype |= ZFS_TYPE_FILESYSTEM;
 			if (types & ZFS_TYPE_SNAPSHOT)
 				argtype |= ZFS_TYPE_VOLUME;
 		}

 		for (i = 0; i < argc; i++) {
-			if (args_can_be_paths) {
+			if (flags & ZFS_ITER_ARGS_CAN_BE_PATHS) {
 				zhp = zfs_path_to_zhandle(g_zfs, argv[i],
 				    argtype);
 			} else {
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
@ -19,15 +19,13 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

 #ifndef	ZFS_ITER_H
 #define	ZFS_ITER_H

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@ -40,8 +38,12 @@ typedef struct zfs_sort_column {
 	boolean_t		sc_reverse;
 } zfs_sort_column_t;

-int zfs_for_each(int, char **, boolean_t, zfs_type_t, zfs_sort_column_t *,
-    zfs_proplist_t **, zfs_iter_f, void *, boolean_t);
+#define	ZFS_ITER_RECURSE	   (1 << 0)
+#define	ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1)
+#define	ZFS_ITER_PROP_LISTSNAPS    (1 << 2)
+
+int zfs_for_each(int, char **, int options, zfs_type_t,
+    zfs_sort_column_t *, zprop_list_t **, zfs_iter_f, void *);
 int zfs_add_sort_column(zfs_sort_column_t **, const char *, boolean_t);
 void zfs_free_sort_columns(zfs_sort_column_t *);

--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
--- a/cddl/contrib/opensolaris/cmd/zinject/translate.c
+++ b/cddl/contrib/opensolaris/cmd/zinject/translate.c
@ -0,0 +1,460 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <libzfs.h>
+
+#undef verify	/* both libzfs.h and zfs_context.h want to define this */
+
+#include <sys/zfs_context.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/file.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dnode.h>
+#include <sys/vdev_impl.h>
+
+#include "zinject.h"
+
+#include <assert.h>
+#define	verify	assert
+
+extern void kernel_init(int);
+extern void kernel_fini(void);
+
+static int debug;
+
+/*
+ * printf(3)-style debug trace.  The message is written to stdout only when
+ * the file-scope 'debug' flag is non-zero; otherwise the call does nothing.
+ */
+static void
+ziprintf(const char *fmt, ...)
+{
+	if (debug) {
+		va_list args;
+
+		va_start(args, fmt);
+		(void) vprintf(fmt, args);
+		va_end(args);
+	}
+}
+
+/*
+ * Split a full path into the name of the ZFS dataset it lives on and the
+ * path relative to that dataset's mountpoint.  The caller supplies 'dataset'
+ * (at least MAXNAMELEN characters) and 'relpath' (at least MAXPATHLEN
+ * characters).  The stat64 buffer is filled in along the way because it is
+ * needed later to get the object ID.
+ *
+ * Returns 0 on success, or -1 after printing a diagnostic to stderr.
+ */
+static int
+parse_pathname(const char *fullpath, char *dataset, char *relpath,
+    struct stat64 *statbuf)
+{
+	struct statfs mnt;
+	size_t mntlen;
+	const char *tail;
+
+	/* Only absolute paths are accepted. */
+	if (fullpath[0] != '/') {
+		(void) fprintf(stderr,
+		    "invalid object '%s': must be full path\n", fullpath);
+		usage();
+		return (-1);
+	}
+
+	/* The relative part is copied into a MAXPATHLEN-sized buffer. */
+	if (strlen(fullpath) >= MAXPATHLEN) {
+		(void) fprintf(stderr, "invalid object; pathname too long\n");
+		return (-1);
+	}
+
+	if (stat64(fullpath, statbuf) != 0) {
+		(void) fprintf(stderr, "cannot open '%s': %s\n",
+		    fullpath, strerror(errno));
+		return (-1);
+	}
+
+	/* Ask the kernel which mounted file system holds the path. */
+	if (statfs(fullpath, &mnt) == -1) {
+		(void) fprintf(stderr, "cannot find mountpoint for '%s': %s\n",
+		    fullpath, strerror(errno));
+		return (-1);
+	}
+
+	if (strcmp(mnt.f_fstypename, MNTTYPE_ZFS) != 0) {
+		(void) fprintf(stderr,
+		    "invalid path '%s': not a ZFS filesystem\n", fullpath);
+		return (-1);
+	}
+
+	/* The path must begin with the mountpoint it was resolved to. */
+	mntlen = strlen(mnt.f_mntonname);
+	if (strncmp(fullpath, mnt.f_mntonname, mntlen) != 0) {
+		(void) fprintf(stderr,
+		    "invalid path '%s': mountpoint doesn't match path\n",
+		    fullpath);
+		return (-1);
+	}
+
+	(void) strcpy(dataset, mnt.f_mntfromname);
+
+	/* Drop the mountpoint prefix and at most one separating slash. */
+	tail = fullpath + mntlen;
+	if (*tail == '/')
+		tail++;
+	(void) strcpy(relpath, tail);
+
+	return (0);
+}
+
+/*
+ * Fill in the (objset, object) pair of 'record' for a file that lives in
+ * 'dataset'.  The object number is simply the inode number found in
+ * 'statbuf', since looking it up via libzpool is a real pain; 'path' is
+ * not used.
+ *
+ * Returns 0 on success, or -1 (with a message on stderr) if the dataset
+ * cannot be opened.
+ */
+/* ARGSUSED */
+static int
+object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
+    zinject_record_t *record)
+{
+	objset_t *objset;
+	int error;
+
+	/*
+	 * Call sync() before any libzpool operation so that the on-disk
+	 * state is consistent with the in-core state.
+	 */
+	sync();
+
+	error = dmu_objset_open(dataset, DMU_OST_ZFS,
+	    DS_MODE_USER | DS_MODE_READONLY, &objset);
+	if (error != 0) {
+		(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
+		    dataset, strerror(error));
+		return (-1);
+	}
+
+	record->zi_objset = dmu_objset_id(objset);
+	record->zi_object = statbuf->st_ino;
+	dmu_objset_close(objset);
+
+	return (0);
+}
+
+/*
+ * Calculate the real range based on the type, level, and range given.
+ *
+ * 'range' is either NULL (the whole object) or a decimal string of the form
+ * 'start[,end]'.  On success record->zi_start, record->zi_end and
+ * record->zi_level are filled in (and record->zi_object is reset to 0 for
+ * TYPE_DNODE) and 0 is returned.  On failure a message is printed to stderr
+ * and -1 is returned.
+ */
+static int
+calculate_range(const char *dataset, err_type_t type, int level, char *range,
+    zinject_record_t *record)
+{
+	objset_t *os = NULL;
+	dnode_t *dn = NULL;
+	int err;
+	int ret = -1;
+
+	/*
+	 * Determine the numeric range from the string.
+	 */
+	if (range == NULL) {
+		/*
+		 * If range is unspecified, set the range to [0,-1], which
+		 * indicates that the whole object should be treated as an
+		 * error.
+		 */
+		record->zi_start = 0;
+		record->zi_end = -1ULL;
+	} else {
+		char *end;
+
+		/* XXX add support for suffixes */
+		record->zi_start = strtoull(range, &end, 10);
+
+
+		/* A lone start value gets an end of start + 1. */
+		if (*end == '\0')
+			record->zi_end = record->zi_start + 1;
+		else if (*end == ',')
+			record->zi_end = strtoull(end + 1, &end, 10);
+
+		/* Anything left over after parsing makes the range invalid. */
+		if (*end != '\0') {
+			(void) fprintf(stderr, "invalid range '%s': must be "
+			    "a numeric range of the form 'start[,end]'\n",
+			    range);
+			goto out;
+		}
+	}
+
+	switch (type) {
+	case TYPE_DATA:
+		break;
+
+	case TYPE_DNODE:
+		/*
+		 * If this is a request to inject faults into the dnode, then we
+		 * must translate the current (objset,object) pair into an
+		 * offset within the metadnode for the objset.  Specifying any
+		 * kind of range with type 'dnode' is illegal.
+		 */
+		if (range != NULL) {
+			(void) fprintf(stderr, "range cannot be specified when "
+			    "type is 'dnode'\n");
+			goto out;
+		}
+
+		record->zi_start = record->zi_object * sizeof (dnode_phys_t);
+		record->zi_end = record->zi_start + sizeof (dnode_phys_t);
+		record->zi_object = 0;
+		break;
+	}
+
+	/*
+	 * Get the dnode associated with object, so we can calculate the block
+	 * size.
+	 */
+	if ((err = dmu_objset_open(dataset, DMU_OST_ANY,
+	    DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+		(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
+		    dataset, strerror(err));
+		goto out;
+	}
+
+	/* Object 0 uses the objset's meta-dnode directly, without a hold. */
+	if (record->zi_object == 0) {
+		dn = os->os->os_meta_dnode;
+	} else {
+		err = dnode_hold(os->os, record->zi_object, FTAG, &dn);
+		if (err != 0) {
+			(void) fprintf(stderr, "failed to hold dnode "
+			    "for object %llu\n",
+			    (u_longlong_t)record->zi_object);
+			goto out;
+		}
+	}
+
+
+	ziprintf("data shift: %d\n", (int)dn->dn_datablkshift);
+	ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift);
+
+	/*
+	 * Translate range into block IDs.  The whole-object range [0,-1] is
+	 * left untouched.
+	 */
+	if (record->zi_start != 0 || record->zi_end != -1ULL) {
+		record->zi_start >>= dn->dn_datablkshift;
+		record->zi_end >>= dn->dn_datablkshift;
+	}
+
+	/*
+	 * Check level, and then translate level 0 blkids into ranges
+	 * appropriate for level of indirection.
+	 */
+	record->zi_level = level;
+	if (level > 0) {
+		ziprintf("level 0 blkid range: [%llu, %llu]\n",
+		    record->zi_start, record->zi_end);
+
+		if (level >= dn->dn_nlevels) {
+			(void) fprintf(stderr, "level %d exceeds max level "
+			    "of object (%d)\n", level, dn->dn_nlevels - 1);
+			goto out;
+		}
+
+		/*
+		 * NOTE(review): this test compares zi_end with 0, whereas the
+		 * whole-object range used above is [0,-1ULL] -- confirm that
+		 * shifting the -1ULL end value here is intended.
+		 */
+		if (record->zi_start != 0 || record->zi_end != 0) {
+			int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+
+			/* One right shift by 'shift' per indirection level. */
+			for (; level > 0; level--) {
+				record->zi_start >>= shift;
+				record->zi_end >>= shift;
+			}
+		}
+	}
+
+	ret = 0;
+out:
+	/* Only a dnode obtained through dnode_hold() is released. */
+	if (dn) {
+		if (dn != os->os->os_meta_dnode)
+			dnode_rele(dn, FTAG);
+	}
+	if (os)
+		dmu_objset_close(os);
+
+	return (ret);
+}
+
+/*
+ * Translate a user-visible (type, object, range, level) description into an
+ * injection record.
+ *
+ * For MOS object types only record->zi_type is filled in, 'dataset' is set
+ * to the empty string and 'object' is copied to 'poolname' verbatim.  For
+ * all other types 'object' is a full path to a file on a ZFS file system;
+ * it is resolved to a dataset name (stored in 'dataset'), an (objset,
+ * object) pair and a block range in 'record', and the pool name is the
+ * dataset name cut at its first '/'.
+ *
+ * 'poolname' is a caller-supplied buffer that is written with strcpy(), so
+ * the caller must size it for the longest expected name.
+ *
+ * Returns 0 on success and -1 on failure.  The kernel_init() call made here
+ * is balanced by kernel_fini() on every return path, including the MOS one.
+ */
+int
+translate_record(err_type_t type, const char *object, const char *range,
+    int level, zinject_record_t *record, char *poolname, char *dataset)
+{
+	char path[MAXPATHLEN];
+	char *slash;
+	struct stat64 statbuf;
+	int ret = -1;
+
+	kernel_init(FREAD);
+
+	/* Debug tracing is enabled by the mere presence of ZINJECT_DEBUG. */
+	debug = (getenv("ZINJECT_DEBUG") != NULL);
+
+	ziprintf("translating: %s\n", object);
+
+	if (MOS_TYPE(type)) {
+		/*
+		 * MOS objects are treated specially.
+		 */
+		switch (type) {
+		case TYPE_MOS:
+			record->zi_type = 0;
+			break;
+		case TYPE_MOSDIR:
+			record->zi_type = DMU_OT_OBJECT_DIRECTORY;
+			break;
+		case TYPE_METASLAB:
+			record->zi_type = DMU_OT_OBJECT_ARRAY;
+			break;
+		case TYPE_CONFIG:
+			record->zi_type = DMU_OT_PACKED_NVLIST;
+			break;
+		case TYPE_BPLIST:
+			record->zi_type = DMU_OT_BPLIST;
+			break;
+		case TYPE_SPACEMAP:
+			record->zi_type = DMU_OT_SPACE_MAP;
+			break;
+		case TYPE_ERRLOG:
+			record->zi_type = DMU_OT_ERROR_LOG;
+			break;
+		default:
+			/* Any other type leaves zi_type as the caller set it. */
+			break;
+		}
+
+		dataset[0] = '\0';
+		(void) strcpy(poolname, object);
+
+		/*
+		 * Leave through the common exit so that kernel_fini() runs;
+		 * returning directly here would skip it.
+		 */
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * Convert a full path into a (dataset, file) pair.
+	 */
+	if (parse_pathname(object, dataset, path, &statbuf) != 0)
+		goto out;
+
+	ziprintf("   dataset: %s\n", dataset);
+	ziprintf("      path: %s\n", path);
+
+	/*
+	 * Convert (dataset, file) into (objset, object)
+	 */
+	if (object_from_path(dataset, path, &statbuf, record) != 0)
+		goto out;
+
+	ziprintf("raw objset: %llu\n", record->zi_objset);
+	ziprintf("raw object: %llu\n", record->zi_object);
+
+	/*
+	 * For the given object, calculate the real (type, level, range)
+	 */
+	if (calculate_range(dataset, type, level, (char *)range, record) != 0)
+		goto out;
+
+	ziprintf("    objset: %llu\n", record->zi_objset);
+	ziprintf("    object: %llu\n", record->zi_object);
+	if (record->zi_start == 0 &&
+	    record->zi_end == -1ULL)
+		ziprintf("     range: all\n");
+	else
+		ziprintf("     range: [%llu, %llu]\n", record->zi_start,
+		    record->zi_end);
+
+	/*
+	 * Copy the pool name: everything in the dataset name before the
+	 * first '/'.
+	 */
+	(void) strcpy(poolname, dataset);
+	if ((slash = strchr(poolname, '/')) != NULL)
+		*slash = '\0';
+
+	ret = 0;
+
+out:
+	kernel_fini();
+	return (ret);
+}
+
+/*
+ * Parse a raw bookmark of the form objset:object:level:blkid, where each
+ * number is a hexadecimal value, directly into 'record'.  zi_end is set
+ * equal to zi_start.
+ *
+ * Returns 0 on success, or -1 after printing a message to stderr when the
+ * string does not yield all four fields.
+ */
+int
+translate_raw(const char *str, zinject_record_t *record)
+{
+	int nfields;
+
+	nfields = sscanf(str, "%llx:%llx:%x:%llx",
+	    (u_longlong_t *)&record->zi_objset,
+	    (u_longlong_t *)&record->zi_object,
+	    &record->zi_level,
+	    (u_longlong_t *)&record->zi_start);
+	if (nfields != 4) {
+		(void) fprintf(stderr, "bad raw spec '%s': must be of the form "
+		    "'objset:object:level:blkid'\n", str);
+		return (-1);
+	}
+
+	record->zi_end = record->zi_start;
+	return (0);
+}
+
+/*
+ * Build a device injection record for 'device' in pool 'pool'.
+ *
+ * 'device' is first parsed as a hexadecimal GUID; if that yields 0 or leaves
+ * trailing characters, it is looked up as a vdev name in the pool and the
+ * GUID is taken from the vdev's configuration.  For the two label types
+ * handled below, zi_start/zi_end are set to the byte range of the matching
+ * region inside vdev_label_t; other types leave the range untouched.
+ *
+ * Returns 0 on success, or -1 if the pool cannot be opened or the device is
+ * not found.  The pool handle opened here is closed on every path after a
+ * successful zpool_open().
+ */
+int
+translate_device(const char *pool, const char *device, err_type_t label_type,
+    zinject_record_t *record)
+{
+	char *end;
+	zpool_handle_t *zhp;
+	nvlist_t *tgt;
+	boolean_t isspare, iscache;
+	int ret = -1;
+
+	/*
+	 * Given a device name or GUID, create an appropriate injection record
+	 * with zi_guid set.
+	 */
+	if ((zhp = zpool_open(g_zfs, pool)) == NULL)
+		return (-1);
+
+	record->zi_guid = strtoull(device, &end, 16);
+	if (record->zi_guid == 0 || *end != '\0') {
+		tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL);
+
+		if (tgt == NULL) {
+			(void) fprintf(stderr, "cannot find device '%s' in "
+			    "pool '%s'\n", device, pool);
+			goto out;
+		}
+
+		/*
+		 * The GUID is copied out of the nvlist into the record here,
+		 * so 'tgt' is not referenced again once the handle is closed.
+		 */
+		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
+		    &record->zi_guid) == 0);
+	}
+
+	switch (label_type) {
+	case TYPE_LABEL_UBERBLOCK:
+		record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]);
+		record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1;
+		break;
+	case TYPE_LABEL_NVLIST:
+		record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
+		record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
+		break;
+	default:
+		/* Not a label fault: leave the range as the caller set it. */
+		break;
+	}
+
+	ret = 0;
+out:
+	/* Release the handle from zpool_open(); it was previously leaked. */
+	zpool_close(zhp);
+	return (ret);
+}
--- a/cddl/contrib/opensolaris/cmd/zinject/zinject.c
+++ b/cddl/contrib/opensolaris/cmd/zinject/zinject.c
@ -0,0 +1,770 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * ZFS Fault Injector
+ *
+ * This userland component takes a set of options and uses libzpool to translate
+ * from a user-visible object type and name to an internal representation.
+ * There are two basic types of faults: device faults and data faults.
+ *
+ *
+ * DEVICE FAULTS
+ *
+ * Errors can be injected into a particular vdev using the '-d' option.  This
+ * option takes a path or vdev GUID to uniquely identify the device within a
+ * pool.  There are two types of errors that can be injected, EIO and ENXIO,
+ * that can be controlled through the '-e' option.  The default is ENXIO.  For
+ * EIO failures, any attempt to read data from the device will return EIO, but
+ * subsequent attempts to reopen the device will succeed.  For ENXIO failures,
+ * any attempt to read from the device will return EIO, and any attempt to
+ * reopen the device will return ENXIO.
+ * For label faults, the -L option must be specified. This allows faults
+ * to be injected into either the nvlist or uberblock region of all the labels
+ * for the specified device.
+ *
+ * This form of the command looks like:
+ *
+ * 	zinject -d device [-e errno] [-L <uber | nvlist>] pool
+ *
+ *
+ * DATA FAULTS
+ *
+ * We begin with a tuple of the form:
+ *
+ * 	<type,level,range,object>
+ *
+ * 	type	A string describing the type of data to target.  Each type
+ * 		implicitly describes how to interpret 'object'. Currently,
+ * 		the following values are supported:
+ *
+ * 		data		User data for a file
+ * 		dnode		Dnode for a file or directory
+ *
+ *		The following MOS objects are special.  Instead of injecting
+ *		errors on a particular object or blkid, we inject errors across
+ *		all objects of the given type.
+ *
+ * 		mos		Any data in the MOS
+ * 		mosdir		object directory
+ * 		config		pool configuration
+ * 		bplist		blkptr list
+ * 		spacemap	spacemap
+ * 		metaslab	metaslab
+ * 		errlog		persistent error log
+ *
+ * 	level	Object level.  Defaults to '0', not applicable to all types.  If
+ * 		a range is given, this corresponds to the indirect block
+ * 		corresponding to the specific range.
+ *
+ *	range	A numerical range [start,end) within the object.  Defaults to
+ *		the full size of the file.
+ *
+ * 	object	A string describing the logical location of the object.  For
+ * 		files and directories (currently the only supported types),
+ * 		this is the path of the object on disk.
+ *
+ * This is translated, via libzpool, into the following internal representation:
+ *
+ * 	<type,objset,object,level,range>
+ *
+ * These types should be self-explanatory.  This tuple is then passed to the
+ * kernel via a special ioctl() to initiate fault injection for the given
+ * object.  Note that 'type' is not strictly necessary for fault injection, but
+ * is used when translating existing faults into a human-readable string.
+ *
+ *
+ * The command itself takes one of the forms:
+ *
+ * 	zinject
+ * 	zinject <-a | -u pool>
+ * 	zinject -c <id|all>
+ * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
+ *	    [-r range] <object>
+ * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:blkid pool
+ *
+ * With no arguments, the command prints all currently registered injection
+ * handlers, with their numeric identifiers.
+ *
+ * The '-c' option will clear the given handler, or all handlers if 'all' is
+ * specified.
+ *
+ * The '-e' option takes a string describing the errno to simulate.  This must
+ * be either 'io' or 'checksum'.  In most cases this will result in the same
+ * behavior, but RAID-Z will produce a different set of ereports for this
+ * situation.
+ *
+ * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
+ * specified, then the ARC cache is flushed appropriately.  If '-u' is
+ * specified, then the underlying SPA is unloaded.  Either of these flags can be
+ * specified independently of any other handlers.  The '-m' flag automatically
+ * does an unmount and remount of the underlying dataset to aid in flushing the
+ * cache.
+ *
+ * The '-f' flag controls the frequency of errors injected, expressed as an
+ * integer percentage between 1 and 100.  The default is 100.
+ *
+ * The fourth form is responsible for actually injecting the handler into the
+ * framework.  It takes the arguments described above, translates them to the
+ * internal tuple using libzpool, and then issues an ioctl() to register the
+ * handler.
+ *
+ * The final form can target a specific bookmark, regardless of whether a
+ * human-readable interface has been designed.  It allows developers to specify
+ * a particular block by number.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <sys/fs/zfs.h>
+#include <sys/mount.h>
+
+#include <libzfs.h>
+
+#undef verify	/* both libzfs.h and zfs_context.h want to define this */
+
+#include "zinject.h"
+
+libzfs_handle_t *g_zfs;
+int zfs_fd;
+
+#ifndef ECKSUM
+#define	ECKSUM	EBADE
+#endif
+
+/*
+ * Command-line names for each injection type.  name_to_type() returns the
+ * index of the matching entry as an err_type_t, so the entries must stay in
+ * the same order as the err_type_t enumerators (TYPE_DATA first, up to but
+ * not including TYPE_INVAL).
+ */
+static const char *errtable[TYPE_INVAL] = {
+	"data",
+	"dnode",
+	"mos",
+	"mosdir",
+	"metaslab",
+	"config",
+	"bplist",
+	"spacemap",
+	"errlog",
+	"uber",
+	"nvlist"
+};
+
+/*
+ * Translate a type name from the command line into its err_type_t value by
+ * searching errtable.  Returns TYPE_INVAL when no entry matches.
+ */
+static err_type_t
+name_to_type(const char *arg)
+{
+	int idx = 0;
+
+	while (idx < TYPE_INVAL) {
+		if (strcmp(errtable[idx], arg) == 0)
+			return (idx);
+		idx++;
+	}
+
+	return (TYPE_INVAL);
+}
+
+/*
+ * Return the display name for the DMU object type stored in an injection
+ * record, or "-" for any type that has no name here.
+ */
+static const char *
+type_to_name(uint64_t type)
+{
+	const char *name = "-";
+
+	if (type == DMU_OT_OBJECT_DIRECTORY)
+		name = "mosdir";
+	else if (type == DMU_OT_OBJECT_ARRAY)
+		name = "metaslab";
+	else if (type == DMU_OT_PACKED_NVLIST)
+		name = "config";
+	else if (type == DMU_OT_BPLIST)
+		name = "bplist";
+	else if (type == DMU_OT_SPACE_MAP)
+		name = "spacemap";
+	else if (type == DMU_OT_ERROR_LOG)
+		name = "errlog";
+
+	return (name);
+}
+
+
+/*
+ * Print the full usage message to standard output.  The text is a single
+ * string literal covering every command form, the per-option descriptions
+ * and the meaning of each '-t' type.  This function only prints; callers
+ * decide the exit status.
+ */
+void
+usage(void)
+{
+	(void) printf(
+	    "usage:\n"
+	    "\n"
+	    "\tzinject\n"
+	    "\n"
+	    "\t\tList all active injection records.\n"
+	    "\n"
+	    "\tzinject -c <id|all>\n"
+	    "\n"
+	    "\t\tClear the particular record (if given a numeric ID), or\n"
+	    "\t\tall records if 'all' is specificed.\n"
+	    "\n"
+	    "\tzinject -d device [-e errno] [-L <nvlist|uber>] pool\n"
+	    "\t\tInject a fault into a particular device or the device's\n"
+	    "\t\tlabel.  Label injection can either be 'nvlist' or 'uber'.\n"
+	    "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
+	    "\n"
+	    "\tzinject -b objset:object:level:blkid pool\n"
+	    "\n"
+	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
+	    "\t\tspecified by the remaining tuple.  Each number is in\n"
+	    "\t\thexidecimal, and only one block can be specified.\n"
+	    "\n"
+	    "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n"
+	    "\t    [-a] [-m] [-u] [-f freq] <object>\n"
+	    "\n"
+	    "\t\tInject an error into the object specified by the '-t' option\n"
+	    "\t\tand the object descriptor.  The 'object' parameter is\n"
+	    "\t\tinterperted depending on the '-t' option.\n"
+	    "\n"
+	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
+	    "\t\t-e\tInject a specific error.  Must be either 'io' or\n"
+	    "\t\t\t'checksum'.  Default is 'io'.\n"
+	    "\t\t-l\tInject error at a particular block level. Default is "
+	    "0.\n"
+	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
+	    "\t\t-r\tInject error over a particular logical range of an\n"
+	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
+	    "\t\t\trange according to the object's properties.\n"
+	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
+	    "\t\t\tassociated object.\n"
+	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
+	    "\t\t\ta pool object.\n"
+	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
+	    "\t\t\ta percentage between 1 and 100.\n"
+	    "\n"
+	    "\t-t data\t\tInject an error into the plain file contents of a\n"
+	    "\t\t\tfile.  The object must be specified as a complete path\n"
+	    "\t\t\tto a file on a ZFS filesystem.\n"
+	    "\n"
+	    "\t-t dnode\tInject an error into the metadnode in the block\n"
+	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
+	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
+	    "\t\t\tis specified as a complete path to a file or directory\n"
+	    "\t\t\ton a ZFS filesystem.\n"
+	    "\n"
+	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
+	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bplist,\n"
+	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
+	    "\t\t\tthe poolname.\n");
+}
+
+/*
+ * Call 'func' once for every injection handler the kernel reports.
+ *
+ * Handlers are fetched one at a time with ZFS_IOC_INJECT_LIST_NEXT, reusing
+ * the same zfs_cmd_t so that the zc_guid written back by one call is passed
+ * into the next; iteration starts from zc_guid == 0 and ends when the ioctl
+ * fails.  'func' receives the handler id (zc_guid), the pool name, the
+ * injection record and the caller's 'data' pointer.
+ *
+ * Returns 0 after a complete walk, or the first non-zero value returned by
+ * 'func', which stops the walk early.
+ */
+static int
+iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
+    void *data)
+{
+	/*
+	 * Zero the whole command so no uninitialized stack bytes are handed
+	 * to the kernel; this also sets the starting zc_guid to 0.
+	 */
+	zfs_cmd_t zc = { 0 };
+	int ret;
+
+	while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
+		if ((ret = func((int)zc.zc_guid, zc.zc_name,
+		    &zc.zc_inject_record, data)) != 0)
+			return (ret);
+
+	return (0);
+}
+
+/*
+ * iter_handlers() callback that prints one data-injection handler as a
+ * table row.  Records with a non-zero zi_guid (device handlers) are
+ * skipped.  'data' points to an int counting rows printed so far; the
+ * column header is emitted before the first row.  Always returns 0.
+ */
+static int
+print_data_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+	int *nprinted = data;
+	int whole_object;
+
+	if (record->zi_guid != 0)
+		return (0);
+
+	if (*nprinted == 0) {
+		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-15s\n",
+		    "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL",  "RANGE");
+		(void) printf("---  ---------------  ------  "
+		    "------  --------  ---  ---------------\n");
+	}
+	(*nprinted)++;
+
+	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %3d  ", id, pool,
+	    (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object,
+	    type_to_name(record->zi_type), record->zi_level);
+
+	/* A range of [0, max] is shown as "all". */
+	whole_object = (record->zi_start == 0 && record->zi_end == -1ULL);
+	if (!whole_object) {
+		(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
+		    (u_longlong_t)record->zi_end);
+	} else {
+		(void) printf("all\n");
+	}
+
+	return (0);
+}
+
+/*
+ * iter_handlers() callback that prints one device-injection handler as a
+ * table row.  Records whose zi_guid is zero (data handlers) are skipped.
+ * 'data' points to an int counting rows printed so far; the column header
+ * is emitted before the first row.  Always returns 0.
+ */
+static int
+print_device_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+	int *nprinted = data;
+
+	if (record->zi_guid != 0) {
+		if (*nprinted == 0) {
+			(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "GUID");
+			(void) printf("---  ---------------  "
+			    "----------------\n");
+		}
+
+		*nprinted = *nprinted + 1;
+
+		(void) printf("%3d  %-15s  %llx\n", id, pool,
+		    (u_longlong_t)record->zi_guid);
+	}
+
+	return (0);
+}
+
+/*
+ * Print all registered error handlers: first the device handlers, then the
+ * data handlers, each as its own table.  Returns the total number of
+ * handlers printed across both tables, so a caller can tell "nothing
+ * registered" apart from "only device handlers registered".
+ */
+static int
+print_all_handlers(void)
+{
+	int ndevice = 0;
+	int ndata = 0;
+
+	(void) iter_handlers(print_device_handler, &ndevice);
+
+	/* Only separate the two tables if the first one was printed. */
+	if (ndevice > 0)
+		(void) printf("\n");
+
+	(void) iter_handlers(print_data_handler, &ndata);
+
+	return (ndevice + ndata);
+}
+
+/*
+ * iter_handlers() callback that removes the handler with the given id via
+ * ZFS_IOC_CLEAR_FAULT.  'pool', 'record' and 'data' are unused.  Returns 0
+ * on success; on failure prints a diagnostic to stderr and returns 1,
+ * which stops the iteration.
+ */
+/* ARGSUSED */
+static int
+cancel_one_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+	/* Zero the command so no uninitialized stack reaches the kernel. */
+	zfs_cmd_t zc = { 0 };
+
+	zc.zc_guid = (uint64_t)id;
+
+	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
+		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
+		    id, strerror(errno));
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Remove all fault injection handlers.  Returns 0 if every handler was
+ * removed, or the non-zero status of the first removal that failed; the
+ * confirmation message is printed only in the former case, since a failure
+ * stops the walk and leaves handlers registered.
+ */
+static int
+cancel_all_handlers(void)
+{
+	int ret = iter_handlers(cancel_one_handler, NULL);
+
+	if (ret == 0)
+		(void) printf("removed all registered handlers\n");
+
+	return (ret);
+}
+
+/*
+ * Remove a specific fault injection handler, identified by 'id', via
+ * ZFS_IOC_CLEAR_FAULT.  Prints a confirmation and returns 0 on success;
+ * prints a diagnostic to stderr and returns 1 on failure.
+ */
+static int
+cancel_handler(int id)
+{
+	/* Zero the command so no uninitialized stack reaches the kernel. */
+	zfs_cmd_t zc = { 0 };
+
+	zc.zc_guid = (uint64_t)id;
+
+	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
+		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
+		    id, strerror(errno));
+		return (1);
+	}
+
+	(void) printf("removed handler %d\n", id);
+
+	return (0);
+}
+
+/*
+ * Register a new fault injection handler.
+ *
+ * 'pool' and 'record' are copied into a zfs_cmd_t, 'flags' is passed in
+ * zc_guid, and the request is issued with ZFS_IOC_INJECT_FAULT.  After a
+ * successful ioctl, zc_guid is reported as the new handler's id.
+ *
+ * If ZINJECT_NULL is set in 'flags' nothing is printed.  Otherwise, 'quiet'
+ * selects between printing only the handler id and printing a summary of
+ * the record.
+ *
+ * Returns 0 on success; prints a diagnostic to stderr and returns 1 if the
+ * ioctl fails.
+ */
+static int
+register_handler(const char *pool, int flags, zinject_record_t *record,
+    int quiet)
+{
+	/* Zero the command so no uninitialized stack reaches the kernel. */
+	zfs_cmd_t zc = { 0 };
+
+	/* Bounded copy: never write past the end of zc_name. */
+	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
+	zc.zc_inject_record = *record;
+	zc.zc_guid = flags;
+
+	if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
+		(void) fprintf(stderr, "failed to add handler: %s\n",
+		    strerror(errno));
+		return (1);
+	}
+
+	if (flags & ZINJECT_NULL)
+		return (0);
+
+	if (quiet) {
+		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
+	} else {
+		(void) printf("Added handler %llu with the following "
+		    "properties:\n", (u_longlong_t)zc.zc_guid);
+		(void) printf("  pool: %s\n", pool);
+		if (record->zi_guid) {
+			(void) printf("  vdev: %llx\n",
+			    (u_longlong_t)record->zi_guid);
+		} else {
+			(void) printf("objset: %llu\n",
+			    (u_longlong_t)record->zi_objset);
+			(void) printf("object: %llu\n",
+			    (u_longlong_t)record->zi_object);
+			(void) printf("  type: %llu\n",
+			    (u_longlong_t)record->zi_type);
+			(void) printf(" level: %d\n", record->zi_level);
+			if (record->zi_start == 0 &&
+			    record->zi_end == -1ULL)
+				(void) printf(" range: all\n");
+			else
+				(void) printf(" range: [%llu, %llu)\n",
+				    (u_longlong_t)record->zi_start,
+				    (u_longlong_t)record->zi_end);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Copy a pool name taken from the command line into a fixed-size buffer.
+ * Returns 0 on success; prints a diagnostic and returns -1 if the name does
+ * not fit, instead of overrunning the buffer.
+ */
+static int
+copy_pool_name(char *dst, size_t dstlen, const char *src)
+{
+	if (strlcpy(dst, src, dstlen) >= dstlen) {
+		(void) fprintf(stderr, "pool name '%s' is too long\n", src);
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Entry point.  With no arguments, lists the registered handlers.
+ * Otherwise parses the options, builds an injection record in one of four
+ * ways (device '-d', raw bookmark '-b', flags only '-a'/'-u', or typed
+ * object '-t'), optionally unmounts the dataset, registers the handler and
+ * remounts.  '-c' cancels handlers instead of registering one.
+ *
+ * Exit status is 0 on success, 1 for runtime or argument-value errors and
+ * 2 for option-usage errors.
+ */
+int
+main(int argc, char **argv)
+{
+	int c;
+	char *range = NULL;
+	char *cancel = NULL;
+	char *end;
+	char *raw = NULL;
+	char *device = NULL;
+	int level = 0;
+	int quiet = 0;
+	int error = 0;
+	int domount = 0;
+	err_type_t type = TYPE_INVAL;
+	err_type_t label = TYPE_INVAL;
+	zinject_record_t record = { 0 };
+	/*
+	 * Start both names out empty: the flags-only path ('-a' with no pool)
+	 * never fills them in, yet both are read further down.
+	 */
+	char pool[MAXNAMELEN] = "";
+	char dataset[MAXNAMELEN] = "";
+	zfs_handle_t *zhp;
+	int ret;
+	int flags = 0;
+
+	if ((g_zfs = libzfs_init()) == NULL) {
+		(void) fprintf(stderr, "internal error: failed to "
+		    "initialize ZFS library\n");
+		return (1);
+	}
+
+	libzfs_print_on_error(g_zfs, B_TRUE);
+
+	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
+		(void) fprintf(stderr, "failed to open ZFS device\n");
+		return (1);
+	}
+
+	if (argc == 1) {
+		/*
+		 * No arguments.  Print the available handlers.  If there are no
+		 * available handlers, direct the user to '-h' for help
+		 * information.
+		 */
+		if (print_all_handlers() == 0) {
+			(void) printf("No handlers registered.\n");
+			(void) printf("Run 'zinject -h' for usage "
+			    "information.\n");
+		}
+
+		return (0);
+	}
+
+	while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) {
+		switch (c) {
+		case 'a':
+			flags |= ZINJECT_FLUSH_ARC;
+			break;
+		case 'b':
+			raw = optarg;
+			break;
+		case 'c':
+			cancel = optarg;
+			break;
+		case 'd':
+			device = optarg;
+			break;
+		case 'e':
+			if (strcasecmp(optarg, "io") == 0) {
+				error = EIO;
+			} else if (strcasecmp(optarg, "checksum") == 0) {
+				error = ECKSUM;
+			} else if (strcasecmp(optarg, "nxio") == 0) {
+				error = ENXIO;
+			} else {
+				(void) fprintf(stderr, "invalid error type "
+				    "'%s': must be 'io', 'checksum' or "
+				    "'nxio'\n", optarg);
+				usage();
+				return (1);
+			}
+			break;
+		case 'f':
+			record.zi_freq = atoi(optarg);
+			if (record.zi_freq < 1 || record.zi_freq > 100) {
+				(void) fprintf(stderr, "frequency range must "
+				    "be in the range (0, 100]\n");
+				return (1);
+			}
+			break;
+		case 'h':
+			usage();
+			return (0);
+		case 'l':
+			level = (int)strtol(optarg, &end, 10);
+			if (*end != '\0') {
+				(void) fprintf(stderr, "invalid level '%s': "
+				    "must be an integer\n", optarg);
+				usage();
+				return (1);
+			}
+			break;
+		case 'm':
+			domount = 1;
+			break;
+		case 'q':
+			quiet = 1;
+			break;
+		case 'r':
+			range = optarg;
+			break;
+		case 't':
+			/*
+			 * MOS_TYPE(TYPE_INVAL) is always false, so the only
+			 * rejected value is an unrecognized name.
+			 */
+			if ((type = name_to_type(optarg)) == TYPE_INVAL) {
+				(void) fprintf(stderr, "invalid type '%s'\n",
+				    optarg);
+				usage();
+				return (1);
+			}
+			break;
+		case 'u':
+			flags |= ZINJECT_UNLOAD_SPA;
+			break;
+		case 'L':
+			/*
+			 * Validate the label that was just parsed (not the
+			 * unrelated '-t' type), and accept only the label
+			 * types; LABEL_TYPE() is false for TYPE_INVAL too.
+			 */
+			label = name_to_type(optarg);
+			if (!LABEL_TYPE(label)) {
+				(void) fprintf(stderr, "invalid label type "
+				    "'%s'\n", optarg);
+				usage();
+				return (1);
+			}
+			break;
+		case ':':
+			(void) fprintf(stderr, "option -%c requires an "
+			    "operand\n", optopt);
+			usage();
+			return (1);
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			usage();
+			return (2);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (cancel != NULL) {
+		/*
+		 * '-c' is invalid with any other options.
+		 */
+		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+		    level != 0) {
+			(void) fprintf(stderr, "cancel (-c) incompatible with "
+			    "any other options\n");
+			usage();
+			return (2);
+		}
+		if (argc != 0) {
+			(void) fprintf(stderr, "extraneous argument to '-c'\n");
+			usage();
+			return (2);
+		}
+
+		if (strcmp(cancel, "all") == 0) {
+			return (cancel_all_handlers());
+		} else {
+			int id = (int)strtol(cancel, &end, 10);
+			if (*end != '\0') {
+				(void) fprintf(stderr, "invalid handle id '%s':"
+				    " must be an integer or 'all'\n", cancel);
+				usage();
+				return (1);
+			}
+			return (cancel_handler(id));
+		}
+	}
+
+	if (device != NULL) {
+		/*
+		 * Device (-d) injection uses a completely different mechanism
+		 * for doing injection, so handle it separately here.
+		 */
+		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+		    level != 0) {
+			(void) fprintf(stderr, "device (-d) incompatible with "
+			    "data error injection\n");
+			usage();
+			return (2);
+		}
+
+		if (argc != 1) {
+			(void) fprintf(stderr, "device (-d) injection requires "
+			    "a single pool name\n");
+			usage();
+			return (2);
+		}
+
+		if (copy_pool_name(pool, sizeof (pool), argv[0]) != 0)
+			return (1);
+		dataset[0] = '\0';
+
+		if (error == ECKSUM) {
+			(void) fprintf(stderr, "device error type must be "
+			    "'io' or 'nxio'\n");
+			return (1);
+		}
+
+		if (translate_device(pool, device, label, &record) != 0)
+			return (1);
+		if (!error)
+			error = ENXIO;
+	} else if (raw != NULL) {
+		if (range != NULL || type != TYPE_INVAL || level != 0) {
+			(void) fprintf(stderr, "raw (-b) format with "
+			    "any other options\n");
+			usage();
+			return (2);
+		}
+
+		if (argc != 1) {
+			(void) fprintf(stderr, "raw (-b) format expects a "
+			    "single pool name\n");
+			usage();
+			return (2);
+		}
+
+		if (copy_pool_name(pool, sizeof (pool), argv[0]) != 0)
+			return (1);
+		dataset[0] = '\0';
+
+		if (error == ENXIO) {
+			(void) fprintf(stderr, "data error type must be "
+			    "'checksum' or 'io'\n");
+			return (1);
+		}
+
+		if (translate_raw(raw, &record) != 0)
+			return (1);
+		if (!error)
+			error = EIO;
+	} else if (type == TYPE_INVAL) {
+		if (flags == 0) {
+			(void) fprintf(stderr, "at least one of '-b', '-d', "
+			    "'-t', '-a', or '-u' must be specified\n");
+			usage();
+			return (2);
+		}
+
+		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
+			if (copy_pool_name(pool, sizeof (pool), argv[0]) != 0)
+				return (1);
+			dataset[0] = '\0';
+		} else if (argc != 0) {
+			(void) fprintf(stderr, "extraneous argument for "
+			    "'-f'\n");
+			usage();
+			return (2);
+		}
+
+		/* Flags only: no record is injected, just the side effects. */
+		flags |= ZINJECT_NULL;
+	} else {
+		if (argc != 1) {
+			(void) fprintf(stderr, "missing object\n");
+			usage();
+			return (2);
+		}
+
+		if (error == ENXIO) {
+			(void) fprintf(stderr, "data error type must be "
+			    "'checksum' or 'io'\n");
+			return (1);
+		}
+
+		if (translate_record(type, argv[0], range, level, &record, pool,
+		    dataset) != 0)
+			return (1);
+		if (!error)
+			error = EIO;
+	}
+
+	/*
+	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
+	 * unload the pool, so that we trigger spa-wide reopen of metadata next
+	 * time we access the pool.
+	 */
+	if (dataset[0] != '\0' && domount) {
+		if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
+			return (1);
+
+		if (zfs_unmount(zhp, NULL, 0) != 0)
+			return (1);
+	}
+
+	record.zi_error = error;
+
+	ret = register_handler(pool, flags, &record, quiet);
+
+	/*
+	 * Always attempt the remount, but do not let a successful remount
+	 * hide a failure to register the handler.
+	 */
+	if (dataset[0] != '\0' && domount) {
+		if (zfs_mount(zhp, NULL, 0) != 0)
+			ret = 1;
+	}
+
+	libzfs_fini(g_zfs);
+
+	return (ret);
+}
--- a/cddl/contrib/opensolaris/cmd/zinject/zinject.h
+++ b/cddl/contrib/opensolaris/cmd/zinject/zinject.h
@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_ZINJECT_H
+#define	_ZINJECT_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/zfs_ioctl.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Kinds of injection target selectable by name on the zinject command
+ * line.  The enumerators are grouped by range: TYPE_MOS up to (but not
+ * including) TYPE_LABEL_UBERBLOCK are the MOS types tested by MOS_TYPE(),
+ * and TYPE_LABEL_UBERBLOCK up to (but not including) TYPE_INVAL are the
+ * label types tested by LABEL_TYPE().  TYPE_INVAL is the "no/unknown type"
+ * sentinel and must stay last.
+ */
+typedef enum {
+	TYPE_DATA,		/* plain file contents		*/
+	TYPE_DNODE,		/* metadnode contents		*/
+	TYPE_MOS,		/* all MOS data			*/
+	TYPE_MOSDIR,		/* MOS object directory		*/
+	TYPE_METASLAB,		/* metaslab objects		*/
+	TYPE_CONFIG,		/* MOS config			*/
+	TYPE_BPLIST,		/* block pointer list		*/
+	TYPE_SPACEMAP,		/* space map objects		*/
+	TYPE_ERRLOG,		/* persistent error log		*/
+	TYPE_LABEL_UBERBLOCK,	/* label specific uberblock	*/
+	TYPE_LABEL_NVLIST,	/* label specific nvlist	*/
+	TYPE_INVAL
+} err_type_t;
+
+/* True if 't' is one of the MOS types (TYPE_MOS through TYPE_ERRLOG). */
+#define	MOS_TYPE(t)	\
+	((t) >= TYPE_MOS && (t) < TYPE_LABEL_UBERBLOCK)
+
+/* True if 't' is a label type (TYPE_LABEL_UBERBLOCK or TYPE_LABEL_NVLIST). */
+#define	LABEL_TYPE(t)	\
+	((t) >= TYPE_LABEL_UBERBLOCK && (t) < TYPE_INVAL)
+
+int translate_record(err_type_t type, const char *object, const char *range,
+    int level, zinject_record_t *record, char *poolname, char *dataset);
+int translate_raw(const char *raw, zinject_record_t *record);
+int translate_device(const char *pool, const char *device,
+    err_type_t label_type, zinject_record_t *record);
+void usage(void);
+
+extern libzfs_handle_t *g_zfs;
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ZINJECT_H */
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool.8
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool.8
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c
@ -53,6 +53,7 @@ struct zpool_list {
 	boolean_t	zl_findall;
 	uu_avl_t	*zl_avl;
 	uu_avl_pool_t	*zl_pool;
+	zprop_list_t	**zl_proplist;
 };

 /* ARGSUSED */
@ -81,6 +82,12 @@ add_pool(zpool_handle_t *zhp, void *data)
 	node->zn_handle = zhp;
 	uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool);
 	if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) {
+		if (zlp->zl_proplist &&
+		    zpool_expand_proplist(zhp, zlp->zl_proplist) != 0) {
+			zpool_close(zhp);
+			free(node);
+			return (-1);
+		}
 		uu_avl_insert(zlp->zl_avl, node, idx);
 	} else {
 		zpool_close(zhp);
@ -98,7 +105,7 @@ add_pool(zpool_handle_t *zhp, void *data)
 * line.
 */
 zpool_list_t *
-pool_list_get(int argc, char **argv, zpool_proplist_t **proplist, int *err)
+pool_list_get(int argc, char **argv, zprop_list_t **proplist, int *err)
 {
 	zpool_list_t *zlp;

@ -114,6 +121,8 @@ pool_list_get(int argc, char **argv, zpool_proplist_t **proplist, int *err)
 	    UU_DEFAULT)) == NULL)
 		zpool_no_memory();

+	zlp->zl_proplist = proplist;
+
 	if (argc == 0) {
 		(void) zpool_iter(g_zfs, add_pool, zlp);
 		zlp->zl_findall = B_TRUE;
@ -123,13 +132,12 @@ pool_list_get(int argc, char **argv, zpool_proplist_t **proplist, int *err)
 		for (i = 0; i < argc; i++) {
 			zpool_handle_t *zhp;

-			if ((zhp = zpool_open_canfail(g_zfs,
-			    argv[i])) != NULL && add_pool(zhp, zlp) == 0) {
-				if (proplist &&
-				    zpool_expand_proplist(zhp, proplist) != 0)
+			if (zhp = zpool_open_canfail(g_zfs, argv[i])) {
+				if (add_pool(zhp, zlp) != 0)
 					*err = B_TRUE;
-			} else
+			} else {
 				*err = B_TRUE;
+			}
 		}
 	}

@ -228,7 +236,7 @@ pool_list_count(zpool_list_t *zlp)
 */
 int
 for_each_pool(int argc, char **argv, boolean_t unavail,
-    zpool_proplist_t **proplist, zpool_iter_f func, void *data)
+    zprop_list_t **proplist, zpool_iter_f func, void *data)
 {
 	zpool_list_t *list;
 	int ret = 0;
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
@ -19,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -77,3 +77,28 @@ zpool_no_memory(void)
 	    gettext("internal error: out of memory\n"));
 	exit(1);
 }
+
+/*
+ * Count the direct children of 'nv' that are marked as log devices, i.e.
+ * whose ZPOOL_CONFIG_IS_LOG value is present and non-zero.  Returns 0 when
+ * 'nv' has no ZPOOL_CONFIG_CHILDREN array.
+ */
+uint_t
+num_logs(nvlist_t *nv)
+{
+	nvlist_t **kids;
+	uint_t nkids;
+	uint_t idx = 0;
+	uint_t total = 0;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (0);
+
+	while (idx < nkids) {
+		/* A child without the property counts as "not a log". */
+		uint64_t is_log = B_FALSE;
+
+		(void) nvlist_lookup_uint64(kids[idx], ZPOOL_CONFIG_IS_LOG,
+		    &is_log);
+		if (is_log != 0)
+			total++;
+		idx++;
+	}
+
+	return (total);
+}
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
@ -19,15 +19,13 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

 #ifndef	ZPOOL_UTIL_H
 #define	ZPOOL_UTIL_H

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <libnvpair.h>
 #include <libzfs.h>

@ -41,22 +39,24 @@ extern "C" {
 void *safe_malloc(size_t);
 char *safe_strdup(const char *);
 void zpool_no_memory(void);
+uint_t num_logs(nvlist_t *nv);

 /*
 * Virtual device functions
 */
-nvlist_t *make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
-    boolean_t isreplace, int argc, char **argv);
+
+nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
+    boolean_t isreplace, boolean_t dryrun, int argc, char **argv);

 /*
 * Pool list functions
 */
-int for_each_pool(int, char **, boolean_t unavail, zpool_proplist_t **,
+int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **,
    zpool_iter_f, void *);

 typedef struct zpool_list zpool_list_t;

-zpool_list_t *pool_list_get(int, char **, zpool_proplist_t **, int *);
+zpool_list_t *pool_list_get(int, char **, zprop_list_t **, int *);
 void pool_list_update(zpool_list_t *);
 int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *);
 void pool_list_free(zpool_list_t *);
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
@ -20,12 +20,10 @@
 */

 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
 * Functions to convert between a list of vdevs and an nvlist representing the
 * configuration.  Each entry in the list can be one of:
@ -48,8 +46,8 @@
 * Hot spares are a special case, and passed down as an array of disk vdevs, at
 * the same level as the root of the vdev tree.
 *
- * The only function exported by this file is 'get_vdev_spec'.  The function
- * performs several passes:
+ * The only function exported by this file is 'make_root_vdev'.  The
+ * function performs several passes:
 *
 * 	1. Construct the vdev specification.  Performs syntax validation and
 *         makes sure each device is valid.
@ -59,6 +57,7 @@
 * 	3. Check for replication errors if the 'force' flag is not specified.
 *         validates that the replication level is consistent across the
 *         entire pool.
+ * 	4. Call libzfs to label any whole disks with an EFI label.
 */

 #include <assert.h>
@ -76,8 +75,6 @@
 #include <sys/mntent.h>
 #include <libgeom.h>

-#include <libzfs.h>
-
 #include "zpool_util.h"

 /*
@ -111,53 +108,105 @@ vdev_error(const char *fmt, ...)
 }

 /*
- * Validate a GEOM provider.
+ * Check that a file is valid.  All we can do in this case is check that it's
+ * not in use by another pool, and not in use by swap.
 */
+/*
+ * Check whether 'file' already belongs to a pool.
+ *
+ * Returns 0 if the file may be used and -1 (after reporting through
+ * vdev_error()) if it may not.  A file that cannot be opened is treated as
+ * usable.  A file that is in use is rejected when its pool is active, when
+ * it is a hot spare, or when 'force' is not set; the one exception is a hot
+ * spare being added as a spare again ('isspare'), which is allowed so that
+ * spares can be shared between pools.
+ */
+static int
+check_file(const char *file, boolean_t force, boolean_t isspare)
+{
+	char  *name;
+	int fd;
+	int ret = 0;
+	int err;
+	pool_state_t state;
+	boolean_t inuse;
+
+#if 0
+	if (dm_inuse_swap(file, &err)) {
+		if (err)
+			libdiskmgt_error(err);
+		else
+			vdev_error(gettext("%s is currently used by swap. "
+			    "Please see swap(1M).\n"), file);
+		return (-1);
+	}
+#endif
+
+	if ((fd = open(file, O_RDONLY)) < 0)
+		return (0);
+
+	if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
+		const char *desc;
+
+		switch (state) {
+		case POOL_STATE_ACTIVE:
+			desc = gettext("active");
+			break;
+
+		case POOL_STATE_EXPORTED:
+			desc = gettext("exported");
+			break;
+
+		case POOL_STATE_POTENTIALLY_ACTIVE:
+			desc = gettext("potentially active");
+			break;
+
+		default:
+			desc = gettext("unknown");
+			break;
+		}
+
+		if (state == POOL_STATE_SPARE && isspare) {
+			/*
+			 * Allow hot spares to be shared between pools.  Fall
+			 * through to the common cleanup below rather than
+			 * returning here, so 'name' and 'fd' are released.
+			 */
+		} else if (state == POOL_STATE_ACTIVE ||
+		    state == POOL_STATE_SPARE || !force) {
+			switch (state) {
+			case POOL_STATE_SPARE:
+				vdev_error(gettext("%s is reserved as a hot "
+				    "spare for pool %s\n"), file, name);
+				break;
+			default:
+				vdev_error(gettext("%s is part of %s pool "
+				    "'%s'\n"), file, desc, name);
+				break;
+			}
+			ret = -1;
+		}
+
+		free(name);
+	}
+
+	(void) close(fd);
+	return (ret);
+}
+
 static int
 check_provider(const char *name, boolean_t force, boolean_t isspare)
 {
-	struct gmesh mesh;
-	struct gclass *mp;
-	struct ggeom *gp;
-	struct gprovider *pp;
-	int rv;
+	char path[MAXPATHLEN];

-	/* XXX: What to do with isspare? */
+	if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) != 0)
+		snprintf(path, sizeof(path), "%s%s", _PATH_DEV, name);
+	else
+		strlcpy(path, name, sizeof(path));

-	if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
-		name += sizeof(_PATH_DEV) - 1;
-
-	rv = geom_gettree(&mesh);
-	assert(rv == 0);
-
-	pp = NULL;
-	LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
-		LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
-			LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
-				if (strcmp(pp->lg_name, name) == 0)
-					goto out;
-			}
-		}
-	}
-out:
-	rv = -1;
-	if (pp == NULL)
-		vdev_error("no such provider %s\n", name);
-	else {
-		int acr, acw, ace;
-
-		VERIFY(sscanf(pp->lg_mode, "r%dw%de%d", &acr, &acw, &ace) == 3);
-		if (acw == 0 && ace == 0)
-			rv = 0;
-		else
-			vdev_error("%s is in use (%s)\n", name, pp->lg_mode);
-	}
-	geom_deletetree(&mesh);
-	return (rv);
+	return (check_file(path, force, isspare));
 }

+/*
+ * By "whole disk" we mean an entire physical disk (something we can
+ * label, toggle the write cache on, etc.) as opposed to the full
+ * capacity of a pseudo-device such as lofi or did.  We act as if we
+ * are labeling the disk, which should be a pretty good test of whether
+ * it's a viable device or not.  Returns B_TRUE if it is and B_FALSE if
+ * it isn't.
+ */
 static boolean_t
-is_provider(const char *name)
+is_whole_disk(const char *name)
 {
 	int fd;

@ -167,8 +216,8 @@ is_provider(const char *name)
 		return (B_TRUE);
 	}
 	return (B_FALSE);
-
 }
+
 /*
 * Create a leaf vdev.  Determine if this is a GEOM provider.
 * Valid forms for a leaf vdev are:
@ -176,25 +225,81 @@ is_provider(const char *name)
 * 	/dev/xxx	Complete path to a GEOM provider
 * 	xxx		Shorthand for /dev/xxx
 */
-nvlist_t *
-make_leaf_vdev(const char *arg)
+static nvlist_t *
+make_leaf_vdev(const char *arg, uint64_t is_log)
 {
-	char ident[DISK_IDENT_SIZE], path[MAXPATHLEN];
+	char path[MAXPATHLEN];
 	struct stat64 statbuf;
 	nvlist_t *vdev = NULL;
 	char *type = NULL;
 	boolean_t wholedisk = B_FALSE;

-	if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
-		strlcpy(path, arg, sizeof (path));
-	else
-		snprintf(path, sizeof (path), "%s%s", _PATH_DEV, arg);
+	/*
+	 * Determine what type of vdev this is, and put the full path into
+	 * 'path'.  We detect whether this is a device or file afterwards by
+	 * checking the st_mode of the file.
+	 */
+	if (arg[0] == '/') {
+		/*
+		 * Complete device or file path.  Exact type is determined by
+		 * examining the file descriptor afterwards.
+		 */
+		wholedisk = is_whole_disk(arg);
+		if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
+			(void) fprintf(stderr,
+			    gettext("cannot open '%s': %s\n"),
+			    arg, strerror(errno));
+			return (NULL);
+		}

-	if (is_provider(path))
+		(void) strlcpy(path, arg, sizeof (path));
+	} else {
+		/*
+		 * This may be a short path for a device, or it could be total
+		 * gibberish.  Check to see if it's a known device in
+		 * /dev/dsk/.  As part of this check, see if we've been given
+		 * an entire disk (minus the slice number).
+		 */
+		if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+			strlcpy(path, arg, sizeof (path));
+		else
+			snprintf(path, sizeof (path), "%s%s", _PATH_DEV, arg);
+		wholedisk = is_whole_disk(path);
+		if (!wholedisk && (stat64(path, &statbuf) != 0)) {
+			/*
+			 * If we got ENOENT, then the user gave us
+			 * gibberish, so try to direct them with a
+			 * reasonable error message.  Otherwise,
+			 * regurgitate strerror() since it's the best we
+			 * can do.
+			 */
+			if (errno == ENOENT) {
+				(void) fprintf(stderr,
+				    gettext("cannot open '%s': no such "
+				    "GEOM provider\n"), arg);
+				(void) fprintf(stderr,
+				    gettext("must be a full path or "
+				    "shorthand device name\n"));
+				return (NULL);
+			} else {
+				(void) fprintf(stderr,
+				    gettext("cannot open '%s': %s\n"),
+				    path, strerror(errno));
+				return (NULL);
+			}
+		}
+	}
+
+	/*
+	 * Determine whether this is a device or a file.
+	 */
+	if (wholedisk) {
 		type = VDEV_TYPE_DISK;
-	else {
+	} else if (S_ISREG(statbuf.st_mode)) {
+		type = VDEV_TYPE_FILE;
+	} else {
 		(void) fprintf(stderr, gettext("cannot use '%s': must be a "
-		    "GEOM provider\n"), path);
+		    "GEOM provider or regular file\n"), path);
 		return (NULL);
 	}

@ -206,6 +311,7 @@ make_leaf_vdev(const char *arg)
 	verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
 	verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
 	verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
+	verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
 	if (strcmp(type, VDEV_TYPE_DISK) == 0)
 		verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
 		    (uint64_t)B_FALSE) == 0);
@ -267,12 +373,14 @@ typedef struct replication_level {
 	uint64_t zprl_parity;
 } replication_level_t;

+/*
+ * Size tolerance, in bytes (16MB), used by get_replication(): devices in
+ * the same vdev whose sizes differ by no more than this are treated as
+ * consistent.
+ */
+#define	ZPOOL_FUZZ	(16 * 1024 * 1024)
+
 /*
 * Given a list of toplevel vdevs, return the current replication level.  If
 * the config is inconsistent, then NULL is returned.  If 'fatal' is set, then
 * an error message will be displayed for each self-inconsistent vdev.
 */
-replication_level_t *
+static replication_level_t *
 get_replication(nvlist_t *nvroot, boolean_t fatal)
 {
 	nvlist_t **top;
@ -291,10 +399,20 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)

 	lastrep.zprl_type = NULL;
 	for (t = 0; t < toplevels; t++) {
+		uint64_t is_log = B_FALSE;
+
 		nv = top[t];

-		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+		/*
+		 * For separate logs we ignore the top level vdev replication
+		 * constraints.
+		 */
+		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
+		if (is_log)
+			continue;

+		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE,
+		    &type) == 0);
 		if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 		    &child, &children) != 0) {
 			/*
@ -328,7 +446,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 			}

 			/*
-			 * The 'dontreport' variable indicatest that we've
+			 * The 'dontreport' variable indicates that we've
 			 * already reported an error for this spec, so don't
 			 * bother doing it again.
 			 */
@ -349,7 +467,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 				    ZPOOL_CONFIG_TYPE, &childtype) == 0);

 				/*
-				 * If this is a a replacing or spare vdev, then
+				 * If this is a replacing or spare vdev, then
 				 * get the real first child of the vdev.
 				 */
 				if (strcmp(childtype,
@ -409,22 +527,30 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 				 */
 				if ((fd = open(path, O_RDONLY)) >= 0) {
 					err = fstat64(fd, &statbuf);
+					if (err == 0 &&
+					    S_ISCHR(statbuf.st_mode)) {
+						err = ioctl(fd, DIOCGMEDIASIZE,
+						    &statbuf.st_size);
+					}
 					(void) close(fd);
 				} else {
 					err = stat64(path, &statbuf);
 				}
-
 				if (err != 0 || statbuf.st_size == 0)
 					continue;

 				size = statbuf.st_size;

 				/*
-				 * Also check the size of each device.  If they
-				 * differ, then report an error.
+				 * Also make sure that devices and
+				 * slices have a consistent size.  If
+				 * they differ by a significant amount
+				 * (~16MB) then report an error.
 				 */
-				if (!dontreport && vdev_size != -1ULL &&
-				    size != vdev_size) {
+				if (!dontreport &&
+				    (vdev_size != -1ULL &&
+				    (labs(size - vdev_size) >
+				    ZPOOL_FUZZ))) {
 					if (ret != NULL)
 						free(ret);
 					ret = NULL;
@ -506,9 +632,11 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 * has a consistent replication level, then we ignore any errors.  Otherwise,
 * report any difference between the two.
 */
-int
+static int
 check_replication(nvlist_t *config, nvlist_t *newroot)
 {
+	nvlist_t **child;
+	uint_t	children;
 	replication_level_t *current = NULL, *new;
 	int ret;

@ -524,6 +652,23 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
 		if ((current = get_replication(nvroot, B_FALSE)) == NULL)
 			return (0);
 	}
+	/*
+	 * for spares there may be no children, and therefore no
+	 * replication level to check
+	 */
+	if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0) || (children == 0)) {
+		free(current);
+		return (0);
+	}
+
+	/*
+	 * If all we have is logs then there's no replication level to check.
+	 */
+	if (num_logs(newroot) == children) {
+		free(current);
+		return (0);
+	}

 	/*
 	 * Get the replication level of the new vdev spec, reporting any
@ -621,7 +766,7 @@ is_spare(nvlist_t *config, const char *path)
 * Go through and find any devices that are in use.  We rely on libdiskmgt for
 * the majority of this task.
 */
-int
+static int
 check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
    int isspare)
 {
@ -653,6 +798,9 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
 		if (strcmp(type, VDEV_TYPE_DISK) == 0)
 			ret = check_provider(path, force, isspare);

+		if (strcmp(type, VDEV_TYPE_FILE) == 0)
+			ret = check_file(path, force, isspare);
+
 		return (ret);
 	}

@ -668,10 +816,17 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
 			    isreplacing, B_TRUE)) != 0)
 				return (ret);

+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+	    &child, &children) == 0)
+		for (c = 0; c < children; c++)
+			if ((ret = check_in_use(config, child[c], force,
+			    isreplacing, B_FALSE)) != 0)
+				return (ret);
+
 	return (0);
 }

-const char *
+static const char *
 is_grouping(const char *type, int *mindev)
 {
 	if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
@ -698,6 +853,18 @@ is_grouping(const char *type, int *mindev)
 		return (VDEV_TYPE_SPARE);
 	}

+	if (strcmp(type, "log") == 0) {
+		if (mindev != NULL)
+			*mindev = 1;
+		return (VDEV_TYPE_LOG);
+	}
+
+	if (strcmp(type, "cache") == 0) {
+		if (mindev != NULL)
+			*mindev = 1;
+		return (VDEV_TYPE_L2CACHE);
+	}
+
 	return (NULL);
 }

@ -710,14 +877,21 @@ is_grouping(const char *type, int *mindev)
 nvlist_t *
 construct_spec(int argc, char **argv)
 {
-	nvlist_t *nvroot, *nv, **top, **spares;
-	int t, toplevels, mindev, nspares;
+	nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
+	int t, toplevels, mindev, nspares, nlogs, nl2cache;
 	const char *type;
+	uint64_t is_log;
+	boolean_t seen_logs;

 	top = NULL;
 	toplevels = 0;
 	spares = NULL;
+	l2cache = NULL;
 	nspares = 0;
+	nlogs = 0;
+	nl2cache = 0;
+	is_log = B_FALSE;
+	seen_logs = B_FALSE;

 	while (argc > 0) {
 		nv = NULL;
@ -730,12 +904,56 @@ construct_spec(int argc, char **argv)
 			nvlist_t **child = NULL;
 			int c, children = 0;

-			if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
-			    spares != NULL) {
-				(void) fprintf(stderr, gettext("invalid vdev "
-				    "specification: 'spare' can be "
-				    "specified only once\n"));
-				return (NULL);
+			if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
+				if (spares != NULL) {
+					(void) fprintf(stderr,
+					    gettext("invalid vdev "
+					    "specification: 'spare' can be "
+					    "specified only once\n"));
+					return (NULL);
+				}
+				is_log = B_FALSE;
+			}
+
+			if (strcmp(type, VDEV_TYPE_LOG) == 0) {
+				if (seen_logs) {
+					(void) fprintf(stderr,
+					    gettext("invalid vdev "
+					    "specification: 'log' can be "
+					    "specified only once\n"));
+					return (NULL);
+				}
+				seen_logs = B_TRUE;
+				is_log = B_TRUE;
+				argc--;
+				argv++;
+				/*
+				 * A log is not a real grouping device.
+				 * We just set is_log and continue.
+				 */
+				continue;
+			}
+
+			if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
+				if (l2cache != NULL) {
+					(void) fprintf(stderr,
+					    gettext("invalid vdev "
+					    "specification: 'cache' can be "
+					    "specified only once\n"));
+					return (NULL);
+				}
+				is_log = B_FALSE;
+			}
+
+			if (is_log) {
+				if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
+					(void) fprintf(stderr,
+					    gettext("invalid vdev "
+					    "specification: unsupported 'log' "
+					    "device: %s\n"), type);
+					return (NULL);
+				}
+				nlogs++;
 			}

 			for (c = 1; c < argc; c++) {
@ -746,7 +964,8 @@ construct_spec(int argc, char **argv)
 				    children * sizeof (nvlist_t *));
 				if (child == NULL)
 					zpool_no_memory();
-				if ((nv = make_leaf_vdev(argv[c])) == NULL)
+				if ((nv = make_leaf_vdev(argv[c], B_FALSE))
+				    == NULL)
 					return (NULL);
 				child[children - 1] = nv;
 			}
@ -765,11 +984,17 @@ construct_spec(int argc, char **argv)
 				spares = child;
 				nspares = children;
 				continue;
+			} else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
+				l2cache = child;
+				nl2cache = children;
+				continue;
 			} else {
 				verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
 				    0) == 0);
 				verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
 				    type) == 0);
+				verify(nvlist_add_uint64(nv,
+				    ZPOOL_CONFIG_IS_LOG, is_log) == 0);
 				if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
 					verify(nvlist_add_uint64(nv,
 					    ZPOOL_CONFIG_NPARITY,
@ -788,8 +1013,10 @@ construct_spec(int argc, char **argv)
 			 * We have a device.  Pass off to make_leaf_vdev() to
 			 * construct the appropriate nvlist describing the vdev.
 			 */
-			if ((nv = make_leaf_vdev(argv[0])) == NULL)
+			if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL)
 				return (NULL);
+			if (is_log)
+				nlogs++;
 			argc--;
 			argv++;
 		}
@ -801,13 +1028,19 @@ construct_spec(int argc, char **argv)
 		top[toplevels - 1] = nv;
 	}

-	if (toplevels == 0 && nspares == 0) {
+	if (toplevels == 0 && nspares == 0 && nl2cache == 0) {
 		(void) fprintf(stderr, gettext("invalid vdev "
 		    "specification: at least one toplevel vdev must be "
 		    "specified\n"));
 		return (NULL);
 	}

+	if (seen_logs && nlogs == 0) {
+		(void) fprintf(stderr, gettext("invalid vdev specification: "
+		    "log requires at least 1 device\n"));
+		return (NULL);
+	}
+
 	/*
 	 * Finally, create nvroot and add all top-level vdevs to it.
 	 */
@ -819,18 +1052,26 @@ construct_spec(int argc, char **argv)
 	if (nspares != 0)
 		verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 		    spares, nspares) == 0);
+	if (nl2cache != 0)
+		verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+		    l2cache, nl2cache) == 0);

 	for (t = 0; t < toplevels; t++)
 		nvlist_free(top[t]);
 	for (t = 0; t < nspares; t++)
 		nvlist_free(spares[t]);
+	for (t = 0; t < nl2cache; t++)
+		nvlist_free(l2cache[t]);
 	if (spares)
 		free(spares);
+	if (l2cache)
+		free(l2cache);
 	free(top);

 	return (nvroot);
 }

+
 /*
 * Get and validate the contents of the given vdev specification.  This ensures
 * that the nvlist returned is well-formed, that all the devices exist, and that
@ -842,11 +1083,11 @@ construct_spec(int argc, char **argv)
 * added, even if they appear in use.
 */
 nvlist_t *
-make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
-    boolean_t isreplacing, int argc, char **argv)
+make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
+    boolean_t isreplacing, boolean_t dryrun, int argc, char **argv)
 {
 	nvlist_t *newroot;
-
+	nvlist_t *poolconfig = NULL;
 	is_force = force;

 	/*
@ -857,6 +1098,9 @@ make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
 	if ((newroot = construct_spec(argc, argv)) == NULL)
 		return (NULL);

+	if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL))
+		return (NULL);
+
 	/*
 	 * Validate each device to make sure that its not shared with another
 	 * subsystem.  We do this even if 'force' is set, because there are some
--- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c
+++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
--- a/cddl/contrib/opensolaris/head/assert.h
+++ b/cddl/contrib/opensolaris/head/assert.h
@ -39,7 +39,7 @@ extern "C" {

 #if defined(__STDC__)
 #if __STDC_VERSION__ - 0 >= 199901L
-extern void __assert_c99(const char *, const char *, int, const char *);
+extern void __assert(const char *, const char *, int);
 #else
 extern void __assert(const char *, const char *, int);
 #endif /* __STDC_VERSION__ - 0 >= 199901L */
@ -70,8 +70,7 @@ extern void _assert();

 #if defined(__STDC__)
 #if __STDC_VERSION__ - 0 >= 199901L
-#define	assert(EX) (void)((EX) || \
-	(__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
+#define	assert(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
 #else
 #define	assert(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
 #endif /* __STDC_VERSION__ - 0 >= 199901L */
--- a/cddl/contrib/opensolaris/head/libintl.h
+++ b/cddl/contrib/opensolaris/head/libintl.h
@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@ -20,11 +19,10 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

-/* Libintl is a library of advanced internationalization functions. */

 #ifndef	_LIBINTL_H
 #define	_LIBINTL_H
@ -63,6 +61,9 @@ typedef long	wchar_t;

 #define	TEXTDOMAINMAX	256

+/*
+ * Evaluates to 1 when 'm' is 0 or 1, and to -1 for any other value.
+ */
+#define	__GNU_GETTEXT_SUPPORTED_REVISION(m)	\
+	((((m) == 0) || ((m) == 1)) ? 1 : -1)
+
 #ifdef __STDC__
 extern char *dcgettext(const char *, const char *, const int);
 extern char *dgettext(const char *, const char *);
--- a/cddl/contrib/opensolaris/head/synch.h
+++ b/cddl/contrib/opensolaris/head/synch.h
@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@ -19,8 +18,9 @@
 *
 * CDDL HEADER END
 */
+
 /*
- * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -81,12 +81,12 @@ typedef lwp_cond_t cond_t;
 * Because we have to deal with C++, we cannot redefine this one as that one.
 */
 typedef struct _rwlock {
-	int32_t		readers;	/* -1 == writer else # of readers */
+	int32_t		readers;	/* rwstate word */
 	uint16_t	type;
 	uint16_t	magic;
-	mutex_t		mutex;		/* used to indicate ownership */
-	cond_t		readercv;	/* unused */
-	cond_t		writercv;	/* unused */
+	mutex_t		mutex;		/* used with process-shared rwlocks */
+	cond_t		readercv;	/* used only to indicate ownership */
+	cond_t		writercv;	/* used only to indicate ownership */
 } rwlock_t;

 #ifdef	__STDC__
@ -111,6 +111,7 @@ int	cond_signal(cond_t *);
 int	cond_broadcast(cond_t *);
 int	mutex_init(mutex_t *, int, void *);
 int	mutex_destroy(mutex_t *);
+int	mutex_consistent(mutex_t *);
 int	mutex_lock(mutex_t *);
 int	mutex_trylock(mutex_t *);
 int	mutex_unlock(mutex_t *);
@ -152,6 +153,7 @@ int	cond_signal();
 int	cond_broadcast();
 int	mutex_init();
 int	mutex_destroy();
+int	mutex_consistent();
 int	mutex_lock();
 int	mutex_trylock();
 int	mutex_unlock();
--- a/cddl/contrib/opensolaris/head/thread.h
+++ b/cddl/contrib/opensolaris/head/thread.h
@ -30,6 +30,7 @@
 #pragma ident	"%Z%%M%	%I%	%E% SMI"

 #include <pthread.h>
+#include <pthread_np.h>
 #include <assert.h>

 /*
@ -52,6 +53,7 @@ typedef pthread_rwlock_t rwlock_t;
 #define	mutex_lock(l)		pthread_mutex_lock(l)
 #define	mutex_trylock(l)	pthread_mutex_trylock(l)
 #define	mutex_unlock(l)		pthread_mutex_unlock(l)
+#define	mutex_owned(l)		pthread_mutex_isowned_np(l)
 #define	rwlock_init(l,f,a)	pthread_rwlock_init(l,NULL)
 #define	rwlock_destroy(l)	pthread_rwlock_destroy(l)
 #define	rw_rdlock(l)		pthread_rwlock_rdlock(l)
--- a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c
+++ b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c
@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@ -20,12 +19,13 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

 #pragma ident	"%Z%%M%	%I%	%E% SMI"

+#include <inttypes.h>
 #include <unistd.h>
 #include <strings.h>
 #include "libnvpair.h"
@ -137,6 +137,12 @@ nvlist_print_with_indent(FILE *fp, nvlist_t *nvl, int depth)
 			(void) fprintf(fp, " 0x%llx", (u_longlong_t)val);
 			break;
 		}
+		case DATA_TYPE_DOUBLE: {
+			double val;
+			(void) nvpair_value_double(nvp, &val);
+			(void) fprintf(fp, " 0x%llf", val);
+			break;
+		}
 		case DATA_TYPE_STRING: {
 			char *val;
 			(void) nvpair_value_string(nvp, &val);
@ -264,3 +270,348 @@ nvlist_print(FILE *fp, nvlist_t *nvl)
 {
 	nvlist_print_with_indent(fp, nvl, 0);
 }
+
+/*
+ * Determine if string 'value' matches 'nvp' value.  The 'value' string is
+ * converted, depending on the type of 'nvp', prior to match.  For numeric
+ * types, a radix independent sscanf conversion of 'value' is used. If 'nvp'
+ * is an array type, 'ai' is the index into the array against which we are
+ * checking for match. If nvp is of DATA_TYPE_STRING*, the caller can pass
+ * in a regex_t compilation of value in 'value_regex' to trigger regular
+ * expression string match instead of simple strcmp().
+ *
+ * Return 1 on match, 0 on no-match, and -1 on error.  If the error is
+ * related to value syntax error and 'ep' is non-NULL, *ep will point into
+ * the 'value' string at the location where the error exists.
+ *
+ * NOTE: It may be possible to move the non-regex_t version of this into
+ * common code used by library/kernel/boot.
+ */
+int
+nvpair_value_match_regex(nvpair_t *nvp, int ai,
+    char *value, regex_t *value_regex, char **ep)
+{
+	char	*evalue;
+	uint_t	a_len;
+	int	sr;
+
+	if (ep)
+		*ep = NULL;
+
+	if ((nvp == NULL) || (value == NULL))
+		return (-1);		/* error fail match - invalid args */
+
+	/* make sure array and index combination make sense */
+	if ((nvpair_type_is_array(nvp) && (ai < 0)) ||
+	    (!nvpair_type_is_array(nvp) && (ai >= 0)))
+		return (-1);		/* error fail match - bad index */
+
+	/*
+	 * non-string values should be single 'chunk': skip leading blanks
+	 * and reject anything that still contains a blank afterwards
+	 */
+	if ((nvpair_type(nvp) != DATA_TYPE_STRING) &&
+	    (nvpair_type(nvp) != DATA_TYPE_STRING_ARRAY)) {
+		value += strspn(value, " \t");
+		evalue = value + strcspn(value, " \t");
+		if (*evalue) {
+			if (ep)
+				*ep = evalue;
+			return (-1);	/* error fail match - syntax */
+		}
+	}
+
+	/*
+	 * 'sr' holds the sscanf() result of the numeric cases; the string
+	 * cases never call sscanf(), so it keeps this initial value there.
+	 */
+	sr = EOF;
+	switch (nvpair_type(nvp)) {
+	case DATA_TYPE_STRING: {
+		char	*val;
+
+		/* check string value for match */
+		if (nvpair_value_string(nvp, &val) == 0) {
+			if (value_regex) {
+				if (regexec(value_regex, val,
+				    (size_t)0, NULL, 0) == 0)
+					return (1);	/* match */
+			} else {
+				if (strcmp(value, val) == 0)
+					return (1);	/* match */
+			}
+		}
+		break;
+	}
+	case DATA_TYPE_STRING_ARRAY: {
+		char **val_array;
+
+		/* check indexed string value of array for match */
+		if ((nvpair_value_string_array(nvp, &val_array, &a_len) == 0) &&
+		    (ai < a_len)) {
+			if (value_regex) {
+				if (regexec(value_regex, val_array[ai],
+				    (size_t)0, NULL, 0) == 0)
+					return (1);
+			} else {
+				if (strcmp(value, val_array[ai]) == 0)
+					return (1);
+			}
+		}
+		break;
+	}
+	case DATA_TYPE_BYTE: {
+		uchar_t val, val_arg;
+
+		/* scanf uchar_t from value and check for match */
+		sr = sscanf(value, "%c", &val_arg);
+		if ((sr == 1) && (nvpair_value_byte(nvp, &val) == 0) &&
+		    (val == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_BYTE_ARRAY: {
+		uchar_t *val_array, val_arg;
+
+		/* check indexed value of array for match */
+		sr = sscanf(value, "%c", &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_byte_array(nvp, &val_array, &a_len) == 0) &&
+		    (ai < a_len) &&
+		    (val_array[ai] == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_INT8: {
+		int8_t val, val_arg;
+
+		/* scanf int8_t from value and check for match */
+		sr = sscanf(value, "%"SCNi8, &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_int8(nvp, &val) == 0) &&
+		    (val == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_INT8_ARRAY: {
+		int8_t *val_array, val_arg;
+
+		/* check indexed value of array for match */
+		sr = sscanf(value, "%"SCNi8, &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_int8_array(nvp, &val_array, &a_len) == 0) &&
+		    (ai < a_len) &&
+		    (val_array[ai] == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_UINT8: {
+		uint8_t val, val_arg;
+
+		/* scanf uint8_t from value and check for match */
+		sr = sscanf(value, "%"SCNi8, (int8_t *)&val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_uint8(nvp, &val) == 0) &&
+		    (val == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_UINT8_ARRAY: {
+		uint8_t *val_array, val_arg;
+
+		/* check indexed value of array for match */
+		sr = sscanf(value, "%"SCNi8, (int8_t *)&val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_uint8_array(nvp, &val_array, &a_len) == 0) &&
+		    (ai < a_len) &&
+		    (val_array[ai] == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_INT16: {
+		int16_t val, val_arg;
+
+		/* scanf int16_t from value and check for match */
+		sr = sscanf(value, "%"SCNi16, &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_int16(nvp, &val) == 0) &&
+		    (val == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_INT16_ARRAY: {
+		int16_t *val_array, val_arg;
+
+		/* check indexed value of array for match */
+		sr = sscanf(value, "%"SCNi16, &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_int16_array(nvp, &val_array, &a_len) == 0) &&
+		    (ai < a_len) &&
+		    (val_array[ai] == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_UINT16: {
+		uint16_t val, val_arg;
+
+		/* scanf uint16_t from value and check for match */
+		sr = sscanf(value, "%"SCNi16, (int16_t *)&val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_uint16(nvp, &val) == 0) &&
+		    (val == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_UINT16_ARRAY: {
+		uint16_t *val_array, val_arg;
+
+		/* check indexed value of array for match */
+		sr = sscanf(value, "%"SCNi16, (int16_t *)&val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_uint16_array(nvp, &val_array, &a_len) == 0) &&
+		    (ai < a_len) &&
+		    (val_array[ai] == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_INT32: {
+		int32_t val, val_arg;
+
+		/* scanf int32_t from value and check for match */
+		sr = sscanf(value, "%"SCNi32, &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_int32(nvp, &val) == 0) &&
+		    (val == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_INT32_ARRAY: {
+		int32_t *val_array, val_arg;
+
+		/* check indexed value of array for match */
+		sr = sscanf(value, "%"SCNi32, &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_int32_array(nvp, &val_array, &a_len) == 0) &&
+		    (ai < a_len) &&
+		    (val_array[ai] == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_UINT32: {
+		uint32_t val, val_arg;
+
+		/* scanf uint32_t from value and check for match */
+		sr = sscanf(value, "%"SCNi32, (int32_t *)&val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_uint32(nvp, &val) == 0) &&
+		    (val == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_UINT32_ARRAY: {
+		uint32_t *val_array, val_arg;
+
+		/* check indexed value of array for match */
+		sr = sscanf(value, "%"SCNi32, (int32_t *)&val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_uint32_array(nvp, &val_array, &a_len) == 0) &&
+		    (ai < a_len) &&
+		    (val_array[ai] == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_INT64: {
+		int64_t val, val_arg;
+
+		/* scanf int64_t from value and check for match */
+		sr = sscanf(value, "%"SCNi64, &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_int64(nvp, &val) == 0) &&
+		    (val == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_INT64_ARRAY: {
+		int64_t *val_array, val_arg;
+
+		/* check indexed value of array for match */
+		sr = sscanf(value, "%"SCNi64, &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_int64_array(nvp, &val_array, &a_len) == 0) &&
+		    (ai < a_len) &&
+		    (val_array[ai] == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_UINT64: {
+		uint64_t val_arg, val;
+
+		/* scanf uint64_t from value and check for match */
+		sr = sscanf(value, "%"SCNi64, (int64_t *)&val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_uint64(nvp, &val) == 0) &&
+		    (val == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_UINT64_ARRAY: {
+		uint64_t *val_array, val_arg;
+
+		/* check indexed value of array for match */
+		sr = sscanf(value, "%"SCNi64, (int64_t *)&val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_uint64_array(nvp, &val_array, &a_len) == 0) &&
+		    (ai < a_len) &&
+		    (val_array[ai] == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_BOOLEAN_VALUE: {
+		boolean_t val;
+		int32_t val_arg;	/* SCNi32 requires an int32_t target */
+
+		/* scanf boolean_t from value and check for match */
+		sr = sscanf(value, "%"SCNi32, &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_boolean_value(nvp, &val) == 0) &&
+		    (val == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_BOOLEAN_ARRAY: {
+		boolean_t *val_array;
+		int32_t val_arg;	/* SCNi32 requires an int32_t target */
+
+		/* check indexed value of array for match */
+		sr = sscanf(value, "%"SCNi32, &val_arg);
+		if ((sr == 1) &&
+		    (nvpair_value_boolean_array(nvp,
+		    &val_array, &a_len) == 0) &&
+		    (ai < a_len) &&
+		    (val_array[ai] == val_arg))
+			return (1);
+		break;
+	}
+	case DATA_TYPE_HRTIME:
+	case DATA_TYPE_NVLIST:
+	case DATA_TYPE_NVLIST_ARRAY:
+	case DATA_TYPE_BOOLEAN:
+	case DATA_TYPE_DOUBLE:
+	case DATA_TYPE_UNKNOWN:
+	default:
+		/*
+		 * unknown/unsupported data type
+		 */
+		return (-1);		/* error fail match */
+	}
+
+	/*
+	 * check to see if sscanf failed conversion, return approximate
+	 * pointer to problem
+	 *
+	 * NOTE(review): 'sr' is still EOF here for the string types, so a
+	 * string value that simply does not match also takes this branch
+	 * and returns -1 rather than 0 -- confirm whether callers rely on
+	 * that before changing it.
+	 */
+	if (sr != 1) {
+		if (ep)
+			*ep = value;
+		return (-1);		/* error fail match  - syntax */
+	}
+
+	return (0);			/* fail match */
+}
+
+/*
+ * Convenience wrapper around nvpair_value_match_regex() that passes a NULL
+ * 'value_regex', so string values are compared with strcmp() instead of
+ * regexec().  All other arguments, and the return value, are those of
+ * nvpair_value_match_regex().
+ */
+int
+nvpair_value_match(nvpair_t *nvp, int ai, char *value, char **ep)
+{
+	return (nvpair_value_match_regex(nvp, ai, value, NULL, ep));
+}
--- a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h
+++ b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h
@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@ -20,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -32,12 +31,15 @@
 #include <sys/nvpair.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <regex.h>

 #ifdef	__cplusplus
 extern "C" {
 #endif

 void nvlist_print(FILE *, nvlist_t *);
+int nvpair_value_match(nvpair_t *, int, char *, char **);
+int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, char **);

 #ifdef	__cplusplus
 }
--- a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h
@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@ -20,15 +19,13 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

 #ifndef	_LIBUUTIL_H
 #define	_LIBUUTIL_H

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <solaris.h>
 #include <sys/types.h>
 #include <stdarg.h>
@ -149,6 +146,7 @@ extern int uu_open_tmp(const char *dir, uint_t uflags);
 /*PRINTFLIKE1*/
 extern char *uu_msprintf(const char *format, ...);
 extern void *uu_zalloc(size_t);
+extern char *uu_strdup(const char *);
 extern void uu_free(void *);

 /*
--- a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_common.h
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_common.h
@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@ -21,7 +20,7 @@
 */

 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -30,16 +29,6 @@

 #pragma ident	"%Z%%M%	%I%	%E% SMI"

-#include <solaris.h>
-
-/*
- * We don't bind to the internal libc interfaces if this is a
- * native build.
- */
-#ifndef NATIVE_BUILD
-#include "c_synonyms.h"
-#endif
-
 #include <libuutil.h>
 #include <libuutil_impl.h>

--- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c
@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@ -20,12 +19,10 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include "libuutil_common.h"

 #include <stdarg.h>
@ -54,6 +51,22 @@ uu_free(void *p)
 	free(p);
 }

+/* Copy str into uu_zalloc()ed memory; NULL input or failed alloc gives NULL. */
+char *
+uu_strdup(const char *str)
+{
+	size_t len;
+	char *copy;
+
+	if (str == NULL)
+		return (NULL);
+	len = strlen(str) + 1;		/* include the terminating NUL */
+	copy = uu_zalloc(len);
+	if (copy != NULL)
+		(void) memcpy(copy, str, len);
+	return (copy);
+}
+
 char *
 uu_msprintf(const char *format, ...)
 {
--- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_avl.c
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_avl.c
@ -19,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -120,7 +120,8 @@ uu_avl_pool_destroy(uu_avl_pool_t *pp)
 		    UU_PTR_ENCODE(&pp->uap_null_avl)) {
 			uu_panic("uu_avl_pool_destroy: Pool \"%.*s\" (%p) has "
 			    "outstanding avls, or is corrupt.\n",
-			    sizeof (pp->uap_name), pp->uap_name, pp);
+			    (int)sizeof (pp->uap_name), pp->uap_name,
+			    (void *)pp);
 		}
 	}
 	(void) pthread_mutex_lock(&uu_apool_list_lock);
@ -142,14 +143,14 @@ uu_avl_node_init(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp)
 		if (offset + sizeof (*np) > pp->uap_objsize) {
 			uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): "
 			    "offset %ld doesn't fit in object (size %ld)\n",
-			    base, np, pp, pp->uap_name, offset,
-			    pp->uap_objsize);
+			    base, (void *)np, (void *)pp, pp->uap_name,
+			    (long)offset, (long)pp->uap_objsize);
 		}
 		if (offset != pp->uap_nodeoffset) {
 			uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): "
 			    "offset %ld doesn't match pool's offset (%ld)\n",
-			    base, np, pp, pp->uap_name, offset,
-			    pp->uap_objsize);
+			    base, (void *)np, (void *)pp, pp->uap_name,
+			    (long)offset, (long)pp->uap_objsize);
 		}
 	}

@ -166,12 +167,12 @@ uu_avl_node_fini(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp)
 		if (na[0] == DEAD_MARKER && na[1] == DEAD_MARKER) {
 			uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): "
 			    "node already finied\n",
-			    base, np, pp, pp->uap_name);
+			    base, (void *)np, (void *)pp, pp->uap_name);
 		}
 		if (na[0] != POOL_TO_MARKER(pp) || na[1] != 0) {
 			uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): "
 			    "node corrupt, in tree, or in different pool\n",
-			    base, np, pp, pp->uap_name);
+			    base, (void *)np, (void *)pp, pp->uap_name);
 		}
 	}

@ -251,12 +252,13 @@ uu_avl_destroy(uu_avl_t *ap)

 	if (ap->ua_debug) {
 		if (avl_numnodes(&ap->ua_tree) != 0) {
-			uu_panic("uu_avl_destroy(%p): tree not empty\n", ap);
+			uu_panic("uu_avl_destroy(%p): tree not empty\n",
+			    (void *)ap);
 		}
 		if (ap->ua_null_walk.uaw_next != &ap->ua_null_walk ||
 		    ap->ua_null_walk.uaw_prev != &ap->ua_null_walk) {
 			uu_panic("uu_avl_destroy(%p):  outstanding walkers\n",
-			    ap);
+			    (void *)ap);
 		}
 	}
 	(void) pthread_mutex_lock(&pp->uap_lock);
@ -441,7 +443,7 @@ uu_avl_remove(uu_avl_t *ap, void *elem)
 				(void) _avl_walk_advance(wp, ap);
 		} else if (wp->uaw_next_result != NULL) {
 			uu_panic("uu_avl_remove(%p, %p): active non-robust "
-			    "walker\n", ap, elem);
+			    "walker\n", (void *)ap, elem);
 		}
 	}

@ -497,19 +499,19 @@ uu_avl_insert(uu_avl_t *ap, void *elem, uu_avl_index_t idx)
 		if (na[1] != 0)
 			uu_panic("uu_avl_insert(%p, %p, %p): node already "
 			    "in tree, or corrupt\n",
-			    ap, elem, idx);
+			    (void *)ap, elem, (void *)idx);
 		if (na[0] == 0)
 			uu_panic("uu_avl_insert(%p, %p, %p): node not "
 			    "initialized\n",
-			    ap, elem, idx);
+			    (void *)ap, elem, (void *)idx);
 		if (na[0] != POOL_TO_MARKER(pp))
 			uu_panic("uu_avl_insert(%p, %p, %p): node from "
 			    "other pool, or corrupt\n",
-			    ap, elem, idx);
+			    (void *)ap, elem, (void *)idx);

 		if (!INDEX_VALID(ap, idx))
 			uu_panic("uu_avl_insert(%p, %p, %p): %s\n",
-			    ap, elem, idx,
+			    (void *)ap, elem, (void *)idx,
 			    INDEX_CHECK(idx)? "outdated index" :
 			    "invalid index");

@ -526,8 +528,8 @@ uu_avl_nearest_next(uu_avl_t *ap, uu_avl_index_t idx)
 {
 	if (ap->ua_debug && !INDEX_VALID(ap, idx))
 		uu_panic("uu_avl_nearest_next(%p, %p): %s\n",
-		    ap, idx, INDEX_CHECK(idx)? "outdated index" :
-		    "invalid index");
+		    (void *)ap, (void *)idx, INDEX_CHECK(idx)?
+		    "outdated index" : "invalid index");
 	return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_AFTER));
 }

@ -536,8 +538,8 @@ uu_avl_nearest_prev(uu_avl_t *ap, uu_avl_index_t idx)
 {
 	if (ap->ua_debug && !INDEX_VALID(ap, idx))
 		uu_panic("uu_avl_nearest_prev(%p, %p): %s\n",
-		    ap, idx, INDEX_CHECK(idx)? "outdated index" :
-		    "invalid index");
+		    (void *)ap, (void *)idx, INDEX_CHECK(idx)?
+		    "outdated index" : "invalid index");
 	return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_BEFORE));
 }

--- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c
@ -33,7 +33,7 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <strings.h>
+#include <string.h>

 #define	FACILITY_FMT	"%s (%s): "

--- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_list.c
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_list.c
@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@ -20,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -117,7 +116,8 @@ uu_list_pool_destroy(uu_list_pool_t *pp)
 		    UU_PTR_ENCODE(&pp->ulp_null_list)) {
 			uu_panic("uu_list_pool_destroy: Pool \"%.*s\" (%p) has "
 			    "outstanding lists, or is corrupt.\n",
-			    sizeof (pp->ulp_name), pp->ulp_name, pp);
+			    (int)sizeof (pp->ulp_name), pp->ulp_name,
+			    (void *)pp);
 		}
 	}
 	(void) pthread_mutex_lock(&uu_lpool_list_lock);
@ -139,14 +139,14 @@ uu_list_node_init(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp)
 		if (offset + sizeof (*np) > pp->ulp_objsize) {
 			uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): "
 			    "offset %ld doesn't fit in object (size %ld)\n",
-			    base, np, pp, pp->ulp_name, offset,
-			    pp->ulp_objsize);
+			    base, (void *)np, (void *)pp, pp->ulp_name,
+			    (long)offset, (long)pp->ulp_objsize);
 		}
 		if (offset != pp->ulp_nodeoffset) {
 			uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): "
 			    "offset %ld doesn't match pool's offset (%ld)\n",
-			    base, np, pp, pp->ulp_name, offset,
-			    pp->ulp_objsize);
+			    base, (void *)np, (void *)pp, pp->ulp_name,
+			    (long)offset, (long)pp->ulp_objsize);
 		}
 	}
 	np->uln_next = POOL_TO_MARKER(pp);
@ -163,13 +163,13 @@ uu_list_node_fini(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp)
 		    np->uln_prev == NULL) {
 			uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): "
 			    "node already finied\n",
-			    base, np_arg, pp, pp->ulp_name);
+			    base, (void *)np_arg, (void *)pp, pp->ulp_name);
 		}
 		if (np->uln_next != POOL_TO_MARKER(pp) ||
 		    np->uln_prev != NULL) {
 			uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): "
 			    "node corrupt or on list\n",
-			    base, np_arg, pp, pp->ulp_name);
+			    base, (void *)np_arg, (void *)pp, pp->ulp_name);
 		}
 	}
 	np->uln_next = NULL;
@ -190,7 +190,7 @@ uu_list_create(uu_list_pool_t *pp, void *parent, uint32_t flags)
 		if (pp->ulp_debug)
 			uu_panic("uu_list_create(%p, ...): requested "
 			    "UU_LIST_SORTED, but pool has no comparison func\n",
-			    pp);
+			    (void *)pp);
 		uu_set_error(UU_ERROR_NOT_SUPPORTED);
 		return (NULL);
 	}
@ -236,16 +236,16 @@ uu_list_destroy(uu_list_t *lp)
 		if (lp->ul_null_node.uln_next != &lp->ul_null_node ||
 		    lp->ul_null_node.uln_prev != &lp->ul_null_node) {
 			uu_panic("uu_list_destroy(%p):  list not empty\n",
-			    lp);
+			    (void *)lp);
 		}
 		if (lp->ul_numnodes != 0) {
 			uu_panic("uu_list_destroy(%p):  numnodes is nonzero, "
-			    "but list is empty\n", lp);
+			    "but list is empty\n", (void *)lp);
 		}
 		if (lp->ul_null_walk.ulw_next != &lp->ul_null_walk ||
 		    lp->ul_null_walk.ulw_prev != &lp->ul_null_walk) {
 			uu_panic("uu_list_destroy(%p):  outstanding walkers\n",
-			    lp);
+			    (void *)lp);
 		}
 	}

@ -266,13 +266,14 @@ list_insert(uu_list_t *lp, uu_list_node_impl_t *np, uu_list_node_impl_t *prev,
 	if (lp->ul_debug) {
 		if (next->uln_prev != prev || prev->uln_next != next)
 			uu_panic("insert(%p): internal error: %p and %p not "
-			    "neighbors\n", lp, next, prev);
+			    "neighbors\n", (void *)lp, (void *)next,
+			    (void *)prev);

 		if (np->uln_next != POOL_TO_MARKER(lp->ul_pool) ||
 		    np->uln_prev != NULL) {
 			uu_panic("insert(%p): elem %p node %p corrupt, "
 			    "not initialized, or already in a list.\n",
-			    lp, NODE_TO_ELEM(lp, np), np);
+			    (void *)lp, NODE_TO_ELEM(lp, np), (void *)np);
 		}
 		/*
 		 * invalidate outstanding uu_list_index_ts.
@ -299,12 +300,12 @@ uu_list_insert(uu_list_t *lp, void *elem, uu_list_index_t idx)
 	if (lp->ul_debug) {
 		if (!INDEX_VALID(lp, idx))
 			uu_panic("uu_list_insert(%p, %p, %p): %s\n",
-			    lp, elem, idx,
+			    (void *)lp, elem, (void *)idx,
 			    INDEX_CHECK(idx)? "outdated index" :
 			    "invalid index");
 		if (np->uln_prev == NULL)
 			uu_panic("uu_list_insert(%p, %p, %p): out-of-date "
-			    "index\n", lp, elem, idx);
+			    "index\n", (void *)lp, elem, (void *)idx);
 	}

 	list_insert(lp, ELEM_TO_NODE(lp, elem), np->uln_prev, np);
@ -354,11 +355,12 @@ uu_list_nearest_next(uu_list_t *lp, uu_list_index_t idx)
 	if (lp->ul_debug) {
 		if (!INDEX_VALID(lp, idx))
 			uu_panic("uu_list_nearest_next(%p, %p): %s\n",
-			    lp, idx, INDEX_CHECK(idx)? "outdated index" :
+			    (void *)lp, (void *)idx,
+			    INDEX_CHECK(idx)? "outdated index" :
 			    "invalid index");
 		if (np->uln_prev == NULL)
 			uu_panic("uu_list_nearest_next(%p, %p): out-of-date "
-			    "index\n", lp, idx);
+			    "index\n", (void *)lp, (void *)idx);
 	}

 	if (np == &lp->ul_null_node)
@ -378,11 +380,11 @@ uu_list_nearest_prev(uu_list_t *lp, uu_list_index_t idx)
 	if (lp->ul_debug) {
 		if (!INDEX_VALID(lp, idx))
 			uu_panic("uu_list_nearest_prev(%p, %p): %s\n",
-			    lp, idx, INDEX_CHECK(idx)? "outdated index" :
-			    "invalid index");
+			    (void *)lp, (void *)idx, INDEX_CHECK(idx)?
+			    "outdated index" : "invalid index");
 		if (np->uln_prev == NULL)
 			uu_panic("uu_list_nearest_prev(%p, %p): out-of-date "
-			    "index\n", lp, idx);
+			    "index\n", (void *)lp, (void *)idx);
 	}

 	if ((np = np->uln_prev) == &lp->ul_null_node)
@ -409,6 +411,11 @@ list_walk_init(uu_list_walk_t *wp, uu_list_t *lp, uint32_t flags)
 		wp->ulw_next_result = lp->ul_null_node.uln_prev;

 	if (lp->ul_debug || robust) {
+		/*
+		 * Add this walker to the list's list of walkers so
+		 * uu_list_remove() can advance us if somebody tries to
+		 * remove ulw_next_result.
+		 */
 		wp->ulw_next = next = &lp->ul_null_walk;
 		wp->ulw_prev = prev = next->ulw_prev;
 		next->ulw_prev = wp;
@ -538,7 +545,7 @@ uu_list_remove(uu_list_t *lp, void *elem)
 	if (lp->ul_debug) {
 		if (np->uln_prev == NULL)
 			uu_panic("uu_list_remove(%p, %p): elem not on list\n",
-			    lp, elem);
+			    (void *)lp, elem);
 		/*
 		 * invalidate outstanding uu_list_index_ts.
 		 */
@ -556,7 +563,7 @@ uu_list_remove(uu_list_t *lp, void *elem)
 				(void) list_walk_advance(wp, lp);
 		} else if (wp->ulw_next_result != NULL) {
 			uu_panic("uu_list_remove(%p, %p): active non-robust "
-			    "walker\n", lp, elem);
+			    "walker\n", (void *)lp, elem);
 		}
 	}

@ -578,8 +585,8 @@ uu_list_teardown(uu_list_t *lp, void **cookie)
 	 * XXX: disable list modification until list is empty
 	 */
 	if (lp->ul_debug && *cookie != NULL)
-		uu_panic("uu_list_teardown(%p, %p): unexpected cookie\n", lp,
-		    cookie);
+		uu_panic("uu_list_teardown(%p, %p): unexpected cookie\n",
+		    (void *)lp, (void *)cookie);

 	ep = uu_list_first(lp);
 	if (ep)
@ -599,12 +606,12 @@ uu_list_insert_before(uu_list_t *lp, void *target, void *elem)
 		if (np->uln_prev == NULL)
 			uu_panic("uu_list_insert_before(%p, %p, %p): %p is "
 			    "not currently on a list\n",
-			    lp, target, elem, target);
+			    (void *)lp, target, elem, target);
 	}
 	if (lp->ul_sorted) {
 		if (lp->ul_debug)
 			uu_panic("uu_list_insert_before(%p, ...): list is "
-			    "UU_LIST_SORTED\n", lp);
+			    "UU_LIST_SORTED\n", (void *)lp);
 		uu_set_error(UU_ERROR_NOT_SUPPORTED);
 		return (-1);
 	}
@ -625,12 +632,12 @@ uu_list_insert_after(uu_list_t *lp, void *target, void *elem)
 		if (np->uln_prev == NULL)
 			uu_panic("uu_list_insert_after(%p, %p, %p): %p is "
 			    "not currently on a list\n",
-			    lp, target, elem, target);
+			    (void *)lp, target, elem, target);
 	}
 	if (lp->ul_sorted) {
 		if (lp->ul_debug)
 			uu_panic("uu_list_insert_after(%p, ...): list is "
-			    "UU_LIST_SORTED\n", lp);
+			    "UU_LIST_SORTED\n", (void *)lp);
 		uu_set_error(UU_ERROR_NOT_SUPPORTED);
 		return (-1);
 	}
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
@ -20,21 +20,20 @@
 */

 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

 #ifndef	_LIBZFS_H
 #define	_LIBZFS_H

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <assert.h>
 #include <libnvpair.h>
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/varargs.h>
 #include <sys/fs/zfs.h>
+#include <sys/avl.h>
 #include <sys/zfs_ioctl.h>

 #ifdef	__cplusplus
@ -47,6 +46,7 @@ extern "C" {
 #define	ZFS_MAXNAMELEN		MAXNAMELEN
 #define	ZPOOL_MAXNAMELEN	MAXNAMELEN
 #define	ZFS_MAXPROPLEN		MAXPATHLEN
+#define	ZPOOL_MAXPROPLEN	MAXPATHLEN

 /*
 * libzfs errors
@ -99,9 +99,61 @@ enum {
 	EZFS_POOL_NOTSUP,	/* ops not supported for this type of pool */
 	EZFS_POOL_INVALARG,	/* invalid argument for this pool operation */
 	EZFS_NAMETOOLONG,	/* dataset name is too long */
+	EZFS_OPENFAILED,	/* open of device failed */
+	EZFS_NOCAP,		/* couldn't get capacity */
+	EZFS_LABELFAILED,	/* write of label failed */
+	EZFS_ISCSISVCUNAVAIL,	/* iscsi service unavailable */
+	EZFS_BADWHO,		/* invalid permission who */
+	EZFS_BADPERM,		/* invalid permission */
+	EZFS_BADPERMSET,	/* invalid permission set name */
+	EZFS_NODELEGATION,	/* delegated administration is disabled */
+	EZFS_PERMRDONLY,	/* permissions are readonly */
+	EZFS_UNSHARESMBFAILED,	/* failed to unshare over smb */
+	EZFS_SHARESMBFAILED,	/* failed to share over smb */
+	EZFS_BADCACHE,		/* bad cache file */
+	EZFS_ISL2CACHE,		/* device is for the level 2 ARC */
+	EZFS_VDEVNOTSUP,	/* unsupported vdev type */
+	EZFS_NOTSUP,		/* ops not supported on this dataset */
+	EZFS_ACTIVE_SPARE,	/* pool has active shared spare devices */
 	EZFS_UNKNOWN
 };

+/*
+ * The following data structures are all part
+ * of the zfs_allow_t data structure which is
+ * used for printing 'allow' permissions.
+ * It is a linked list of zfs_allow_t's which
+ * then contain AVL trees for user/group/sets/...
+ * and each one of the entries in those trees has
+ * AVL trees for the permissions they belong to and
+ * whether they are local, descendent or local+descendent
+ * permissions.  The AVL trees are used primarily for
+ * sorting purposes, but also so that we can quickly find
+ * a given user and/or permission.
+ */
+/* One AVL-linked entry carrying a single name string. */
+typedef struct zfs_perm_node {
+	avl_node_t z_node;		/* AVL tree linkage */
+	char z_pname[MAXPATHLEN];	/* presumably the permission name */
+} zfs_perm_node_t;
+
+/* One named entry (z_key) owning three AVL trees of permissions. */
+typedef struct zfs_allow_node {
+	avl_node_t z_node;		/* AVL tree linkage */
+	char z_key[MAXPATHLEN];		/* name, such as joe */
+	avl_tree_t z_localdescend;	/* local+descendent perms */
+	avl_tree_t z_local;		/* local permissions */
+	avl_tree_t z_descend;		/* descendent permissions */
+} zfs_allow_node_t;
+
+/* Element of a singly linked list (via z_next) holding five AVL trees. */
+typedef struct zfs_allow {
+	struct zfs_allow *z_next;	/* next element of the list */
+	/* NOTE(review): presumably where the perms were set -- confirm */
+	char z_setpoint[MAXPATHLEN];
+	avl_tree_t z_sets;		/* tree for permission sets */
+	/* NOTE(review): presumably create-time permissions -- confirm */
+	avl_tree_t z_crperms;
+	avl_tree_t z_user;		/* tree for users */
+	avl_tree_t z_group;		/* tree for groups */
+	avl_tree_t z_everyone;		/* presumably the "everyone" entries */
+} zfs_allow_t;
+
 /*
 * Basic handle types
 */
@ -131,12 +183,9 @@ extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *);
 extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *);
 extern void zpool_close(zpool_handle_t *);
 extern const char *zpool_get_name(zpool_handle_t *);
-extern uint64_t zpool_get_guid(zpool_handle_t *);
-extern uint64_t zpool_get_space_used(zpool_handle_t *);
-extern uint64_t zpool_get_space_total(zpool_handle_t *);
-extern int zpool_get_root(zpool_handle_t *, char *, size_t);
 extern int zpool_get_state(zpool_handle_t *);
-extern uint64_t zpool_get_version(zpool_handle_t *);
+extern char *zpool_state_to_name(vdev_state_t, vdev_aux_t);
+extern void zpool_free_handles(libzfs_handle_t *);

 /*
 * Iterate over all active pools in the system.
@ -148,7 +197,7 @@ extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);
 * Functions to create and destroy pools
 */
 extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
-    const char *);
+    nvlist_t *, nvlist_t *);
 extern int zpool_destroy(zpool_handle_t *);
 extern int zpool_add(zpool_handle_t *, nvlist_t *);

@ -156,22 +205,33 @@ extern int zpool_add(zpool_handle_t *, nvlist_t *);
 * Functions to manipulate pool and vdev state
 */
 extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t);
+extern int zpool_clear(zpool_handle_t *, const char *);

-extern int zpool_vdev_online(zpool_handle_t *, const char *);
-extern int zpool_vdev_offline(zpool_handle_t *, const char *, int);
-extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *,
-    nvlist_t *, int);
+extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
+    vdev_state_t *);
+extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
+extern int zpool_vdev_attach(zpool_handle_t *, const char *,
+    const char *, nvlist_t *, int);
 extern int zpool_vdev_detach(zpool_handle_t *, const char *);
 extern int zpool_vdev_remove(zpool_handle_t *, const char *);
-extern int zpool_clear(zpool_handle_t *, const char *);
-extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *);
+
+extern int zpool_vdev_fault(zpool_handle_t *, uint64_t);
+extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t);
+extern int zpool_vdev_clear(zpool_handle_t *, uint64_t);
+
+extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
+    boolean_t *, boolean_t *);
+extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);

 /*
 * Functions to manage pool properties
 */
 extern int zpool_set_prop(zpool_handle_t *, const char *, const char *);
-extern int zpool_get_prop(zpool_handle_t *, zfs_prop_t, char *,
-	size_t proplen, zfs_source_t *);
+extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *,
+    size_t proplen, zprop_source_t *);
+extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t,
+    zprop_source_t *);
+
 extern const char *zpool_prop_to_name(zpool_prop_t);
 extern const char *zpool_prop_values(zpool_prop_t);

@ -194,6 +254,11 @@ typedef enum {
 	ZPOOL_STATUS_FAILING_DEV,	/* device experiencing errors */
 	ZPOOL_STATUS_VERSION_NEWER,	/* newer on-disk version */
 	ZPOOL_STATUS_HOSTID_MISMATCH,	/* last accessed by another system */
+	ZPOOL_STATUS_IO_FAILURE_WAIT,	/* failed I/O, failmode 'wait' */
+	ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
+	ZPOOL_STATUS_FAULTED_DEV_R,	/* faulted device with replicas */
+	ZPOOL_STATUS_FAULTED_DEV_NR,	/* faulted device with no replicas */
+	ZPOOL_STATUS_BAD_LOG,		/* cannot read log chain(s) */

 	/*
 	 * The following are not faults per se, but still an error possibly
@ -223,26 +288,39 @@ extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
 /*
 * Import and export functions
 */
-extern int zpool_export(zpool_handle_t *);
+extern int zpool_export(zpool_handle_t *, boolean_t);
 extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
-    const char *);
+    char *altroot);
+extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
+    nvlist_t *, boolean_t);

 /*
 * Search for pools to import
 */
 extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
+extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *,
+    char *, uint64_t);
+extern nvlist_t *zpool_find_import_byname(libzfs_handle_t *, int, char **,
+    char *);
+extern nvlist_t *zpool_find_import_byguid(libzfs_handle_t *, int, char **,
+    uint64_t);
+extern nvlist_t *zpool_find_import_activeok(libzfs_handle_t *, int, char **);

 /*
 * Miscellaneous pool functions
 */
+struct zfs_cmd;
+
 extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
-extern int zpool_upgrade(zpool_handle_t *);
+extern int zpool_upgrade(zpool_handle_t *, uint64_t);
 extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
-extern void zpool_log_history(libzfs_handle_t *, int, char **, const char *,
-    boolean_t, boolean_t);
+extern void zpool_set_history_str(const char *subcommand, int argc,
+    char **argv, char *history_str);
+extern int zpool_stage_history(libzfs_handle_t *, const char *);
 extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
    size_t len);
-
+extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
+extern int zpool_get_physpath(zpool_handle_t *, char *);
 /*
 * Basic handle manipulations.  These functions do not create or destroy the
 * underlying datasets, only the references to them.
@ -251,65 +329,84 @@ extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int);
 extern void zfs_close(zfs_handle_t *);
 extern zfs_type_t zfs_get_type(const zfs_handle_t *);
 extern const char *zfs_get_name(const zfs_handle_t *);
+extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *);

 /*
 * Property management functions.  Some functions are shared with the kernel,
 * and are found in sys/fs/zfs.h.
 */
+
+/*
+ * zfs dataset property management
+ */
+extern const char *zfs_prop_default_string(zfs_prop_t);
+extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
+extern const char *zfs_prop_column_name(zfs_prop_t);
+extern boolean_t zfs_prop_align_right(zfs_prop_t);
+
+extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t,
+    nvlist_t *, uint64_t, zfs_handle_t *, const char *);
+
 extern const char *zfs_prop_to_name(zfs_prop_t);
 extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
 extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
-    zfs_source_t *, char *, size_t, boolean_t);
+    zprop_source_t *, char *, size_t, boolean_t);
 extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
-    zfs_source_t *, char *, size_t);
+    zprop_source_t *, char *, size_t);
 extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
-extern const char *zfs_prop_get_string(zfs_handle_t *, zfs_prop_t);
 extern int zfs_prop_inherit(zfs_handle_t *, const char *);
 extern const char *zfs_prop_values(zfs_prop_t);
-extern int zfs_prop_valid_for_type(zfs_prop_t, int);
-extern const char *zfs_prop_default_string(zfs_prop_t prop);
-extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
 extern int zfs_prop_is_string(zfs_prop_t prop);
-extern const char *zfs_prop_column_name(zfs_prop_t);
-extern boolean_t zfs_prop_align_right(zfs_prop_t);
-extern void nicebool(int value, char *buf, size_t buflen);
+extern nvlist_t *zfs_get_user_props(zfs_handle_t *);

-typedef struct zfs_proplist {
-	zfs_prop_t	pl_prop;
+typedef struct zprop_list {
+	int		pl_prop;
 	char		*pl_user_prop;
-	struct zfs_proplist *pl_next;
+	struct zprop_list *pl_next;
 	boolean_t	pl_all;
 	size_t		pl_width;
 	boolean_t	pl_fixed;
-} zfs_proplist_t;
+} zprop_list_t;

-typedef zfs_proplist_t zpool_proplist_t;
-
-extern int zfs_get_proplist(libzfs_handle_t *, char *, zfs_proplist_t **);
-extern int zpool_get_proplist(libzfs_handle_t *, char *, zpool_proplist_t **);
-extern int zfs_expand_proplist(zfs_handle_t *, zfs_proplist_t **);
-extern int zpool_expand_proplist(zpool_handle_t *, zpool_proplist_t **);
-extern void zfs_free_proplist(zfs_proplist_t *);
-extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
+extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **);

 #define	ZFS_MOUNTPOINT_NONE	"none"
 #define	ZFS_MOUNTPOINT_LEGACY	"legacy"

 /*
- * Functions for printing properties from zfs/zpool
+ * zpool property management
 */
-typedef struct libzfs_get_cbdata {
+extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **);
+extern const char *zpool_prop_default_string(zpool_prop_t);
+extern uint64_t zpool_prop_default_numeric(zpool_prop_t);
+extern const char *zpool_prop_column_name(zpool_prop_t);
+extern boolean_t zpool_prop_align_right(zpool_prop_t);
+
+/*
+ * Functions shared by zfs and zpool property management.
+ */
+extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all,
+    boolean_t ordered, zfs_type_t type);
+extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **,
+    zfs_type_t);
+extern void zprop_free_list(zprop_list_t *);
+
+/*
+ * Functions for printing zfs or zpool properties
+ */
+typedef struct zprop_get_cbdata {
 	int cb_sources;
 	int cb_columns[4];
 	int cb_colwidths[5];
 	boolean_t cb_scripted;
 	boolean_t cb_literal;
 	boolean_t cb_first;
-	zfs_proplist_t *cb_proplist;
-} libzfs_get_cbdata_t;
+	zprop_list_t *cb_proplist;
+	zfs_type_t cb_type;
+} zprop_get_cbdata_t;

-void libzfs_print_one_property(const char *, libzfs_get_cbdata_t *,
-    const char *, const char *, zfs_source_t, const char *);
+void zprop_print_one_property(const char *, zprop_get_cbdata_t *,
+    const char *, const char *, zprop_source_t, const char *);

 #define	GET_COL_NAME		1
 #define	GET_COL_PROPERTY	2
@ -331,26 +428,61 @@ extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
 */
 extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
    nvlist_t *);
+extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
 extern int zfs_destroy(zfs_handle_t *);
 extern int zfs_destroy_snaps(zfs_handle_t *, char *);
 extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
-extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t);
-extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int);
-extern int zfs_rename(zfs_handle_t *, const char *, int);
-extern int zfs_send(zfs_handle_t *, const char *, int);
-extern int zfs_receive(libzfs_handle_t *, const char *, int, int, int,
-    boolean_t, int);
+extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
+extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
+extern int zfs_rename(zfs_handle_t *, const char *, boolean_t);
+extern int zfs_send(zfs_handle_t *, const char *, const char *,
+    boolean_t, boolean_t, boolean_t, boolean_t, int);
 extern int zfs_promote(zfs_handle_t *);

+/*
+ * Option flags passed by value to zfs_receive().
+ *
+ * Each flag is a one-bit field declared "unsigned int".  Whether a plain
+ * "int" bit-field is signed is implementation-defined, and a signed one-bit
+ * field can only represent 0 and -1: storing 1 would read back as -1 and
+ * comparisons against 1 would fail.  Unsigned fields hold exactly 0 or 1.
+ */
+typedef struct recvflags {
+	/* print informational messages (ie, -v was specified) */
+	unsigned int verbose : 1;
+
+	/* the destination is a prefix, not the exact fs (ie, -d) */
+	unsigned int isprefix : 1;
+
+	/* do not actually do the recv, just check if it would work (ie, -n) */
+	unsigned int dryrun : 1;
+
+	/* rollback/destroy filesystems as necessary (eg, -F) */
+	unsigned int force : 1;
+
+	/* set "canmount=off" on all modified filesystems */
+	unsigned int canmountoff : 1;
+
+	/* byteswap flag is used internally; callers need not specify */
+	unsigned int byteswap : 1;
+} recvflags_t;
+
+extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t,
+    int, avl_tree_t *);
+
 /*
 * Miscellaneous functions.
 */
 extern const char *zfs_type_to_name(zfs_type_t);
 extern void zfs_refresh_properties(zfs_handle_t *);
 extern int zfs_name_valid(const char *, zfs_type_t);
-extern int zfs_disable(zfs_handle_t *);
-extern int zfs_enable(zfs_handle_t *);
 extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
+extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
+    zfs_type_t);
+extern int zfs_spa_version(zfs_handle_t *, int *);
+
+/*
+ * dataset permission functions.
+ */
+extern int zfs_perm_set(zfs_handle_t *, nvlist_t *);
+extern int zfs_perm_remove(zfs_handle_t *, nvlist_t *);
+/*
+ * The last parameter is named nvlp rather than "nvlist_t" so that it does
+ * not shadow the nvlist_t typedef inside the prototype.
+ * NOTE(review): nvlp presumably receives the built permission list -- confirm.
+ */
+extern int zfs_build_perms(zfs_handle_t *, char *, char *,
+    zfs_deleg_who_type_t, zfs_deleg_inherit_t, nvlist_t **nvlp);
+extern int zfs_perm_get(zfs_handle_t *, zfs_allow_t **);
+extern void zfs_free_allows(zfs_allow_t *);
+extern void zfs_deleg_permissions(void);

 /*
 * Mount support functions.
@ -369,15 +501,27 @@ extern int zfs_share(zfs_handle_t *);
 extern int zfs_unshare(zfs_handle_t *);

 /*
- * Protocol-specifc share support functions.
+ * Protocol-specific share support functions.
 */
 extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **);
+extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **);
 extern int zfs_share_nfs(zfs_handle_t *);
+extern int zfs_share_smb(zfs_handle_t *);
+extern int zfs_shareall(zfs_handle_t *);
 extern int zfs_unshare_nfs(zfs_handle_t *, const char *);
+extern int zfs_unshare_smb(zfs_handle_t *, const char *);
 extern int zfs_unshareall_nfs(zfs_handle_t *);
+extern int zfs_unshareall_smb(zfs_handle_t *);
+extern int zfs_unshareall_bypath(zfs_handle_t *, const char *);
+extern int zfs_unshareall(zfs_handle_t *);
 extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *);
 extern int zfs_share_iscsi(zfs_handle_t *);
 extern int zfs_unshare_iscsi(zfs_handle_t *);
+#ifdef TODO
+extern int zfs_iscsi_perm_check(libzfs_handle_t *, char *, ucred_t *);
+#endif
+extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *,
+    void *, void *, int, zfs_share_op_t);

 /*
 * FreeBSD-specific jail support function.
@ -401,12 +545,6 @@ extern int zfs_jail(zfs_handle_t *, int, int);
 extern void zfs_nicenum(uint64_t, char *, size_t);
 extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);

-/*
- * Pool destroy special.  Remove the device information without destroying
- * the underlying dataset.
- */
-extern int zfs_remove_link(zfs_handle_t *);
-
 /*
 * Given a device or file, determine if it is part of a pool.
 */
@ -424,6 +562,9 @@ extern int zpool_read_label(int, nvlist_t **);
 extern int zpool_create_zvol_links(zpool_handle_t *);
 extern int zpool_remove_zvol_links(zpool_handle_t *);

+/* is this zvol valid for use as a dump device? */
+extern int zvol_check_dump_config(char *);
+
 /*
 * Enable and disable datasets within a pool by mounting/unmounting and
 * sharing/unsharing them.
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
@ -20,12 +20,12 @@
 */

 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
+ *
+ * Portions Copyright 2007 Ramprakash Jelari
 */

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <libintl.h>
 #include <libuutil.h>
 #include <stddef.h>
@ -65,18 +65,21 @@ typedef struct prop_changenode {
 	int			cn_shared;
 	int			cn_mounted;
 	int			cn_zoned;
+	boolean_t		cn_needpost;	/* is postfix() needed? */
 	uu_list_node_t		cn_listnode;
 } prop_changenode_t;

 struct prop_changelist {
 	zfs_prop_t		cl_prop;
 	zfs_prop_t		cl_realprop;
+	zfs_prop_t		cl_shareprop;  /* used with sharenfs/sharesmb */
 	uu_list_pool_t		*cl_pool;
 	uu_list_t		*cl_list;
 	boolean_t		cl_waslegacy;
 	boolean_t		cl_allchildren;
 	boolean_t		cl_alldependents;
-	int			cl_flags;
+	int			cl_mflags;	/* Mount flags */
+	int			cl_gflags;	/* Gather request flags */
 	boolean_t		cl_haszonedchild;
 	boolean_t		cl_sorted;
 };
@ -84,7 +87,8 @@ struct prop_changelist {
 /*
 * If the property is 'mountpoint', go through and unmount filesystems as
 * necessary.  We don't do the same for 'sharenfs', because we can just re-share
- * with different options without interrupting service.
+ * with different options without interrupting service. We do handle 'sharesmb'
+ * since there may be old resource names that need to be removed.
 */
 int
 changelist_prefix(prop_changelist_t *clp)
@ -92,11 +96,19 @@ changelist_prefix(prop_changelist_t *clp)
 	prop_changenode_t *cn;
 	int ret = 0;

-	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT)
+	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
+	    clp->cl_prop != ZFS_PROP_SHARESMB)
 		return (0);

 	for (cn = uu_list_first(clp->cl_list); cn != NULL;
 	    cn = uu_list_next(clp->cl_list, cn)) {
+
+		/* after a failure, the remaining nodes need no postfix */
+		if (ret == -1) {
+			cn->cn_needpost = B_FALSE;
+			continue;
+		}
+
 		/*
 		 * If we are in the global zone, but this dataset is exported
 		 * to a local zone, do nothing.
@ -114,8 +126,11 @@ changelist_prefix(prop_changelist_t *clp)
 				(void) zfs_unshare_iscsi(cn->cn_handle);

 				if (zvol_remove_link(cn->cn_handle->zfs_hdl,
-				    cn->cn_handle->zfs_name) != 0)
+				    cn->cn_handle->zfs_name) != 0) {
 					ret = -1;
+					cn->cn_needpost = B_FALSE;
+					(void) zfs_share_iscsi(cn->cn_handle);
+				}
 				break;

 			case ZFS_PROP_VOLSIZE:
@ -126,10 +141,28 @@ changelist_prefix(prop_changelist_t *clp)
 				(void) zfs_unshare_iscsi(cn->cn_handle);
 				break;
 			}
-		} else if (zfs_unmount(cn->cn_handle, NULL, clp->cl_flags) != 0)
-			ret = -1;
+		} else {
+			/*
+			 * Do the property specific processing.
+			 */
+			switch (clp->cl_prop) {
+			case ZFS_PROP_MOUNTPOINT:
+				if (zfs_unmount(cn->cn_handle, NULL,
+				    clp->cl_mflags) != 0) {
+					ret = -1;
+					cn->cn_needpost = B_FALSE;
+				}
+				break;
+			case ZFS_PROP_SHARESMB:
+				(void) zfs_unshare_smb(cn->cn_handle, NULL);
+				break;
+			}
+		}
 	}

+	if (ret == -1)
+		(void) changelist_postfix(clp);
+
 	return (ret);
 }

@ -147,7 +180,8 @@ changelist_postfix(prop_changelist_t *clp)
 {
 	prop_changenode_t *cn;
 	char shareopts[ZFS_MAXPROPLEN];
-	int ret = 0;
+	int errors = 0;
+	libzfs_handle_t *hdl;

 	/*
 	 * If we're changing the mountpoint, attempt to destroy the underlying
@ -162,12 +196,29 @@ changelist_postfix(prop_changelist_t *clp)
 	if (clp->cl_prop == ZFS_PROP_MOUNTPOINT)
 		remove_mountpoint(cn->cn_handle);

+	/*
+	 * It is possible that the changelist_prefix() used libshare
+	 * to unshare some entries. Since libshare caches data, an
+	 * attempt to reshare during postfix can fail unless libshare
+	 * is uninitialized here so that it will reinitialize later.
+	 */
+	if (cn->cn_handle != NULL) {
+		hdl = cn->cn_handle->zfs_hdl;
+		assert(hdl != NULL);
+		zfs_uninit_libshare(hdl);
+	}
+
 	/*
 	 * We walk the datasets in reverse, because we want to mount any parent
-	 * datasets before mounting the children.
+	 * datasets before mounting the children.  We walk all datasets even if
+	 * there are errors.
 	 */
 	for (cn = uu_list_last(clp->cl_list); cn != NULL;
 	    cn = uu_list_prev(clp->cl_list, cn)) {
+
+		boolean_t sharenfs;
+		boolean_t sharesmb;
+
 		/*
 		 * If we are in the global zone, but this dataset is exported
 		 * to a local zone, do nothing.
@ -175,6 +226,11 @@ changelist_postfix(prop_changelist_t *clp)
 		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
 			continue;

+		/* Only do post-processing if it's required */
+		if (!cn->cn_needpost)
+			continue;
+		cn->cn_needpost = B_FALSE;
+
 		zfs_refresh_properties(cn->cn_handle);

 		if (ZFS_IS_VOLUME(cn->cn_handle)) {
@ -185,7 +241,7 @@ changelist_postfix(prop_changelist_t *clp)
 			if (clp->cl_realprop == ZFS_PROP_NAME &&
 			    zvol_create_link(cn->cn_handle->zfs_hdl,
 			    cn->cn_handle->zfs_name) != 0) {
-				ret = -1;
+				errors++;
 			} else if (cn->cn_shared ||
 			    clp->cl_prop == ZFS_PROP_SHAREISCSI) {
 				if (zfs_prop_get(cn->cn_handle,
@ -193,43 +249,55 @@ changelist_postfix(prop_changelist_t *clp)
 				    sizeof (shareopts), NULL, NULL, 0,
 				    B_FALSE) == 0 &&
 				    strcmp(shareopts, "off") == 0) {
-					ret = zfs_unshare_iscsi(cn->cn_handle);
+					errors +=
+					    zfs_unshare_iscsi(cn->cn_handle);
 				} else {
-					ret = zfs_share_iscsi(cn->cn_handle);
+					errors +=
+					    zfs_share_iscsi(cn->cn_handle);
 				}
 			}

 			continue;
 		}

-		if ((clp->cl_waslegacy || cn->cn_mounted) &&
-		    !zfs_is_mounted(cn->cn_handle, NULL) &&
+		/*
+		 * Remount if previously mounted or mountpoint was legacy,
+		 * or the sharenfs or sharesmb property is set.
+		 */
+		sharenfs = ((zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS,
+		    shareopts, sizeof (shareopts), NULL, NULL, 0,
+		    B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
+
+		sharesmb = ((zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARESMB,
+		    shareopts, sizeof (shareopts), NULL, NULL, 0,
+		    B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
+
+		if ((cn->cn_mounted || clp->cl_waslegacy || sharenfs ||
+		    sharesmb) && !zfs_is_mounted(cn->cn_handle, NULL) &&
 		    zfs_mount(cn->cn_handle, NULL, 0) != 0)
-			ret = -1;
+			errors++;

 		/*
 		 * We always re-share even if the filesystem is currently
 		 * shared, so that we can adopt any new options.
 		 */
-		if (cn->cn_shared ||
-		    (clp->cl_prop == ZFS_PROP_SHARENFS && clp->cl_waslegacy)) {
-			if (zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS,
-			    shareopts, sizeof (shareopts), NULL, NULL, 0,
-			    B_FALSE) == 0 && strcmp(shareopts, "off") == 0) {
-				ret = zfs_unshare_nfs(cn->cn_handle, NULL);
-			} else {
-				ret = zfs_share_nfs(cn->cn_handle);
-			}
-		}
+		if (sharenfs)
+			errors += zfs_share_nfs(cn->cn_handle);
+		else if (cn->cn_shared || clp->cl_waslegacy)
+			errors += zfs_unshare_nfs(cn->cn_handle, NULL);
+		if (sharesmb)
+			errors += zfs_share_smb(cn->cn_handle);
+		else if (cn->cn_shared || clp->cl_waslegacy)
+			errors += zfs_unshare_smb(cn->cn_handle, NULL);
 	}

-	return (ret);
+	return (errors ? -1 : 0);
 }

 /*
 * Is this "dataset" a child of "parent"?
 */
-static boolean_t
+boolean_t
 isa_child_of(const char *dataset, const char *parent)
 {
 	int len;
@ -280,21 +348,22 @@ changelist_rename(prop_changelist_t *clp, const char *src, const char *dst)
 }

 /*
- * Given a gathered changelist for the 'sharenfs' property, unshare all the
- * datasets in the list.
+ * Given a gathered changelist for the 'sharenfs' or 'sharesmb' property,
+ * unshare all the datasets in the list.
 */
 int
-changelist_unshare(prop_changelist_t *clp)
+changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto)
 {
 	prop_changenode_t *cn;
 	int ret = 0;

-	if (clp->cl_prop != ZFS_PROP_SHARENFS)
+	if (clp->cl_prop != ZFS_PROP_SHARENFS &&
+	    clp->cl_prop != ZFS_PROP_SHARESMB)
 		return (0);

 	for (cn = uu_list_first(clp->cl_list); cn != NULL;
 	    cn = uu_list_next(clp->cl_list, cn)) {
-		if (zfs_unshare_nfs(cn->cn_handle, NULL) != 0)
+		if (zfs_unshare_proto(cn->cn_handle, NULL, proto) != 0)
 			ret = -1;
 	}

@ -316,14 +385,14 @@ changelist_haszonedchild(prop_changelist_t *clp)
 * Remove a node from a gathered list.
 */
 void
-changelist_remove(zfs_handle_t *zhp, prop_changelist_t *clp)
+changelist_remove(prop_changelist_t *clp, const char *name)
 {
 	prop_changenode_t *cn;

 	for (cn = uu_list_first(clp->cl_list); cn != NULL;
 	    cn = uu_list_next(clp->cl_list, cn)) {

-		if (strcmp(cn->cn_handle->zfs_name, zhp->zfs_name) == 0) {
+		if (strcmp(cn->cn_handle->zfs_name, name) == 0) {
 			uu_list_remove(clp->cl_list, cn);
 			zfs_close(cn->cn_handle);
 			free(cn);
@ -363,7 +432,8 @@ change_one(zfs_handle_t *zhp, void *data)
 	char property[ZFS_MAXPROPLEN];
 	char where[64];
 	prop_changenode_t *cn;
-	zfs_source_t sourcetype;
+	zprop_source_t sourcetype;
+	zprop_source_t share_sourcetype;

 	/*
 	 * We only want to unmount/unshare those filesystems that may inherit
@ -383,8 +453,25 @@ change_one(zfs_handle_t *zhp, void *data)
 		return (0);
 	}

+	/*
+	 * If we are "watching" sharenfs or sharesmb
+	 * then check out the companion property which is tracked
+	 * in cl_shareprop
+	 */
+	if (clp->cl_shareprop != ZPROP_INVAL &&
+	    zfs_prop_get(zhp, clp->cl_shareprop, property,
+	    sizeof (property), &share_sourcetype, where, sizeof (where),
+	    B_FALSE) != 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
 	if (clp->cl_alldependents || clp->cl_allchildren ||
-	    sourcetype == ZFS_SRC_DEFAULT || sourcetype == ZFS_SRC_INHERITED) {
+	    sourcetype == ZPROP_SRC_DEFAULT ||
+	    sourcetype == ZPROP_SRC_INHERITED ||
+	    (clp->cl_shareprop != ZPROP_INVAL &&
+	    (share_sourcetype == ZPROP_SRC_DEFAULT ||
+	    share_sourcetype == ZPROP_SRC_INHERITED))) {
 		if ((cn = zfs_alloc(zfs_get_handle(zhp),
 		    sizeof (prop_changenode_t))) == NULL) {
 			zfs_close(zhp);
@ -392,9 +479,11 @@ change_one(zfs_handle_t *zhp, void *data)
 		}

 		cn->cn_handle = zhp;
-		cn->cn_mounted = zfs_is_mounted(zhp, NULL);
+		cn->cn_mounted = (clp->cl_gflags & CL_GATHER_MOUNT_ALWAYS) ||
+		    zfs_is_mounted(zhp, NULL);
 		cn->cn_shared = zfs_is_shared(zhp);
 		cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+		cn->cn_needpost = B_TRUE;

 		/* Indicate if any child is exported to a local zone. */
 		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
@ -467,7 +556,8 @@ compare_mountpoints(const void *a, const void *b, void *unused)
 * mark whether it was shared beforehand.
 */
 prop_changelist_t *
-changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
+changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
+    int mnt_flags)
 {
 	prop_changelist_t *clp;
 	prop_changenode_t *cn;
@ -484,7 +574,8 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
 	 * order, regardless of their position in the hierarchy.
 	 */
 	if (prop == ZFS_PROP_NAME || prop == ZFS_PROP_ZONED ||
-	    prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS) {
+	    prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS ||
+	    prop == ZFS_PROP_SHARESMB) {
 		compare = compare_mountpoints;
 		clp->cl_sorted = B_TRUE;
 	}
@ -502,7 +593,8 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)

 	clp->cl_list = uu_list_create(clp->cl_pool, NULL,
 	    clp->cl_sorted ? UU_LIST_SORTED : 0);
-	clp->cl_flags = flags;
+	clp->cl_gflags = gather_flags;
+	clp->cl_mflags = mnt_flags;

 	if (clp->cl_list == NULL) {
 		assert(uu_error() == UU_ERROR_NO_MEMORY);
@ -529,6 +621,8 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
 		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
 	} else if (prop == ZFS_PROP_VOLSIZE) {
 		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else if (prop == ZFS_PROP_VERSION) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
 	} else {
 		clp->cl_prop = prop;
 	}
@ -536,9 +630,19 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)

 	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
 	    clp->cl_prop != ZFS_PROP_SHARENFS &&
+	    clp->cl_prop != ZFS_PROP_SHARESMB &&
 	    clp->cl_prop != ZFS_PROP_SHAREISCSI)
 		return (clp);

+	/*
+	 * If watching SHARENFS or SHARESMB then
+	 * also watch its companion property.
+	 */
+	if (clp->cl_prop == ZFS_PROP_SHARENFS)
+		clp->cl_shareprop = ZFS_PROP_SHARESMB;
+	else if (clp->cl_prop == ZFS_PROP_SHARESMB)
+		clp->cl_shareprop = ZFS_PROP_SHARENFS;
+
 	if (clp->cl_alldependents) {
 		if (zfs_iter_dependents(zhp, B_TRUE, change_one, clp) != 0) {
 			changelist_free(clp);
@ -554,7 +658,7 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
 	 * and can't tell the difference.
 	 */
 	if ((temp = zfs_open(zhp->zfs_hdl, zfs_get_name(zhp),
-	    ZFS_TYPE_ANY)) == NULL) {
+	    ZFS_TYPE_DATASET)) == NULL) {
 		changelist_free(clp);
 		return (NULL);
 	}
@ -571,9 +675,11 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
 	}

 	cn->cn_handle = temp;
-	cn->cn_mounted = zfs_is_mounted(temp, NULL);
+	cn->cn_mounted = (clp->cl_gflags & CL_GATHER_MOUNT_ALWAYS) ||
+	    zfs_is_mounted(temp, NULL);
 	cn->cn_shared = zfs_is_shared(temp);
 	cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+	cn->cn_needpost = B_TRUE;

 	uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
 	if (clp->cl_sorted) {
@ -586,14 +692,22 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
 	}

 	/*
-	 * If the property was previously 'legacy' or 'none', record this fact,
-	 * as the behavior of changelist_postfix() will be different.
+	 * If the mountpoint property was previously 'legacy', or 'none',
+	 * record it as the behavior of changelist_postfix() will be different.
 	 */
-	if (zfs_prop_get(zhp, prop, property, sizeof (property),
+	if ((clp->cl_prop == ZFS_PROP_MOUNTPOINT) &&
+	    (zfs_prop_get(zhp, prop, property, sizeof (property),
 	    NULL, NULL, 0, B_FALSE) == 0 &&
-	    (strcmp(property, "legacy") == 0 || strcmp(property, "none") == 0 ||
-	    strcmp(property, "off") == 0))
-		clp->cl_waslegacy = B_TRUE;
+	    (strcmp(property, "legacy") == 0 ||
+	    strcmp(property, "none") == 0))) {
+		/*
+		 * do not automatically mount ex-legacy datasets if
+		 * we specifically set canmount to noauto
+		 */
+		if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) !=
+		    ZFS_CANMOUNT_NOAUTO)
+			clp->cl_waslegacy = B_TRUE;
+	}

 	return (clp);
 }
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c
@ -19,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -126,6 +126,8 @@ typedef struct zfs_graph {
 	zfs_vertex_t		**zg_hash;
 	size_t			zg_size;
 	size_t			zg_nvertex;
+	const char		*zg_root;
+	int			zg_clone_count;
 } zfs_graph_t;

 /*
@ -255,7 +257,7 @@ zfs_vertex_sort_edges(zfs_vertex_t *zvp)
 * datasets in the pool.
 */
 static zfs_graph_t *
-zfs_graph_create(libzfs_handle_t *hdl, size_t size)
+zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size)
 {
 	zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));

@ -269,6 +271,9 @@ zfs_graph_create(libzfs_handle_t *hdl, size_t size)
 		return (NULL);
 	}

+	zgp->zg_root = dataset;
+	zgp->zg_clone_count = 0;
+
 	return (zgp);
 }

@ -367,17 +372,16 @@ zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
 }

 /*
- * Iterate over all children of the given dataset, adding any vertices as
- * necessary.  Returns 0 if no cloned snapshots were seen, -1 if there was an
- * error, or 1 otherwise.  This is a simple recursive algorithm - the ZFS
- * namespace typically is very flat.  We manually invoke the necessary ioctl()
- * calls to avoid the overhead and additional semantics of zfs_open().
+ * Iterate over all children of the given dataset, adding any vertices
+ * as necessary.  Returns -1 if there was an error, or 0 otherwise.
+ * This is a simple recursive algorithm - the ZFS namespace typically
+ * is very flat.  We manually invoke the necessary ioctl() calls to
+ * avoid the overhead and additional semantics of zfs_open().
 */
 static int
 iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
 {
 	zfs_cmd_t zc = { 0 };
-	int ret = 0, err;
 	zfs_vertex_t *zvp;

 	/*
@ -390,18 +394,8 @@ iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
 		return (0);

 	/*
-	 * We check the clone parent here instead of within the loop, so that if
-	 * the root dataset has been promoted from a clone, we find its parent
-	 * appropriately.
+	 * Iterate over all children
 	 */
-	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0 &&
-	    zc.zc_objset_stats.dds_clone_of[0] != '\0') {
-		if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of,
-		    zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0)
-			return (-1);
-	}
-
 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
 	    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
@ -417,9 +411,23 @@ iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
 		 * dataset and clone statistics.  If this fails, the dataset has
 		 * since been removed, and we're pretty much screwed anyway.
 		 */
+		zc.zc_objset_stats.dds_origin[0] = '\0';
 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
 			continue;

+		if (zc.zc_objset_stats.dds_origin[0] != '\0') {
+			if (zfs_graph_add(hdl, zgp,
+			    zc.zc_objset_stats.dds_origin, zc.zc_name,
+			    zc.zc_objset_stats.dds_creation_txg) != 0)
+				return (-1);
+			/*
+			 * Count origins only if they are contained in the graph
+			 */
+			if (isa_child_of(zc.zc_objset_stats.dds_origin,
+			    zgp->zg_root))
+				zgp->zg_clone_count--;
+		}
+
 		/*
 		 * Add an edge between the parent and the child.
 		 */
@ -428,19 +436,10 @@ iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
 			return (-1);

 		/*
-		 * Iterate over all children
+		 * Recursively visit child
 		 */
-		err = iterate_children(hdl, zgp, zc.zc_name);
-		if (err == -1)
+		if (iterate_children(hdl, zgp, zc.zc_name))
 			return (-1);
-		else if (err == 1)
-			ret = 1;
-
-		/*
-		 * Indicate if we found a dataset with a non-zero clone count.
-		 */
-		if (zc.zc_objset_stats.dds_num_clones != 0)
-			ret = 1;
 	}

 	/*
@ -467,67 +466,84 @@ iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
 		    zc.zc_objset_stats.dds_creation_txg) != 0)
 			return (-1);

-		/*
-		 * Indicate if we found a dataset with a non-zero clone count.
-		 */
-		if (zc.zc_objset_stats.dds_num_clones != 0)
-			ret = 1;
+		zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones;
 	}

 	zvp->zv_visited = VISIT_SEEN;

-	return (ret);
+	return (0);
 }

 /*
- * Construct a complete graph of all necessary vertices.  First, we iterate over
- * only our object's children.  If we don't find any cloned snapshots, then we
- * simple return that.  Otherwise, we have to start at the pool root and iterate
- * over all datasets.
+ * Returns false if there are no snapshots with dependent clones in this
+ * subtree or if all of those clones are also in this subtree.  Returns
+ * true if there is an error or there are external dependents.
+ */
+static boolean_t
+external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
+{
+	zfs_cmd_t zc = { 0 };
+
+	/*
+	 * Check whether this dataset is a clone or has clones since
+	 * iterate_children() only checks the children.
+	 */
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	/* A failed stats lookup is reported as "has external dependents". */
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
+		return (B_TRUE);
+
+	/*
+	 * A non-empty dds_origin means this dataset is itself a clone:
+	 * add the origin -> clone edge to the graph.  If the origin lives
+	 * underneath this dataset, subtract it from the clone count so it
+	 * is not treated as an outside dependency.
+	 */
+	if (zc.zc_objset_stats.dds_origin[0] != '\0') {
+		if (zfs_graph_add(hdl, zgp,
+		    zc.zc_objset_stats.dds_origin, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (B_TRUE);
+		if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset))
+			zgp->zg_clone_count--;
+	}
+
+	/*
+	 * Clones of the dataset itself always count as external.  A nonzero
+	 * return from iterate_children() signals an error, which is also
+	 * reported as B_TRUE.
+	 */
+	if ((zc.zc_objset_stats.dds_num_clones) ||
+	    iterate_children(hdl, zgp, dataset))
+		return (B_TRUE);
+
+	/*
+	 * iterate_children() adds dds_num_clones values to zg_clone_count
+	 * and subtracts one for every clone whose origin lies under zg_root.
+	 * Any nonzero balance is treated as a clone outside this subtree.
+	 */
+	return (zgp->zg_clone_count != 0);
+}
+
+/*
+ * Construct a complete graph of all necessary vertices.  First, iterate over
+ * only our object's children.  If no cloned snapshots are found, or all of
+ * the cloned snapshots are in this subtree then return a graph of the subtree.
+ * Otherwise, start at the root of the pool and iterate over all datasets.
 */
 static zfs_graph_t *
 construct_graph(libzfs_handle_t *hdl, const char *dataset)
 {
-	zfs_graph_t *zgp = zfs_graph_create(hdl, ZFS_GRAPH_SIZE);
-	zfs_cmd_t zc = { 0 };
+	zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE);
 	int ret = 0;

 	if (zgp == NULL)
 		return (zgp);

-	/*
-	 * We need to explicitly check whether this dataset has clones or not,
-	 * since iterate_children() only checks the children.
-	 */
-	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-	(void) ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc);
-
-	if (zc.zc_objset_stats.dds_num_clones != 0 ||
-	    (ret = iterate_children(hdl, zgp, dataset)) != 0) {
+	if ((strchr(dataset, '/') == NULL) ||
+	    (external_dependents(hdl, zgp, dataset))) {
 		/*
 		 * Determine pool name and try again.
 		 */
-		char *pool, *slash;
+		int len = strcspn(dataset, "/@") + 1;
+		char *pool = zfs_alloc(hdl, len);

-		if ((slash = strchr(dataset, '/')) != NULL ||
-		    (slash = strchr(dataset, '@')) != NULL) {
-			pool = zfs_alloc(hdl, slash - dataset + 1);
-			if (pool == NULL) {
-				zfs_graph_destroy(zgp);
-				return (NULL);
-			}
-			(void) strncpy(pool, dataset, slash - dataset);
-			pool[slash - dataset] = '\0';
-
-			if (iterate_children(hdl, zgp, pool) == -1 ||
-			    zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
-				free(pool);
-				zfs_graph_destroy(zgp);
-				return (NULL);
-			}
-
-			free(pool);
+		if (pool == NULL) {
+			zfs_graph_destroy(zgp);
+			return (NULL);
 		}
+		(void) strlcpy(pool, dataset, len);
+
+		if (iterate_children(hdl, zgp, pool) == -1 ||
+		    zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
+			free(pool);
+			zfs_graph_destroy(zgp);
+			return (NULL);
+		}
+		free(pool);
 	}

 	if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
@ -1,5 +1,5 @@
 /*
- * CDDL HEADER START
+ * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
@ -20,21 +20,21 @@
 */

 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

 #ifndef	_LIBFS_IMPL_H
 #define	_LIBFS_IMPL_H

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/dmu.h>
 #include <sys/fs/zfs.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_acl.h>
+#include <sys/spa.h>
 #include <sys/nvpair.h>

+#include <libshare.h>
 #include <libuutil.h>
 #include <libzfs.h>

@ -42,22 +42,33 @@
 extern "C" {
 #endif

+#ifdef	VERIFY
+#undef	VERIFY
+#endif
+#define	VERIFY	verify
+
 struct libzfs_handle {
 	int libzfs_error;
 	int libzfs_fd;
 	FILE *libzfs_mnttab;
 	FILE *libzfs_sharetab;
+	zpool_handle_t *libzfs_pool_handles;
 	uu_avl_pool_t *libzfs_ns_avlpool;
 	uu_avl_t *libzfs_ns_avl;
 	uint64_t libzfs_ns_gen;
 	int libzfs_desc_active;
 	char libzfs_action[1024];
 	char libzfs_desc[1024];
+	char *libzfs_log_str;
 	int libzfs_printerr;
+	void *libzfs_sharehdl; /* libshare handle */
+	uint_t libzfs_shareflags;
 };
+#define	ZFSSHARE_MISS	0x01	/* Didn't find entry in cache */

 struct zfs_handle {
 	libzfs_handle_t *zfs_hdl;
+	zpool_handle_t *zpool_hdl;
 	char zfs_name[ZFS_MAXNAMELEN];
 	zfs_type_t zfs_type; /* type including snapshot */
 	zfs_type_t zfs_head_type; /* type excluding snapshot */
@ -66,7 +77,6 @@ struct zfs_handle {
 	nvlist_t *zfs_user_props;
 	boolean_t zfs_mntcheck;
 	char *zfs_mntopts;
-	char zfs_root[MAXPATHLEN];
 };

 /*
@ -77,14 +87,33 @@ struct zfs_handle {

 struct zpool_handle {
 	libzfs_handle_t *zpool_hdl;
+	zpool_handle_t *zpool_next;
 	char zpool_name[ZPOOL_MAXNAMELEN];
 	int zpool_state;
 	size_t zpool_config_size;
 	nvlist_t *zpool_config;
 	nvlist_t *zpool_old_config;
 	nvlist_t *zpool_props;
+	diskaddr_t zpool_start_block;
 };

+typedef  enum {
+	PROTO_NFS = 0,
+	PROTO_SMB = 1,
+	PROTO_END = 2
+} zfs_share_proto_t;
+
+/*
+ * The following can be used as a bitmask and any new values
+ * added must preserve that capability.
+ */
+typedef enum {
+	SHARED_NOT_SHARED = 0x0,
+	SHARED_ISCSI = 0x1,
+	SHARED_NFS = 0x2,
+	SHARED_SMB = 0x4
+} zfs_share_type_t;
+
 int zfs_error(libzfs_handle_t *, int, const char *);
 int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...);
 void zfs_error_aux(libzfs_handle_t *, const char *, ...);
@ -101,20 +130,24 @@ int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
 int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***,
    size_t *);

-int zfs_expand_proplist_common(libzfs_handle_t *, zfs_proplist_t **,
-    zfs_type_t);
-int zfs_get_proplist_common(libzfs_handle_t *, char *, zfs_proplist_t **,
-    zfs_type_t);
-zfs_prop_t zfs_prop_iter_common(zfs_prop_f, void *, zfs_type_t, boolean_t);
-zfs_prop_t zfs_name_to_prop_common(const char *, zfs_type_t);

-nvlist_t *zfs_validate_properties(libzfs_handle_t *, zfs_type_t, char *,
-	nvlist_t *, uint64_t, zfs_handle_t *zhp, const char *errbuf);
+int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t,
+    nvlist_t *, char **, uint64_t *, const char *);
+int zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp,
+    zfs_type_t type);
+
+/*
+ * Use this changelist_gather() flag to force attempting mounts
+ * on each change node regardless of whether or not it is currently
+ * mounted.
+ */
+#define	CL_GATHER_MOUNT_ALWAYS	1

 typedef struct prop_changelist prop_changelist_t;

 int zcmd_alloc_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, size_t);
-int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *, size_t *);
+int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *);
+int zcmd_write_conf_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *);
 int zcmd_expand_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *);
 int zcmd_read_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t **);
 void zcmd_free_nvlists(zfs_cmd_t *);
@ -122,13 +155,15 @@ void zcmd_free_nvlists(zfs_cmd_t *);
 int changelist_prefix(prop_changelist_t *);
 int changelist_postfix(prop_changelist_t *);
 void changelist_rename(prop_changelist_t *, const char *, const char *);
-void changelist_remove(zfs_handle_t *, prop_changelist_t *);
+void changelist_remove(prop_changelist_t *, const char *);
 void changelist_free(prop_changelist_t *);
-prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int);
-int changelist_unshare(prop_changelist_t *);
+prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int, int);
+int changelist_unshare(prop_changelist_t *, zfs_share_proto_t *);
 int changelist_haszonedchild(prop_changelist_t *);

 void remove_mountpoint(zfs_handle_t *);
+int create_parents(libzfs_handle_t *, char *, int);
+boolean_t isa_child_of(const char *dataset, const char *parent);

 zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);

@ -137,10 +172,23 @@ int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
 int zvol_create_link(libzfs_handle_t *, const char *);
 int zvol_remove_link(libzfs_handle_t *, const char *);
 int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *);
+boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *);

 void namespace_clear(libzfs_handle_t *);

+/*
+ * libshare (sharemgr) interfaces used internally.
+ */
+
+extern int zfs_init_libshare(libzfs_handle_t *, int);
+extern void zfs_uninit_libshare(libzfs_handle_t *);
+extern int zfs_parse_options(char *, zfs_share_proto_t);
+
+extern int zfs_unshare_proto(zfs_handle_t *zhp,
+    const char *, zfs_share_proto_t *);
+
 #ifdef	__FreeBSD__
+
 /*
 * This is FreeBSD version of ioctl, because Solaris' ioctl() updates
 * zc_nvlist_dst_size even if an error is returned, on FreeBSD if an
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
@ -19,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -213,11 +213,13 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
 	name_entry_t *ne;

 	/*
-	 * If this is a hot spare not currently in use, add it to the list of
-	 * names to translate, but don't do anything else.
+	 * If this is a hot spare not currently in use or level 2 cache
+	 * device, add it to the list of names to translate, but don't do
+	 * anything else.
 	 */
 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-	    &state) == 0 && state == POOL_STATE_SPARE &&
+	    &state) == 0 &&
+	    (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
 		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
 			return (-1);
@ -361,6 +363,46 @@ pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
 	return (0);
 }

+/*
+ * Pass 'config' to the kernel with ZFS_IOC_POOL_TRYIMPORT and return the
+ * nvlist read back from the ioctl's destination buffer.
+ *
+ * Returns NULL on any failure.  When the ioctl itself fails (for a reason
+ * other than ENOMEM), a "cannot discover pools" error is reported through
+ * zpool_standard_error() on 'hdl'.  'config' is not freed here; the command
+ * buffers held in the local zfs_cmd_t are released on every exit path.
+ */
+static nvlist_t *
+refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
+{
+	nvlist_t *nvl;
+	zfs_cmd_t zc = { 0 };
+	int err;
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
+		return (NULL);
+
+	/*
+	 * Start with a destination buffer twice zc_nvlist_conf_size
+	 * (presumably set by zcmd_write_conf_nvlist() above -- confirm);
+	 * it is grown below if that turns out to be too small.
+	 */
+	if (zcmd_alloc_dst_nvlist(hdl, &zc,
+	    zc.zc_nvlist_conf_size * 2) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (NULL);
+	}
+
+	/*
+	 * Retry for as long as the ioctl fails with ENOMEM, expanding the
+	 * destination buffer before each new attempt.
+	 */
+	while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
+	    &zc)) != 0 && errno == ENOMEM) {
+		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+			zcmd_free_nvlists(&zc);
+			return (NULL);
+		}
+	}
+
+	/* The loop ended with a failure other than ENOMEM. */
+	if (err) {
+		(void) zpool_standard_error(hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot discover pools"));
+		zcmd_free_nvlists(&zc);
+		return (NULL);
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (NULL);
+	}
+
+	zcmd_free_nvlists(&zc);
+	return (nvl);
+}
+
 /*
 * Convert our list of pools into the definitive set of configurations.  We
 * start by picking the best config for each toplevel vdev.  Once that's done,
@ -369,26 +411,25 @@ pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
 * return to the user.
 */
 static nvlist_t *
-get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
+get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
 {
 	pool_entry_t *pe;
 	vdev_entry_t *ve;
 	config_entry_t *ce;
 	nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
-	nvlist_t **spares;
-	uint_t i, nspares;
+	nvlist_t **spares, **l2cache;
+	uint_t i, nspares, nl2cache;
 	boolean_t config_seen;
 	uint64_t best_txg;
 	char *name, *hostname;
-	zfs_cmd_t zc = { 0 };
 	uint64_t version, guid;
-	size_t len;
-	int err;
 	uint_t children = 0;
 	nvlist_t **child = NULL;
 	uint_t c;
 	boolean_t isactive;
 	uint64_t hostid;
+	nvlist_t *nvl;
+	boolean_t found_one = B_FALSE;

 	if (nvlist_alloc(&ret, 0, 0) != 0)
 		goto nomem;
@ -570,6 +611,13 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
 		}
 		nvlist_free(nvroot);

+		/*
+		 * zdb uses this path to report on active pools that were
+		 * imported or created using -R.
+		 */
+		if (active_ok)
+			goto add_pool;
+
 		/*
 		 * Determine if this pool is currently active, in which case we
 		 * can't actually import it.
@ -588,41 +636,11 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
 			continue;
 		}

-		/*
-		 * Try to do the import in order to get vdev state.
-		 */
-		if (zcmd_write_src_nvlist(hdl, &zc, config, &len) != 0)
+		if ((nvl = refresh_config(hdl, config)) == NULL)
 			goto error;

 		nvlist_free(config);
-		config = NULL;
-
-		if (zcmd_alloc_dst_nvlist(hdl, &zc, len * 2) != 0) {
-			zcmd_free_nvlists(&zc);
-			goto error;
-		}
-
-		while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
-		    &zc)) != 0 && errno == ENOMEM) {
-			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
-				zcmd_free_nvlists(&zc);
-				goto error;
-			}
-		}
-
-		if (err) {
-			(void) zpool_standard_error(hdl, errno,
-			    dgettext(TEXT_DOMAIN, "cannot discover pools"));
-			zcmd_free_nvlists(&zc);
-			goto error;
-		}
-
-		if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
-			zcmd_free_nvlists(&zc);
-			goto error;
-		}
-
-		zcmd_free_nvlists(&zc);
+		config = nvl;

 		/*
 		 * Go through and update the paths for spares, now that we have
@ -638,6 +656,17 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
 			}
 		}

+		/*
+		 * Update the paths for l2cache devices.
+		 */
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+		    &l2cache, &nl2cache) == 0) {
+			for (i = 0; i < nl2cache; i++) {
+				if (fix_paths(l2cache[i], pl->names) != 0)
+					goto nomem;
+			}
+		}
+
 		/*
 		 * Restore the original information read from the actual label.
 		 */
@ -652,6 +681,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
 			    hostname) == 0);
 		}

+add_pool:
 		/*
 		 * Add this pool to the list of configs.
 		 */
@ -660,10 +690,16 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
 		if (nvlist_add_nvlist(ret, name, config) != 0)
 			goto nomem;

+		found_one = B_TRUE;
 		nvlist_free(config);
 		config = NULL;
 	}

+	if (!found_one) {
+		nvlist_free(ret);
+		ret = NULL;
+	}
+
 	return (ret);

 nomem:
@ -682,8 +718,9 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
 * Return the offset of the given label.
 */
 static uint64_t
-label_offset(size_t size, int l)
+label_offset(uint64_t size, int l)
 {
+	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
 	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
 	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
 }
@ -698,19 +735,20 @@ zpool_read_label(int fd, nvlist_t **config)
 	struct stat64 statbuf;
 	int l;
 	vdev_label_t *label;
-	uint64_t state, txg;
+	uint64_t state, txg, size;

 	*config = NULL;

 	if (fstat64(fd, &statbuf) == -1)
 		return (0);
+	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);

 	if ((label = malloc(sizeof (vdev_label_t))) == NULL)
 		return (-1);

 	for (l = 0; l < VDEV_LABELS; l++) {
-		if (pread(fd, label, sizeof (vdev_label_t),
-		    label_offset(statbuf.st_size, l)) != sizeof (vdev_label_t))
+		if (pread64(fd, label, sizeof (vdev_label_t),
+		    label_offset(size, l)) != sizeof (vdev_label_t))
 			continue;

 		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
@ -718,12 +756,12 @@ zpool_read_label(int fd, nvlist_t **config)
 			continue;

 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
-		    &state) != 0 || state > POOL_STATE_SPARE) {
+		    &state) != 0 || state > POOL_STATE_L2CACHE) {
 			nvlist_free(*config);
 			continue;
 		}

-		if (state != POOL_STATE_SPARE &&
+		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
 		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
 		    &txg) != 0 || txg == 0)) {
 			nvlist_free(*config);
@ -739,31 +777,20 @@ zpool_read_label(int fd, nvlist_t **config)
 	return (0);
 }

-/*
- * Given a list of directories to search, find all pools stored on disk.  This
- * includes partial pools which are not available to import.  If no args are
- * given (argc is 0), then the default directory (/dev) is searched.
- */
-nvlist_t *
-zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
+static int
+geom_find_import(libzfs_handle_t *hdl, pool_list_t *pools)
 {
-	int i;
 	char path[MAXPATHLEN];
-	nvlist_t *ret = NULL, *config;
-	int fd;
-	pool_list_t pools = { 0 };
-	pool_entry_t *pe, *penext;
-	vdev_entry_t *ve, *venext;
-	config_entry_t *ce, *cenext;
-	name_entry_t *ne, *nenext;
 	struct gmesh mesh;
 	struct gclass *mp;
 	struct ggeom *gp;
 	struct gprovider *pp;
+	nvlist_t *config;
+	int fd, ret = 0;

 	/*
 	 * Go through and read the label configuration information from every
-	 * possible device, organizing the information according to pool GUID
+	 * GEOM provider, organizing the information according to pool GUID
 	 * and toplevel GUID.
 	 */

@ -773,32 +800,183 @@ zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
 	LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
        	LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
 			LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+				if ((fd = g_open(pp->lg_name, 0)) < 0)
+					continue;

 				(void) snprintf(path, sizeof (path), "%s%s",
 				    _PATH_DEV, pp->lg_name);

-				if ((fd = open64(path, O_RDONLY)) < 0)
-					continue;
-
 				if ((zpool_read_label(fd, &config)) != 0) {
+					(void) g_close(fd);
 					(void) no_memory(hdl);
 					goto error;
 				}

-				(void) close(fd);
+				(void) g_close(fd);

 				if (config == NULL)
 					continue;

+				if (add_config(hdl, pools, path, config) != 0) {
+					ret = -1;
+					goto error;
+				}
+			}
+		}
+	}
+error:
+	geom_deletetree(&mesh);
+	return (ret);
+}
+
+/*
+ * Given a list of directories to search, find all pools stored on disk.  This
+ * includes partial pools which are not available to import.  If no args are
+ * given (argc is 0), then the default directory (/dev/dsk) is searched.
+ * poolname or guid (but not both) are provided by the caller when trying
+ * to import a specific pool.
+ */
+static nvlist_t *
+zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
+    boolean_t active_ok, char *poolname, uint64_t guid)
+{
+	int i;
+	DIR *dirp = NULL;
+	struct dirent64 *dp;
+	char path[MAXPATHLEN];
+	char *end;
+	size_t pathleft;
+	struct stat64 statbuf;
+	nvlist_t *ret = NULL, *config;
+	static char *default_dir = "/dev/dsk";
+	int fd;
+	pool_list_t pools = { 0 };
+	pool_entry_t *pe, *penext;
+	vdev_entry_t *ve, *venext;
+	config_entry_t *ce, *cenext;
+	name_entry_t *ne, *nenext;
+
+	verify(poolname == NULL || guid == 0);
+
+	if (argc == 0) {
+		argc = 1;
+		argv = &default_dir;
+	}
+
+	/*
+	 * Go through and read the label configuration information from every
+	 * possible device, organizing the information according to pool GUID
+	 * and toplevel GUID.
+	 */
+	for (i = 0; i < argc; i++) {
+		char *rdsk;
+		int dfd;
+
+		/* use realpath to normalize the path */
+		if (realpath(argv[i], path) == 0) {
+			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
+			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+			    argv[i]);
+			goto error;
+		}
+		end = &path[strlen(path)];
+		*end++ = '/';
+		*end = 0;
+		pathleft = &path[sizeof (path)] - end;
+
+		if (strcmp(argv[i], default_dir) == 0) {
+			geom_find_import(hdl, &pools);
+			continue;
+		}
+
+		/*
+		 * Using raw devices instead of block devices when we're
+		 * reading the labels skips a bunch of slow operations during
+		 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
+		 */
+		if (strcmp(path, "/dev/dsk/") == 0)
+			rdsk = "/dev/rdsk/";
+		else
+			rdsk = path;
+
+		if ((dirp = opendir(rdsk)) == NULL) {
+			zfs_error_aux(hdl, strerror(errno));
+			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
+			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+			    rdsk);
+			goto error;
+		}
+
+		/*
+		 * This is not MT-safe, but we have no MT consumers of libzfs
+		 */
+		while ((dp = readdir64(dirp)) != NULL) {
+			const char *name = dp->d_name;
+			if (name[0] == '.' &&
+			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
+				continue;
+
+			(void) snprintf(path, sizeof (path), "%s/%s", rdsk,
+			    dp->d_name);
+
+			if ((fd = open64(path, O_RDONLY)) < 0)
+				continue;
+
+			/*
+			 * Ignore failed stats.  We only want regular
+			 * files, character devs and block devs.
+			 */
+			if (fstat64(fd, &statbuf) != 0 ||
+			    (!S_ISREG(statbuf.st_mode) &&
+			    !S_ISCHR(statbuf.st_mode) &&
+			    !S_ISBLK(statbuf.st_mode))) {
+				(void) close(fd);
+				continue;
+			}
+
+			if ((zpool_read_label(fd, &config)) != 0) {
+				(void) close(fd);
+				(void) no_memory(hdl);
+				goto error;
+			}
+
+			(void) close(fd);
+
+			if (config != NULL) {
+				boolean_t matched = B_TRUE;
+
+				if (poolname != NULL) {
+					char *pname;
+
+					matched = nvlist_lookup_string(config,
+					    ZPOOL_CONFIG_POOL_NAME,
+					    &pname) == 0 &&
+					    strcmp(poolname, pname) == 0;
+				} else if (guid != 0) {
+					uint64_t this_guid;
+
+					matched = nvlist_lookup_uint64(config,
+					    ZPOOL_CONFIG_POOL_GUID,
+					    &this_guid) == 0 &&
+					    guid == this_guid;
+				}
+				if (!matched) {
+					nvlist_free(config);
+					config = NULL;
+					continue;
+				}
+				/* use the non-raw path for the config */
+				(void) strlcpy(end, name, pathleft);
 				if (add_config(hdl, &pools, path, config) != 0)
 					goto error;
 			}
 		}
+
+		(void) closedir(dirp);
+		dirp = NULL;
 	}

-	geom_deletetree(&mesh);
-
-	ret = get_configs(hdl, &pools);
+	ret = get_configs(hdl, &pools, active_ok);

 error:
 	for (pe = pools.pools; pe != NULL; pe = penext) {
@ -823,9 +1001,158 @@ zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
 		free(ne);
 	}

+	if (dirp)
+		(void) closedir(dirp);
+
 	return (ret);
 }

+/*
+ * Search for pools with no name or GUID filter and with active_ok set to
+ * B_FALSE.  Thin wrapper around zpool_find_import_impl().
+ */
+nvlist_t *
+zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0));
+}
+
+/*
+ * Same search as zpool_find_import_impl() with active_ok B_FALSE, restricted
+ * to configs whose pool name equals 'pool' (no GUID filter).
+ */
+nvlist_t *
+zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv,
+    char *pool)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0));
+}
+
+/*
+ * Same search as zpool_find_import_impl() with active_ok B_FALSE, restricted
+ * to configs whose pool GUID equals 'guid' (no name filter).
+ */
+nvlist_t *
+zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv,
+    uint64_t guid)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid));
+}
+
+/*
+ * Unfiltered search that passes active_ok = B_TRUE to
+ * zpool_find_import_impl().
+ */
+nvlist_t *
+zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0));
+}
+
+/*
+ * Given a cache file, return the contents as a list of importable pools.
+ * poolname or guid (but not both) are provided by the caller when trying
+ * to import a specific pool.
+ *
+ * The file is read in full and unpacked as an nvlist.  Every entry that
+ * passes the optional name / GUID filter and that pool_active() reports as
+ * not active is run through refresh_config() and added to the result under
+ * its original pair name.
+ *
+ * Returns NULL on any failure; otherwise returns the result nvlist, which
+ * may be empty if no entry qualified.
+ */
+nvlist_t *
+zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
+    char *poolname, uint64_t guid)
+{
+	char *buf;
+	int fd;
+	struct stat64 statbuf;
+	nvlist_t *raw, *src, *dst;
+	nvlist_t *pools;
+	nvpair_t *elem;
+	char *name;
+	uint64_t this_guid;
+	boolean_t active;
+
+	verify(poolname == NULL || guid == 0);
+
+	if ((fd = open(cachefile, O_RDONLY)) < 0) {
+		zfs_error_aux(hdl, "%s", strerror(errno));
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "failed to open cache file"));
+		return (NULL);
+	}
+
+	if (fstat64(fd, &statbuf) != 0) {
+		/* Record errno before close() has a chance to change it. */
+		zfs_error_aux(hdl, "%s", strerror(errno));
+		(void) close(fd);
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
+		return (NULL);
+	}
+
+	if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
+		(void) close(fd);
+		return (NULL);
+	}
+
+	/* A short read is treated the same as a read error. */
+	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
+		(void) close(fd);
+		free(buf);
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN,
+		    "failed to read cache file contents"));
+		return (NULL);
+	}
+
+	(void) close(fd);
+
+	if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
+		free(buf);
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN,
+		    "invalid or corrupt cache file contents"));
+		return (NULL);
+	}
+
+	free(buf);
+
+	/*
+	 * Go through and get the current state of the pools and refresh their
+	 * state.
+	 */
+	if (nvlist_alloc(&pools, 0, 0) != 0) {
+		(void) no_memory(hdl);
+		nvlist_free(raw);
+		return (NULL);
+	}
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
+		verify(nvpair_value_nvlist(elem, &src) == 0);
+
+		verify(nvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		if (poolname != NULL && strcmp(poolname, name) != 0)
+			continue;
+
+		/*
+		 * The GUID is needed for pool_active() below regardless of
+		 * whether the caller asked for a GUID filter, so look it up
+		 * exactly once here.
+		 */
+		verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
+		    &this_guid) == 0);
+		if (guid != 0 && guid != this_guid)
+			continue;
+
+		if (pool_active(hdl, name, this_guid, &active) != 0) {
+			nvlist_free(raw);
+			nvlist_free(pools);
+			return (NULL);
+		}
+
+		/* Pools reported as active are left out of the result. */
+		if (active)
+			continue;
+
+		if ((dst = refresh_config(hdl, src)) == NULL) {
+			nvlist_free(raw);
+			nvlist_free(pools);
+			return (NULL);
+		}
+
+		if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
+			(void) no_memory(hdl);
+			nvlist_free(dst);
+			nvlist_free(raw);
+			nvlist_free(pools);
+			return (NULL);
+		}
+		/* dst is released here once it has been added to 'pools'. */
+		nvlist_free(dst);
+	}
+
+	nvlist_free(raw);
+	return (pools);
+}
+
+
 boolean_t
 find_guid(nvlist_t *nv, uint64_t guid)
 {
@ -847,27 +1174,28 @@ find_guid(nvlist_t *nv, uint64_t guid)
 	return (B_FALSE);
 }

-typedef struct spare_cbdata {
+typedef struct aux_cbdata {
+	const char	*cb_type;
 	uint64_t	cb_guid;
 	zpool_handle_t	*cb_zhp;
-} spare_cbdata_t;
+} aux_cbdata_t;

 static int
-find_spare(zpool_handle_t *zhp, void *data)
+find_aux(zpool_handle_t *zhp, void *data)
 {
-	spare_cbdata_t *cbp = data;
-	nvlist_t **spares;
-	uint_t i, nspares;
+	aux_cbdata_t *cbp = data;
+	nvlist_t **list;
+	uint_t i, count;
 	uint64_t guid;
 	nvlist_t *nvroot;

 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);

-	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
-	    &spares, &nspares) == 0) {
-		for (i = 0; i < nspares; i++) {
-			verify(nvlist_lookup_uint64(spares[i],
+	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
+	    &list, &count) == 0) {
+		for (i = 0; i < count; i++) {
+			verify(nvlist_lookup_uint64(list[i],
 			    ZPOOL_CONFIG_GUID, &guid) == 0);
 			if (guid == cbp->cb_guid) {
 				cbp->cb_zhp = zhp;
@ -896,7 +1224,7 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
 	zpool_handle_t *zhp;
 	nvlist_t *pool_config;
 	uint64_t stateval, isspare;
-	spare_cbdata_t cb = { 0 };
+	aux_cbdata_t cb = { 0 };
 	boolean_t isactive;

 	*inuse = B_FALSE;
@ -914,7 +1242,7 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
 	    &vdev_guid) == 0);

-	if (stateval != POOL_STATE_SPARE) {
+	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
 		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
 		    &name) == 0);
 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
@ -993,7 +1321,24 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
 		 */
 		cb.cb_zhp = NULL;
 		cb.cb_guid = vdev_guid;
-		if (zpool_iter(hdl, find_spare, &cb) == 1) {
+		cb.cb_type = ZPOOL_CONFIG_SPARES;
+		if (zpool_iter(hdl, find_aux, &cb) == 1) {
+			name = (char *)zpool_get_name(cb.cb_zhp);
+			ret = TRUE;
+		} else {
+			ret = FALSE;
+		}
+		break;
+
+	case POOL_STATE_L2CACHE:
+
+		/*
+		 * Check if any pool is currently using this l2cache device.
+		 */
+		cb.cb_zhp = NULL;
+		cb.cb_guid = vdev_guid;
+		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
+		if (zpool_iter(hdl, find_aux, &cb) == 1) {
 			name = (char *)zpool_get_name(cb.cb_zhp);
 			ret = TRUE;
 		} else {
@ -1008,6 +1353,8 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,

 	if (ret) {
 		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
+			if (cb.cb_zhp)
+				zpool_close(cb.cb_zhp);
 			nvlist_free(config);
 			return (-1);
 		}
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c
@ -19,18 +19,16 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
 * This file contains the functions which analyze the status of a pool.  This
 * include both the status of an active pool, as well as the status exported
 * pools.  Returns one of the ZPOOL_STATUS_* defines describing the status of
 * the pool.  This status is independent (to a certain degree) from the state of
- * the pool.  A pool's state descsribes only whether or not it is capable of
+ * the pool.  A pool's state describes only whether or not it is capable of
 * providing the necessary fault tolerance for data.  The status describes the
 * overall status of devices.  A pool that is online can still have a device
 * that is experiencing errors.
@ -47,7 +45,7 @@
 #include "libzfs_impl.h"

 /*
- * Message ID table.  This must be kep in sync with the ZPOOL_STATUS_* defines
+ * Message ID table.  This must be kept in sync with the ZPOOL_STATUS_* defines
 * in libzfs.h.  Note that there are some status results which go past the end
 * of this table, and hence have no associated message ID.
 */
@ -62,26 +60,10 @@ static char *zfs_msgid_table[] = {
 	"ZFS-8000-8A",
 	"ZFS-8000-9P",
 	"ZFS-8000-A5",
-	"ZFS-8000-EY"
-};
-
-/*
- * If the pool is active, a certain class of static errors is overridden by the
- * faults as analayzed by FMA.  These faults have separate knowledge articles,
- * and the article referred to by 'zpool status' must match that indicated by
- * the syslog error message.  We override missing data as well as corrupt pool.
- */
-static char *zfs_msgid_table_active[] = {
-	"ZFS-8000-14",
-	"ZFS-8000-D3",		/* overridden */
-	"ZFS-8000-D3",		/* overridden */
-	"ZFS-8000-4J",
-	"ZFS-8000-5E",
-	"ZFS-8000-6X",
-	"ZFS-8000-CS",		/* overridden */
-	"ZFS-8000-8A",
-	"ZFS-8000-9P",
-	"ZFS-8000-CS",		/* overridden */
+	"ZFS-8000-EY",
+	"ZFS-8000-HC",
+	"ZFS-8000-JQ",
+	"ZFS-8000-K4",
 };

 #define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
@ -94,11 +76,18 @@ vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
 	    aux == VDEV_AUX_OPEN_FAILED);
 }

+/*
+ * Predicate with the (state, aux, errs) signature taken by
+ * find_vdev_problem(): nonzero when 'state' is VDEV_STATE_FAULTED.
+ * 'aux' and 'errs' are not examined.
+ */
+/* ARGSUSED */
+static int
+vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	return (state == VDEV_STATE_FAULTED);
+}
+
 /* ARGSUSED */
 static int
 vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
 {
-	return (errs != 0);
+	return (state == VDEV_STATE_DEGRADED || errs != 0);
 }

 /* ARGSUSED */
@ -163,9 +152,9 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
 * following:
 *
 *	- Check for a complete and valid configuration
- *	- Look for any missing devices in a non-replicated config
+ *	- Look for any faulted or missing devices in a non-replicated config
 *	- Check for any data errors
- *	- Check for any missing devices in a replicated config
+ *	- Check for any faulted or missing devices in a replicated config
 *	- Look for any devices showing errors
 *	- Check for any resilvering devices
 *
@ -181,6 +170,7 @@ check_status(nvlist_t *config, boolean_t isimport)
 	uint64_t nerr;
 	uint64_t version;
 	uint64_t stateval;
+	uint64_t suspended;
 	uint64_t hostid = 0;

 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
@ -215,8 +205,30 @@ check_status(nvlist_t *config, boolean_t isimport)
 		return (ZPOOL_STATUS_BAD_GUID_SUM);

 	/*
-	 * Missing devices in non-replicated config.
+	 * Check whether the pool has suspended due to failed I/O.
 	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
+	    &suspended) == 0) {
+		if (suspended == ZIO_FAILURE_MODE_CONTINUE)
+			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
+		return (ZPOOL_STATUS_IO_FAILURE_WAIT);
+	}
+
+	/*
+	 * Could not read a log.
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
+		return (ZPOOL_STATUS_BAD_LOG);
+	}
+
+	/*
+	 * Bad devices in non-replicated config.
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    find_vdev_problem(nvroot, vdev_faulted))
+		return (ZPOOL_STATUS_FAULTED_DEV_NR);
+
 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
 	    find_vdev_problem(nvroot, vdev_missing))
 		return (ZPOOL_STATUS_MISSING_DEV_NR);
@ -244,6 +256,8 @@ check_status(nvlist_t *config, boolean_t isimport)
 	/*
 	 * Missing devices in a replicated config.
 	 */
+	if (find_vdev_problem(nvroot, vdev_faulted))
+		return (ZPOOL_STATUS_FAULTED_DEV_R);
 	if (find_vdev_problem(nvroot, vdev_missing))
 		return (ZPOOL_STATUS_MISSING_DEV_R);
 	if (find_vdev_problem(nvroot, vdev_broken))
@ -270,7 +284,7 @@ check_status(nvlist_t *config, boolean_t isimport)
 	/*
 	 * Outdated, but usable, version
 	 */
-	if (version < ZFS_VERSION)
+	if (version < SPA_VERSION)
 		return (ZPOOL_STATUS_VERSION_OLDER);

 	return (ZPOOL_STATUS_OK);
@ -284,7 +298,7 @@ zpool_get_status(zpool_handle_t *zhp, char **msgid)
 	if (ret >= NMSGID)
 		*msgid = NULL;
 	else
-		*msgid = zfs_msgid_table_active[ret];
+		*msgid = zfs_msgid_table[ret];

 	return (ret);
 }
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
@ -19,12 +19,10 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
 * Internal utility routines for the ZFS library.
 */
@ -37,6 +35,8 @@
 #include <stdlib.h>
 #include <strings.h>
 #include <unistd.h>
+#include <ctype.h>
+#include <math.h>
 #include <sys/mnttab.h>
 #include <sys/mntent.h>
 #include <sys/types.h>
@ -44,6 +44,7 @@
 #include <libzfs.h>

 #include "libzfs_impl.h"
+#include "zfs_prop.h"

 int
 libzfs_errno(libzfs_handle_t *hdl)
@ -133,6 +134,14 @@ libzfs_error_description(libzfs_handle_t *hdl)
 		return (dgettext(TEXT_DOMAIN, "unshare(1M) failed"));
 	case EZFS_SHARENFSFAILED:
 		return (dgettext(TEXT_DOMAIN, "share(1M) failed"));
+	case EZFS_UNSHARESMBFAILED:
+		return (dgettext(TEXT_DOMAIN, "smb remove share failed"));
+	case EZFS_SHARESMBFAILED:
+		return (dgettext(TEXT_DOMAIN, "smb add share failed"));
+	case EZFS_ISCSISVCUNAVAIL:
+		return (dgettext(TEXT_DOMAIN,
+		    "iscsitgt service need to be enabled by "
+		    "a privileged user"));
 	case EZFS_DEVLINKS:
 		return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
 	case EZFS_PERM:
@ -169,6 +178,38 @@ libzfs_error_description(libzfs_handle_t *hdl)
 		    "this pool operation"));
 	case EZFS_NAMETOOLONG:
 		return (dgettext(TEXT_DOMAIN, "dataset name is too long"));
+	case EZFS_OPENFAILED:
+		return (dgettext(TEXT_DOMAIN, "open failed"));
+	case EZFS_NOCAP:
+		return (dgettext(TEXT_DOMAIN,
+		    "disk capacity information could not be retrieved"));
+	case EZFS_LABELFAILED:
+		return (dgettext(TEXT_DOMAIN, "write of label failed"));
+	case EZFS_BADWHO:
+		return (dgettext(TEXT_DOMAIN, "invalid user/group"));
+	case EZFS_BADPERM:
+		return (dgettext(TEXT_DOMAIN, "invalid permission"));
+	case EZFS_BADPERMSET:
+		return (dgettext(TEXT_DOMAIN, "invalid permission set name"));
+	case EZFS_NODELEGATION:
+		return (dgettext(TEXT_DOMAIN, "delegated administration is "
+		    "disabled on pool"));
+	case EZFS_PERMRDONLY:
+		return (dgettext(TEXT_DOMAIN, "snapshot permissions cannot be"
+		    " modified"));
+	case EZFS_BADCACHE:
+		return (dgettext(TEXT_DOMAIN, "invalid or missing cache file"));
+	case EZFS_ISL2CACHE:
+		return (dgettext(TEXT_DOMAIN, "device is in use as a cache"));
+	case EZFS_VDEVNOTSUP:
+		return (dgettext(TEXT_DOMAIN, "vdev specification is not "
+		    "supported"));
+	case EZFS_NOTSUP:
+		return (dgettext(TEXT_DOMAIN, "operation not supported "
+		    "on this dataset"));
+	case EZFS_ACTIVE_SPARE:
+		return (dgettext(TEXT_DOMAIN, "pool has active shared spare "
+		    "device"));
 	case EZFS_UNKNOWN:
 		return (dgettext(TEXT_DOMAIN, "unknown error"));
 	default:
@ -249,6 +290,10 @@ zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
 		zfs_verror(hdl, EZFS_PERM, fmt, ap);
 		return (-1);

+	case ECANCELED:
+		zfs_verror(hdl, EZFS_NODELEGATION, fmt, ap);
+		return (-1);
+
 	case EIO:
 		zfs_verror(hdl, EZFS_IO, fmt, ap);
 		return (-1);
@ -280,9 +325,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
 		return (-1);
 	}

-
 	switch (error) {
 	case ENXIO:
+	case ENODEV:
 		zfs_verror(hdl, EZFS_IO, fmt, ap);
 		break;

@ -308,11 +353,17 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
 		    "dataset is busy"));
 		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
 		break;
-
+	case EROFS:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshot permissions cannot be modified"));
+		zfs_verror(hdl, EZFS_PERMRDONLY, fmt, ap);
+		break;
 	case ENAMETOOLONG:
 		zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap);
 		break;
-
+	case ENOTSUP:
+		zfs_verror(hdl, EZFS_BADVERSION, fmt, ap);
+		break;
 	default:
 		zfs_error_aux(hdl, strerror(errno));
 		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
@ -361,7 +412,7 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)

 	case EBUSY:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy"));
-		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
 		break;

 	case ENXIO:
@ -382,6 +433,11 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
 		zfs_verror(hdl, EZFS_POOL_INVALARG, fmt, ap);
 		break;

+	case ENOSPC:
+	case EDQUOT:
+		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
+		return (-1);
+
 	default:
 		zfs_error_aux(hdl, strerror(error));
 		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
@ -483,9 +539,8 @@ zfs_nicenum(uint64_t num, char *buf, size_t buflen)
 		 */
 		int i;
 		for (i = 2; i >= 0; i--) {
-			(void) snprintf(buf, buflen, "%.*f%c", i,
-			    (double)num / (1ULL << 10 * index), u);
-			if (strlen(buf) <= 5)
+			if (snprintf(buf, buflen, "%.*f%c", i,
+			    (double)num / (1ULL << 10 * index), u) <= 5)
 				break;
 		}
 	}
@ -538,6 +593,9 @@ libzfs_init(void)

 	hdl->libzfs_sharetab = fopen(ZFS_EXPORTS_PATH, "r");

+	zfs_prop_init();
+	zpool_prop_init();
+
 	return (hdl);
 }

@ -549,6 +607,10 @@ libzfs_fini(libzfs_handle_t *hdl)
 		(void) fclose(hdl->libzfs_mnttab);
 	if (hdl->libzfs_sharetab)
 		(void) fclose(hdl->libzfs_sharetab);
+	zfs_uninit_libshare(hdl);
+	if (hdl->libzfs_log_str)
+		(void) free(hdl->libzfs_log_str);
+	zpool_free_handles(hdl);
 	namespace_clear(hdl);
 	free(hdl);
 }
@ -565,6 +627,12 @@ zfs_get_handle(zfs_handle_t *zhp)
 	return (zhp->zfs_hdl);
 }

+/*
+ * Return the zpool_hdl member stored in the given dataset handle.
+ */
+zpool_handle_t *
+zfs_get_pool_handle(const zfs_handle_t *zhp)
+{
+	return (zhp->zpool_hdl);
+}
+
 /*
 * Given a name, determine whether or not it's a valid path
 * (starts with '/' or "./").  If so, walk the mnttab trying
@ -637,13 +705,14 @@ zcmd_expand_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc)
 void
 zcmd_free_nvlists(zfs_cmd_t *zc)
 {
+	free((void *)(uintptr_t)zc->zc_nvlist_conf);
 	free((void *)(uintptr_t)zc->zc_nvlist_src);
 	free((void *)(uintptr_t)zc->zc_nvlist_dst);
 }

-int
-zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl,
-    size_t *size)
+static int
+zcmd_write_nvlist_com(libzfs_handle_t *hdl, uint64_t *outnv, uint64_t *outlen,
+    nvlist_t *nvl)
 {
 	char *packed;
 	size_t len;
@ -655,14 +724,26 @@ zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl,

 	verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);

-	zc->zc_nvlist_src = (uint64_t)(uintptr_t)packed;
-	zc->zc_nvlist_src_size = len;
+	*outnv = (uint64_t)(uintptr_t)packed;
+	*outlen = len;

-	if (size)
-		*size = len;
 	return (0);
 }

+/*
+ * Run zcmd_write_nvlist_com() on 'nvl' with the zc_nvlist_conf and
+ * zc_nvlist_conf_size fields of 'zc' as outputs; returns its result.
+ */
+int
+zcmd_write_conf_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl)
+{
+	return (zcmd_write_nvlist_com(hdl, &zc->zc_nvlist_conf,
+	    &zc->zc_nvlist_conf_size, nvl));
+}
+
+/*
+ * Run zcmd_write_nvlist_com() on 'nvl' with the zc_nvlist_src and
+ * zc_nvlist_src_size fields of 'zc' as outputs; returns its result.
+ */
+int
+zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl)
+{
+	return (zcmd_write_nvlist_com(hdl, &zc->zc_nvlist_src,
+	    &zc->zc_nvlist_src_size, nvl));
+}
+
 /*
 * Unpacks an nvlist from the ZFS ioctl command structure.
 */
@ -676,10 +757,32 @@ zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp)
 	return (0);
 }

-static void
-zfs_print_prop_headers(libzfs_get_cbdata_t *cbp)
+/*
+ * Wrapper around ioctl() on the library's device descriptor.  The handle's
+ * pending log string (if any) is attached to the command through
+ * zc_history for the duration of the call, then freed and cleared so it is
+ * attached to at most one ioctl.
+ *
+ * Returns the ioctl() result.  errno as left by ioctl() is preserved across
+ * the cleanup, since free() is not guaranteed to leave it untouched on
+ * every platform and callers inspect errno after a failure.
+ */
+int
+zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc)
 {
-	zfs_proplist_t *pl = cbp->cb_proplist;
+	int error;
+	int saved_errno;
+
+	zc->zc_history = (uint64_t)(uintptr_t)hdl->libzfs_log_str;
+	error = ioctl(hdl->libzfs_fd, request, zc);
+	saved_errno = errno;
+	if (hdl->libzfs_log_str) {
+		free(hdl->libzfs_log_str);
+		hdl->libzfs_log_str = NULL;
+	}
+	zc->zc_history = 0;
+	errno = saved_errno;
+
+	return (error);
+}
+
+/*
+ * ================================================================
+ * API shared by zfs and zpool property management
+ * ================================================================
+ */
+
+static void
+zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
+{
+	zprop_list_t *pl = cbp->cb_proplist;
 	int i;
 	char *title;
 	size_t len;
@ -711,8 +814,12 @@ zfs_print_prop_headers(libzfs_get_cbdata_t *cbp)
 		/*
 		 * 'PROPERTY' column
 		 */
-		if (pl->pl_prop != ZFS_PROP_INVAL) {
-			len = strlen(zfs_prop_to_name(pl->pl_prop));
+		if (pl->pl_prop != ZPROP_INVAL) {
+			const char *propname = (type == ZFS_TYPE_POOL) ?
+			    zpool_prop_to_name(pl->pl_prop) :
+			    zfs_prop_to_name(pl->pl_prop);
+
+			len = strlen(propname);
 			if (len > cbp->cb_colwidths[GET_COL_PROPERTY])
 				cbp->cb_colwidths[GET_COL_PROPERTY] = len;
 		} else {
@ -731,7 +838,8 @@ zfs_print_prop_headers(libzfs_get_cbdata_t *cbp)
 		/*
 		 * 'NAME' and 'SOURCE' columns
 		 */
-		if (pl->pl_prop == ZFS_PROP_NAME &&
+		if (pl->pl_prop == (type == ZFS_TYPE_POOL ? ZPOOL_PROP_NAME :
+		    ZFS_PROP_NAME) &&
 		    pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) {
 			cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width;
 			cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width +
@ -777,8 +885,8 @@ zfs_print_prop_headers(libzfs_get_cbdata_t *cbp)
 * structure.
 */
 void
-libzfs_print_one_property(const char *name, libzfs_get_cbdata_t *cbp,
-    const char *propname, const char *value, zfs_source_t sourcetype,
+zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
+    const char *propname, const char *value, zprop_source_t sourcetype,
    const char *source)
 {
 	int i;
@ -792,7 +900,7 @@ libzfs_print_one_property(const char *name, libzfs_get_cbdata_t *cbp,
 		return;

 	if (cbp->cb_first)
-		zfs_print_prop_headers(cbp);
+		zprop_print_headers(cbp, cbp->cb_type);

 	for (i = 0; i < 4; i++) {
 		switch (cbp->cb_columns[i]) {
@ -810,23 +918,23 @@ libzfs_print_one_property(const char *name, libzfs_get_cbdata_t *cbp,

 		case GET_COL_SOURCE:
 			switch (sourcetype) {
-			case ZFS_SRC_NONE:
+			case ZPROP_SRC_NONE:
 				str = "-";
 				break;

-			case ZFS_SRC_DEFAULT:
+			case ZPROP_SRC_DEFAULT:
 				str = "default";
 				break;

-			case ZFS_SRC_LOCAL:
+			case ZPROP_SRC_LOCAL:
 				str = "local";
 				break;

-			case ZFS_SRC_TEMPORARY:
+			case ZPROP_SRC_TEMPORARY:
 				str = "temporary";
 				break;

-			case ZFS_SRC_INHERITED:
+			case ZPROP_SRC_INHERITED:
 				(void) snprintf(buf, sizeof (buf),
 				    "inherited from %s", source);
 				str = buf;
@ -851,3 +959,451 @@ libzfs_print_one_property(const char *name, libzfs_get_cbdata_t *cbp,

 	(void) printf("\n");
 }
+
+/*
+ * Given a numeric suffix, convert the value into a number of bits that the
+ * resulting value must be shifted.
+ *
+ * 'buf' is the text that follows the digits.  An empty string yields a shift
+ * of 0; otherwise the first character must be one of "BKMGTPEZ" (in either
+ * case), yielding 0, 10, 20, ... 70 respectively, optionally followed by a
+ * single 'b'/'B'.  Returns -1 if the suffix is not recognized; in that case
+ * an auxiliary error message is recorded on 'hdl' when 'hdl' is non-NULL.
+ */
+static int
+str2shift(libzfs_handle_t *hdl, const char *buf)
+{
+	const char *ends = "BKMGTPEZ";
+	size_t nends = strlen(ends);
+	size_t i;
+
+	if (buf[0] == '\0')
+		return (0);
+
+	/*
+	 * The <ctype.h> functions require an argument representable as an
+	 * unsigned char, so convert before calling toupper().
+	 */
+	for (i = 0; i < nends; i++) {
+		if (toupper((unsigned char)buf[0]) == ends[i])
+			break;
+	}
+	if (i == nends) {
+		/*
+		 * zfs_nicestrtonum() treats the handle as optional, so only
+		 * record the message when there is a handle to record it on.
+		 */
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid numeric suffix '%s'"), buf);
+		return (-1);
+	}
+
+	/*
+	 * We want to allow trailing 'b' characters for 'GB' or 'Mb'.  But don't
+	 * allow 'BB' - that's just weird.
+	 */
+	if (buf[1] == '\0' ||
+	    (toupper((unsigned char)buf[1]) == 'B' && buf[2] == '\0' &&
+	    toupper((unsigned char)buf[0]) != 'B'))
+		return (10 * (int)i);
+
+	if (hdl != NULL)
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid numeric suffix '%s'"), buf);
+	return (-1);
+}
+
+/*
+ * Convert a string of the form '100G' into a real number.  Used when setting
+ * properties or creating a volume.
+ *
+ * 'value' must begin with a digit or '.', optionally followed by one of the
+ * suffixes accepted by str2shift().  On success the result is stored in
+ * '*num' and 0 is returned.  On failure -1 is returned and the contents of
+ * '*num' should not be relied upon.  When 'hdl' is non-NULL, the errors
+ * detected here record an extended error message on it for the caller to
+ * use.
+ */
+int
+zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
+{
+	char *end;
+	int shift;
+
+	*num = 0;
+
+	/* Check to see if this looks like a number.  */
+	if ((value[0] < '0' || value[0] > '9') && value[0] != '.') {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "bad numeric value '%s'"), value);
+		return (-1);
+	}
+
+	/*
+	 * Rely on strtoull() to process the numeric portion.  The result is
+	 * an unsigned 64-bit quantity, so the unsigned conversion is used in
+	 * order not to reject values between 2^63 and 2^64 - 1.  A leading
+	 * '-' cannot reach this point because of the check above.
+	 */
+	errno = 0;
+	*num = strtoull(value, &end, 10);
+
+	/*
+	 * Check for ERANGE, which indicates that the value is too large to fit
+	 * in a 64-bit value.
+	 */
+	if (errno == ERANGE) {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "numeric value is too large"));
+		return (-1);
+	}
+
+	/*
+	 * If we have a decimal value, then do the computation with floating
+	 * point arithmetic.  Otherwise, use standard arithmetic.
+	 */
+	if (*end == '.') {
+		double fval = strtod(value, &end);
+
+		if ((shift = str2shift(hdl, end)) == -1)
+			return (-1);
+
+		fval *= pow(2, shift);
+
+		/*
+		 * UINT64_MAX is not representable as a double and converts
+		 * to 2^64, so compare with >= in order to also reject exactly
+		 * 2^64, which would be out of range for the cast below.
+		 */
+		if (fval >= (double)UINT64_MAX) {
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
+			return (-1);
+		}
+
+		*num = (uint64_t)fval;
+	} else {
+		if ((shift = str2shift(hdl, end)) == -1)
+			return (-1);
+
+		/*
+		 * Check for overflow.  The shift count is tested first so
+		 * that the shifts below are never by 64 bits or more.
+		 */
+		if (shift >= 64 || (*num << shift) >> shift != *num) {
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
+			return (-1);
+		}
+
+		*num <<= shift;
+	}
+
+	return (0);
+}
+
+/*
+ * Given a propname=value nvpair to set, parse any numeric properties
+ * (index, boolean, etc) if they are specified as strings and add the
+ * resulting nvpair to the returned nvlist.
+ *
+ * At the DSL layer, all properties are either 64-bit numbers or strings.
+ * We want the user to be able to ignore this fact and specify properties
+ * as native values (numbers, for example) or as strings (to simplify
+ * command line utilities).  This also handles converting index types
+ * (compression, checksum, etc) from strings to their on-disk index.
+ *
+ * 'prop' is looked up in the pool property table when 'type' is
+ * ZFS_TYPE_POOL and in the dataset property table otherwise.  '*svalp' and
+ * '*ivalp' are first reset to NULL and 0; a string property then sets
+ * '*svalp', while a number or index property sets '*ivalp'.  A number
+ * property given as the string "none" leaves '*ivalp' at 0.  The parsed
+ * value is added to 'ret' under the property's canonical name.
+ *
+ * Returns 0 on success.  On a malformed value an auxiliary message is
+ * recorded and -1 is returned after reporting EZFS_BADPROP with 'errbuf';
+ * if adding to 'ret' fails, no_memory() is called and -1 is returned.  A
+ * property type other than string, number or index calls abort().
+ */
+int
+zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
+    zfs_type_t type, nvlist_t *ret, char **svalp, uint64_t *ivalp,
+    const char *errbuf)
+{
+	data_type_t datatype = nvpair_type(elem);
+	zprop_type_t proptype;
+	const char *propname;
+	char *value;
+	boolean_t isnone = B_FALSE;
+
+	if (type == ZFS_TYPE_POOL) {
+		proptype = zpool_prop_get_type(prop);
+		propname = zpool_prop_to_name(prop);
+	} else {
+		proptype = zfs_prop_get_type(prop);
+		propname = zfs_prop_to_name(prop);
+	}
+
+	/*
+	 * Convert any properties to the internal DSL value types.
+	 */
+	*svalp = NULL;
+	*ivalp = 0;
+
+	switch (proptype) {
+	case PROP_TYPE_STRING:
+		if (datatype != DATA_TYPE_STRING) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a string"), nvpair_name(elem));
+			goto error;
+		}
+		(void) nvpair_value_string(elem, svalp);
+		if (strlen(*svalp) >= ZFS_MAXPROPLEN) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' is too long"), nvpair_name(elem));
+			goto error;
+		}
+		break;
+
+	case PROP_TYPE_NUMBER:
+		if (datatype == DATA_TYPE_STRING) {
+			(void) nvpair_value_string(elem, &value);
+			if (strcmp(value, "none") == 0) {
+				isnone = B_TRUE;
+			} else if (zfs_nicestrtonum(hdl, value, ivalp)
+			    != 0) {
+				goto error;
+			}
+		} else if (datatype == DATA_TYPE_UINT64) {
+			(void) nvpair_value_uint64(elem, ivalp);
+		} else {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a number"), nvpair_name(elem));
+			goto error;
+		}
+
+		/*
+		 * Quota special: force 'none' and don't allow 0.
+		 */
+		if ((type & ZFS_TYPE_DATASET) && *ivalp == 0 && !isnone &&
+		    (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_REFQUOTA)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "use 'none' to disable quota/refquota"));
+			goto error;
+		}
+		break;
+
+	case PROP_TYPE_INDEX:
+		if (datatype != DATA_TYPE_STRING) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a string"), nvpair_name(elem));
+			goto error;
+		}
+
+		(void) nvpair_value_string(elem, &value);
+
+		if (zprop_string_to_index(prop, value, ivalp, type) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be one of '%s'"), propname,
+			    zprop_values(prop, type));
+			goto error;
+		}
+		break;
+
+	default:
+		abort();
+	}
+
+	/*
+	 * Add the result to our return set of properties.  A non-NULL '*svalp'
+	 * means a string property was parsed; everything else is stored as a
+	 * 64-bit number.
+	 */
+	if (*svalp != NULL) {
+		if (nvlist_add_string(ret, propname, *svalp) != 0) {
+			(void) no_memory(hdl);
+			return (-1);
+		}
+	} else {
+		if (nvlist_add_uint64(ret, propname, *ivalp) != 0) {
+			(void) no_memory(hdl);
+			return (-1);
+		}
+	}
+
+	return (0);
+error:
+	(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+	return (-1);
+}
+
+/*
+ * Look up 'propname' for the given 'type', allocate a property list entry
+ * describing it and store that entry in '*listp'.  A name with no table
+ * entry is accepted only as a user-defined dataset property, in which case
+ * the entry carries a private copy of the name.  Returns 0 on success; on
+ * failure nothing is stored in '*listp'.
+ */
+static int
+addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp,
+    zfs_type_t type)
+{
+	zprop_list_t *node;
+	int id = zprop_name_to_prop(propname, type);
+
+	/* A known property that does not apply to 'type' counts as unknown. */
+	if (id != ZPROP_INVAL && !zprop_valid_for_type(id, type))
+		id = ZPROP_INVAL;
+
+	/*
+	 * When no property table entry can be found, return failure if
+	 * this is a pool property or if this isn't a user-defined
+	 * dataset property.
+	 */
+	if (id == ZPROP_INVAL) {
+		if (type == ZFS_TYPE_POOL || !zfs_prop_user(propname)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid property '%s'"), propname);
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "bad property list")));
+		}
+	}
+
+	node = zfs_alloc(hdl, sizeof (zprop_list_t));
+	if (node == NULL)
+		return (-1);
+
+	node->pl_prop = id;
+	if (id != ZPROP_INVAL) {
+		node->pl_width = zprop_width(id, &node->pl_fixed, type);
+	} else {
+		node->pl_user_prop = zfs_strdup(hdl, propname);
+		if (node->pl_user_prop == NULL) {
+			free(node);
+			return (-1);
+		}
+		node->pl_width = strlen(propname);
+	}
+
+	*listp = node;
+
+	return (0);
+}
+
+/*
+ * Given a comma-separated list of properties, construct a property list
+ * containing both user-defined and native properties.  This function will
+ * return a NULL list if 'all' is specified, which can later be expanded
+ * by zprop_expand_list().
+ *
+ * The string 'props' is modified in place: the character following each
+ * property name is overwritten with '\0' while the list is scanned and is
+ * not restored.  The name 'space' is shorthand that appends the entries
+ * listed in spaceprops[] below.
+ *
+ * Returns 0 on success.  An empty 'props' string or an empty name between
+ * commas reports EZFS_BADPROP; a failure from addlist() returns -1.  On
+ * failure, entries appended before the error remain linked from '*listp'.
+ */
+int
+zprop_get_list(libzfs_handle_t *hdl, char *props, zprop_list_t **listp,
+    zfs_type_t type)
+{
+	*listp = NULL;
+
+	/*
+	 * If 'all' is specified, return a NULL list.
+	 */
+	if (strcmp(props, "all") == 0)
+		return (0);
+
+	/*
+	 * If no props were specified, return an error.
+	 */
+	if (props[0] == '\0') {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no properties specified"));
+		return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
+		    "bad property list")));
+	}
+
+	/*
+	 * It would be nice to use getsubopt() here, but the inclusion of column
+	 * aliases makes this more effort than it's worth.
+	 */
+	while (*props != '\0') {
+		size_t len;
+		char *p;
+		char c;
+
+		if ((p = strchr(props, ',')) == NULL) {
+			len = strlen(props);
+			p = props + len;
+		} else {
+			len = p - props;
+		}
+
+		/*
+		 * Check for empty options.
+		 */
+		if (len == 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "empty property name"));
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "bad property list")));
+		}
+
+		/*
+		 * Check all regular property names.  The separator is saved
+		 * in 'c' and replaced with '\0' so that 'props' can be used
+		 * as a NUL-terminated name.
+		 */
+		c = props[len];
+		props[len] = '\0';
+
+		if (strcmp(props, "space") == 0) {
+			static char *spaceprops[] = {
+				"name", "avail", "used", "usedbysnapshots",
+				"usedbydataset", "usedbyrefreservation",
+				"usedbychildren", NULL
+			};
+			int i;
+
+			for (i = 0; spaceprops[i]; i++) {
+				if (addlist(hdl, spaceprops[i], listp, type))
+					return (-1);
+				listp = &(*listp)->pl_next;
+			}
+		} else {
+			if (addlist(hdl, props, listp, type))
+				return (-1);
+			listp = &(*listp)->pl_next;
+		}
+
+		props = p;
+		if (c == ',')
+			props++;
+	}
+
+	return (0);
+}
+
+/*
+ * Release every entry of the property list starting at 'pl', together with
+ * the pl_user_prop string attached to each entry.  A NULL list is accepted.
+ */
+void
+zprop_free_list(zprop_list_t *pl)
+{
+	zprop_list_t *cur, *following;
+
+	for (cur = pl; cur != NULL; cur = following) {
+		/* Fetch the successor before the entry itself is released. */
+		following = cur->pl_next;
+		free(cur->pl_user_prop);
+		free(cur);
+	}
+}
+
+typedef struct expand_data {	/* state handed to zprop_expand_list_cb() */
+	zprop_list_t	**last;	/* where the next entry gets linked in */
+	libzfs_handle_t	*hdl;	/* handle passed to zfs_alloc() */
+	zfs_type_t type;	/* type passed to zprop_width() */
+} expand_data_t;
+
+/*
+ * Callback handed to zprop_iter_common() by zprop_expand_list(): allocate a
+ * list entry for 'prop' and link it in at the position tracked by the
+ * expand_data_t passed as 'cb'.  Returns ZPROP_CONT after adding the entry,
+ * or ZPROP_INVAL if the entry could not be allocated.
+ */
+int
+zprop_expand_list_cb(int prop, void *cb)
+{
+	expand_data_t *state = cb;
+	zprop_list_t *node;
+
+	node = zfs_alloc(state->hdl, sizeof (zprop_list_t));
+	if (node == NULL)
+		return (ZPROP_INVAL);
+
+	node->pl_all = B_TRUE;
+	node->pl_prop = prop;
+	node->pl_width = zprop_width(prop, &node->pl_fixed, state->type);
+
+	/* Link the entry in, then move the link position past it. */
+	*state->last = node;
+	state->last = &node->pl_next;
+
+	return (ZPROP_CONT);
+}
+
+/*
+ * Expand an 'all' specification, represented by a NULL list in '*plp', into
+ * one entry per property visited by zprop_iter_common(), preceded by an
+ * entry for the 'name' property.  A list that is already non-NULL is left
+ * untouched.  Returns 0 on success and -1 if the iteration reports
+ * ZPROP_INVAL or the 'name' entry cannot be allocated.
+ */
+int
+zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type)
+{
+	expand_data_t state;
+	zprop_list_t *head;
+
+	/* Only the very first call for an 'all' specification expands. */
+	if (*plp != NULL)
+		return (0);
+
+	state.hdl = hdl;
+	state.type = type;
+	state.last = plp;
+
+	if (zprop_iter_common(zprop_expand_list_cb, &state, B_FALSE, B_FALSE,
+	    type) == ZPROP_INVAL)
+		return (-1);
+
+	/*
+	 * Add 'name' to the beginning of the list, which is handled
+	 * specially.
+	 */
+	head = zfs_alloc(hdl, sizeof (zprop_list_t));
+	if (head == NULL)
+		return (-1);
+
+	if (type == ZFS_TYPE_POOL)
+		head->pl_prop = ZPOOL_PROP_NAME;
+	else
+		head->pl_prop = ZFS_PROP_NAME;
+	head->pl_width = zprop_width(head->pl_prop, &head->pl_fixed, type);
+	head->pl_all = B_TRUE;
+	head->pl_next = *plp;
+	*plp = head;
+
+	return (0);
+}
+
+/*
+ * Thin wrapper: forwards every argument unchanged to zprop_iter_common()
+ * and hands back its result.
+ */
+int
+zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered,
+    zfs_type_t type)
+{
+	int rc;
+
+	rc = zprop_iter_common(func, cb, show_all, ordered, type);
+
+	return (rc);
+}
--- a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
@ -19,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -101,20 +101,24 @@ void
 zmutex_init(kmutex_t *mp)
 {
 	mp->m_owner = NULL;
+	mp->initialized = B_TRUE;
 	(void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
 }

 void
 zmutex_destroy(kmutex_t *mp)
 {
+	ASSERT(mp->initialized == B_TRUE);
 	ASSERT(mp->m_owner == NULL);
 	(void) _mutex_destroy(&(mp)->m_lock);
 	mp->m_owner = (void *)-1UL;
+	mp->initialized = B_FALSE;
 }

 void
 mutex_enter(kmutex_t *mp)
 {
+	ASSERT(mp->initialized == B_TRUE);
 	ASSERT(mp->m_owner != (void *)-1UL);
 	ASSERT(mp->m_owner != curthread);
 	VERIFY(mutex_lock(&mp->m_lock) == 0);
@ -125,6 +129,7 @@ mutex_enter(kmutex_t *mp)
 int
 mutex_tryenter(kmutex_t *mp)
 {
+	ASSERT(mp->initialized == B_TRUE);
 	ASSERT(mp->m_owner != (void *)-1UL);
 	if (mutex_trylock(&mp->m_lock) == 0) {
 		ASSERT(mp->m_owner == NULL);
@ -138,6 +143,7 @@ mutex_tryenter(kmutex_t *mp)
 void
 mutex_exit(kmutex_t *mp)
 {
+	ASSERT(mp->initialized == B_TRUE);
 	ASSERT(mp->m_owner == curthread);
 	mp->m_owner = NULL;
 	VERIFY(mutex_unlock(&mp->m_lock) == 0);
@ -146,6 +152,7 @@ mutex_exit(kmutex_t *mp)
 void *
 mutex_owner(kmutex_t *mp)
 {
+	ASSERT(mp->initialized == B_TRUE);
 	return (mp->m_owner);
 }

@ -160,7 +167,9 @ rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
 {
 	rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
 	rwlp->rw_owner = NULL;
+	/* rw_enter() asserts on rw_count, so it must start out as zero. */
 	rwlp->rw_count = 0;
+	rwlp->initialized = B_TRUE;
 }

 void
@ -168,22 +175,23 @@ rw_destroy(krwlock_t *rwlp)
 {
 	rwlock_destroy(&rwlp->rw_lock);
 	rwlp->rw_owner = (void *)-1UL;
-	rwlp->rw_count = -2;
+	rwlp->initialized = B_FALSE;
 }

 void
 rw_enter(krwlock_t *rwlp, krw_t rw)
 {
 	//ASSERT(!RW_LOCK_HELD(rwlp));
+	ASSERT(rwlp->initialized == B_TRUE);
 	ASSERT(rwlp->rw_owner != (void *)-1UL);
 	ASSERT(rwlp->rw_owner != curthread);

 	if (rw == RW_READER) {
-		(void) rw_rdlock(&rwlp->rw_lock);
+		VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
 		ASSERT(rwlp->rw_count >= 0);
 		atomic_add_int(&rwlp->rw_count, 1);
 	} else {
-		(void) rw_wrlock(&rwlp->rw_lock);
+		VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
 		ASSERT(rwlp->rw_count == 0);
 		rwlp->rw_count = -1;
 		rwlp->rw_owner = curthread;
@ -193,6 +201,7 @@ rw_enter(krwlock_t *rwlp, krw_t rw)
 void
 rw_exit(krwlock_t *rwlp)
 {
+	ASSERT(rwlp->initialized == B_TRUE);
 	ASSERT(rwlp->rw_owner != (void *)-1UL);

 	if (rwlp->rw_owner == curthread) {
@ -205,7 +214,7 @@ rw_exit(krwlock_t *rwlp)
 		ASSERT(rwlp->rw_count > 0);
 		atomic_add_int(&rwlp->rw_count, -1);
 	}
-	(void) rw_unlock(&rwlp->rw_lock);
+	VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
 }

 int
@ -213,6 +222,7 @@ rw_tryenter(krwlock_t *rwlp, krw_t rw)
 {
 	int rv;

+	ASSERT(rwlp->initialized == B_TRUE);
 	ASSERT(rwlp->rw_owner != (void *)-1UL);
 	ASSERT(rwlp->rw_owner != curthread);

@ -241,6 +251,7 @@ rw_tryenter(krwlock_t *rwlp, krw_t rw)
 int
 rw_tryupgrade(krwlock_t *rwlp)
 {
+	ASSERT(rwlp->initialized == B_TRUE);
 	ASSERT(rwlp->rw_owner != (void *)-1UL);

 	return (0);
@ -422,9 +433,10 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
 	return (0);
 }

+/*ARGSUSED*/
 int
 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
-    int x3, vnode_t *startvp)
+    int x3, vnode_t *startvp, int fd)
 {
 	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
 	int ret;
@ -432,6 +444,7 @@ vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
 	ASSERT(startvp == rootdir);
 	(void) sprintf(realpath, "/%s", path);

+	/* fd is ignored for now; needed only to simulate nbmand support */
 	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);

 	umem_free(realpath, strlen(path) + 2);
@ -469,7 +482,7 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
 }

 void
-vn_close(vnode_t *vp)
+vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td)
 {
 	close(vp->v_fd);
 	spa_strfree(vp->v_path);
@ -657,7 +670,8 @@ kobj_open_file(char *name)
 	vnode_t *vp;

 	/* set vp as the _fd field of the file */
-	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0)
+	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
+	    -1) != 0)
 		return ((void *)-1UL);

 	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
@ -679,7 +693,7 @@ kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
 void
 kobj_close_file(struct _buf *file)
 {
-	vn_close((vnode_t *)file->_fd);
+	vn_close((vnode_t *)file->_fd, 0, NULL, NULL);
 	umem_free(file, sizeof (struct _buf));
 }

@ -690,7 +704,7 @@ kobj_get_filesize(struct _buf *file, uint64_t *size)
 	vnode_t *vp = (vnode_t *)file->_fd;

 	if (fstat64(vp->v_fd, &st) == -1) {
-		vn_close(vp);
+		vn_close(vp, 0, NULL, NULL);
 		return (errno);
 	}
 	*size = st.st_size;
@ -746,10 +760,11 @@ highbit(ulong_t i)
 }
 #endif

+static int random_fd = -1, urandom_fd = -1;
+
 static int
-random_get_bytes_common(uint8_t *ptr, size_t len, char *devname)
+random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
 {
-	int fd = open(devname, O_RDONLY);
 	size_t resid = len;
 	ssize_t bytes;

@ -757,26 +772,24 @@ random_get_bytes_common(uint8_t *ptr, size_t len, char *devname)

 	while (resid != 0) {
 		bytes = read(fd, ptr, resid);
-		ASSERT(bytes >= 0);
+		ASSERT3S(bytes, >=, 0);
 		ptr += bytes;
 		resid -= bytes;
 	}

-	close(fd);
-
 	return (0);
 }

 int
 random_get_bytes(uint8_t *ptr, size_t len)
 {
-	return (random_get_bytes_common(ptr, len, "/dev/random"));
+	return (random_get_bytes_common(ptr, len, random_fd));
 }

 int
 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
 {
-	return (random_get_bytes_common(ptr, len, "/dev/urandom"));
+	return (random_get_bytes_common(ptr, len, urandom_fd));
 }

 int
@ -815,7 +828,11 @@ kernel_init(int mode)
 	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
 	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));

-	snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid());
+	snprintf(hw_serial, sizeof (hw_serial), "%lu",
+	    (unsigned long)gethostid());
+
+	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
+	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);

 	spa_init(mode);
 }
@ -824,6 +841,12 @@ void
 kernel_fini(void)
 {
 	spa_fini();
+
+	close(random_fd);
+	close(urandom_fd);
+
+	random_fd = -1;
+	urandom_fd = -1;
 }

 int
@ -850,3 +873,62 @@ z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,

 	return (ret);
 }
+
+/*
+ * libzpool stub: the credential is ignored and uid 0 is always reported.
+ */
+/*ARGSUSED*/
+uid_t
+crgetuid(cred_t *cr)
+{
+	uid_t uid = 0;
+
+	return (uid);
+}
+
+/*
+ * libzpool stub: the credential is ignored and gid 0 is always reported.
+ */
+/*ARGSUSED*/
+gid_t
+crgetgid(cred_t *cr)
+{
+	gid_t gid = 0;
+
+	return (gid);
+}
+
+/*
+ * libzpool stub: the credential is ignored and a group count of 0 is
+ * always reported.
+ */
+/*ARGSUSED*/
+int
+crgetngroups(cred_t *cr)
+{
+	int ngroups = 0;
+
+	return (ngroups);
+}
+
+/*
+ * libzpool stub: the credential is ignored and no group array is ever
+ * returned (always NULL).
+ */
+/*ARGSUSED*/
+gid_t *
+crgetgroups(cred_t *cr)
+{
+	gid_t *groups = NULL;
+
+	return (groups);
+}
+
+/*
+ * libzpool stub: no check is performed on 'name' or 'cr'; the result is
+ * always 0 (presumably meaning "permitted" -- confirm against callers).
+ */
+/*ARGSUSED*/
+int
+zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
+{
+	int error = 0;
+
+	return (error);
+}
+
+/*
+ * libzpool stub: no check is performed on 'from', 'to' or 'cr'; the result
+ * is always 0 (presumably meaning "permitted" -- confirm against callers).
+ */
+/*ARGSUSED*/
+int
+zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
+{
+	int error = 0;
+
+	return (error);
+}
+
+/*
+ * libzpool stub: no check is performed on 'name' or 'cr'; the result is
+ * always 0 (presumably meaning "permitted" -- confirm against callers).
+ */
+/*ARGSUSED*/
+int
+zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
+{
+	int error = 0;
+
+	return (error);
+}
+
+/*
+ * Allocate a zero-filled ksiddomain_t whose kd_name is a private copy of
+ * 'dom'.  Every call returns a new object, to be released with
+ * ksiddomain_rele().
+ */
+ksiddomain_t *
+ksid_lookupdomain(const char *dom)
+{
+	ksiddomain_t *sd = umem_zalloc(sizeof (*sd), UMEM_NOFAIL);
+
+	sd->kd_name = spa_strdup(dom);
+
+	return (sd);
+}
+
+/*
+ * Free the name string held by 'ksid' and then the ksiddomain_t itself.
+ */
+void
+ksiddomain_rele(ksiddomain_t *ksid)
+{
+	char *name = ksid->kd_name;
+
+	spa_strfree(name);
+	umem_free(ksid, sizeof (*ksid));
+}
--- a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
@ -19,15 +19,13 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

 #ifndef _SYS_ZFS_CONTEXT_H
 #define	_SYS_ZFS_CONTEXT_H

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@ -64,6 +62,7 @@ extern "C" {
 #include <fsshare.h>
 #include <sys/note.h>
 #include <sys/types.h>
+#include <sys/cred.h>
 #include <sys/atomic.h>
 #include <sys/sysmacros.h>
 #include <sys/bitmap.h>
@ -78,8 +77,10 @@ extern "C" {
 #include <sys/debug.h>
 #include <sys/sdt.h>
 #include <sys/kstat.h>
+#include <sys/u8_textprep.h>
 #include <sys/kernel.h>
 #include <sys/disk.h>
+#include <sys/sysevent/eventdefs.h>
 #include <machine/atomic.h>

 #define	ZFS_EXPORTS_PATH	"/etc/zfs/exports"
@ -116,11 +117,12 @@ extern void vcmn_err(int, const char *, __va_list);
 extern void panic(const char *, ...);
 extern void vpanic(const char *, __va_list);

+#define	fm_panic	panic
+
 /* This definition is copied from assert.h. */
 #if defined(__STDC__)
 #if __STDC_VERSION__ - 0 >= 199901L
-#define	verify(EX) (void)((EX) || \
-	(__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
+#define	verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
 #else
 #define	verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
 #endif /* __STDC_VERSION__ - 0 >= 199901L */
@ -167,11 +169,16 @@ _NOTE(CONSTCOND) } while (0)
 #endif

 /*
- * Dtrace SDT probes have different signatures in userland than they do in
+ * DTrace SDT probes have different signatures in userland than they do in
 * kernel.  If they're being used in kernel code, re-define them out of
 * existence for their counterparts in libzpool.
 */

+#ifdef DTRACE_PROBE
+#undef	DTRACE_PROBE
+#define	DTRACE_PROBE(a)	((void)0)
+#endif	/* DTRACE_PROBE */
+
 #ifdef DTRACE_PROBE1
 #undef	DTRACE_PROBE1
 #define	DTRACE_PROBE1(a, b, c)	((void)0)
@ -212,8 +219,9 @@ extern kthread_t *zk_thread_create(void (*func)(), void *arg);
 * Mutexes
 */
 typedef struct kmutex {
-	void	*m_owner;
-	mutex_t	m_lock;
+	void		*m_owner;
+	boolean_t	initialized;
+	mutex_t		m_lock;
 } kmutex_t;

 #define	MUTEX_DEFAULT	USYNC_THREAD
@ -243,6 +251,7 @@ extern void *mutex_owner(kmutex_t *mp);
 typedef struct krwlock {
 	int		rw_count;
 	void		*rw_owner;
+	boolean_t	initialized;
 	rwlock_t	rw_lock;
 } krwlock_t;

@ -253,6 +262,7 @@ typedef int krw_t;
 #define	RW_DEFAULT	USYNC_THREAD

 #undef RW_READ_HELD
+#define RW_READ_HELD(x)		((x)->rw_owner == NULL && (x)->rw_count > 0)

 #undef RW_WRITE_HELD
 #define	RW_WRITE_HELD(x)	((x)->rw_owner == curthread)
@ -267,6 +277,11 @@ extern void rw_exit(krwlock_t *rwlp);
 extern int rw_lock_held(krwlock_t *rwlp);
 #define	rw_downgrade(rwlp) do { } while (0)

+extern uid_t crgetuid(cred_t *cr);
+extern gid_t crgetgid(cred_t *cr);
+extern int crgetngroups(cred_t *cr);
+extern gid_t *crgetgroups(cred_t *cr);
+
 /*
 * Condition variables
 */
@ -285,6 +300,7 @@ extern void cv_broadcast(kcondvar_t *cv);
 * Kernel memory
 */
 #define	KM_SLEEP		UMEM_NOFAIL
+#define	KM_PUSHPAGE		KM_SLEEP
 #define	KM_NOSLEEP		UMEM_DEFAULT
 #define	KMC_NODEBUG		UMC_NODEBUG
 #define	kmem_alloc(_s, _f)	umem_alloc(_s, _f)
@ -322,6 +338,9 @@ extern void	taskq_destroy(taskq_t *);
 extern void	taskq_wait(taskq_t *);
 extern int	taskq_member(taskq_t *, void *);

+#define	XVA_MAPSIZE	3
+#define	XVA_MAGIC	0x78766174
+
 /*
 * vnodes
 */
@ -331,44 +350,93 @@ typedef struct vnode {
 	char		*v_path;
 } vnode_t;

+
+typedef struct xoptattr {	/* embedded in xvattr_t as xva_xoptattrs */
+	timestruc_t	xoa_createtime;	/* Create time of file */
+	/*
+	 * NOTE(review): the one-byte fields below look like per-file flags;
+	 * their exact semantics are not visible in this header -- confirm.
+	 */
+	uint8_t		xoa_archive;
+	uint8_t		xoa_system;
+	uint8_t		xoa_readonly;
+	uint8_t		xoa_hidden;
+	uint8_t		xoa_nounlink;
+	uint8_t		xoa_immutable;
+	uint8_t		xoa_appendonly;
+	uint8_t		xoa_nodump;
+	uint8_t		xoa_settable;
+	uint8_t		xoa_opaque;
+	uint8_t		xoa_av_quarantined;
+	uint8_t		xoa_av_modified;
+} xoptattr_t;
+
 typedef struct vattr {
 	uint_t		va_mask;	/* bit-mask of attributes */
 	u_offset_t	va_size;	/* file size in bytes */
 } vattr_t;

-#define	AT_TYPE		0x0001
-#define	AT_MODE		0x0002
-#define	AT_UID		0x0004
-#define	AT_GID		0x0008
-#define	AT_FSID		0x0010
-#define	AT_NODEID	0x0020
-#define	AT_NLINK	0x0040
-#define	AT_SIZE		0x0080
-#define	AT_ATIME	0x0100
-#define	AT_MTIME	0x0200
-#define	AT_CTIME	0x0400
-#define	AT_RDEV		0x0800
-#define	AT_BLKSIZE	0x1000
-#define	AT_NBLOCKS	0x2000
-#define	AT_SEQ		0x8000
+
+typedef struct xvattr {
+	vattr_t		xva_vattr;	/* Embedded vattr structure */
+	uint32_t	xva_magic;	/* Magic Number (presumably XVA_MAGIC) */
+	uint32_t	xva_mapsize;	/* Size of attr bitmap (32-bit words) */
+	uint32_t	*xva_rtnattrmapp;	/* Ptr to xva_rtnattrmap[] */
+	uint32_t	xva_reqattrmap[XVA_MAPSIZE];	/* Requested attrs */
+	uint32_t	xva_rtnattrmap[XVA_MAPSIZE];	/* Returned attrs */
+	xoptattr_t	xva_xoptattrs;	/* Optional attributes */
+} xvattr_t;
+
+typedef struct vsecattr {
+	/* NOTE(review): the mask bit definitions are not visible here. */
+	uint_t		vsa_mask;	/* See below */
+	int		vsa_aclcnt;	/* ACL entry count */
+	void		*vsa_aclentp;	/* pointer to ACL entries */
+	int		vsa_dfaclcnt;	/* default ACL entry count */
+	void		*vsa_dfaclentp;	/* pointer to default ACL entries */
+	size_t		vsa_aclentsz;	/* ACE size in bytes of vsa_aclentp */
+} vsecattr_t;
+
+#define	AT_TYPE		0x00001
+#define	AT_MODE		0x00002
+#define	AT_UID		0x00004
+#define	AT_GID		0x00008
+#define	AT_FSID		0x00010
+#define	AT_NODEID	0x00020
+#define	AT_NLINK	0x00040
+#define	AT_SIZE		0x00080
+#define	AT_ATIME	0x00100
+#define	AT_MTIME	0x00200
+#define	AT_CTIME	0x00400
+#define	AT_RDEV		0x00800
+#define	AT_BLKSIZE	0x01000
+#define	AT_NBLOCKS	0x02000
+#define	AT_SEQ		0x08000
+#define	AT_XVATTR	0x10000

 #define	CRCREAT		0

-#define	VOP_CLOSE(vp, f, c, o, cr)	0
-#define	VOP_PUTPAGE(vp, of, sz, fl, cr)	0
-#define	VOP_GETATTR(vp, vap, fl)	((vap)->va_size = (vp)->v_size, 0)
+#define	VOP_CLOSE(vp, f, c, o, cr, ct)	0
+#define	VOP_PUTPAGE(vp, of, sz, fl, cr, ct)	0
+#define	VOP_GETATTR(vp, vap, cr)	((vap)->va_size = (vp)->v_size, 0)

-#define	VOP_FSYNC(vp, f, cr)	fsync((vp)->v_fd)
+#define	VOP_FSYNC(vp, f, cr, ct)	fsync((vp)->v_fd)

-#define	VN_RELE(vp)	vn_close(vp)
+#define	VN_RELE(vp)	vn_close(vp, 0, NULL, NULL)
+
+#define	vn_lock(vp, type)
+#define	VOP_UNLOCK(vp, type)
+#ifdef VFS_LOCK_GIANT
+#undef VFS_LOCK_GIANT
+#endif
+#define	VFS_LOCK_GIANT(mp)	0
+#ifdef VFS_UNLOCK_GIANT
+#undef VFS_UNLOCK_GIANT
+#endif
+#define	VFS_UNLOCK_GIANT(vfslocked)

 extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp,
    int x2, int x3);
 extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp,
-    int x2, int x3, vnode_t *vp);
+    int x2, int x3, vnode_t *vp, int fd);
 extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len,
    offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp);
-extern void vn_close(vnode_t *vp);
+extern void vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td);

 #define	vn_remove(path, x1, x2)		remove(path)
 #define	vn_rename(from, to, seg)	rename((from), (to))
@ -397,8 +465,9 @@ extern void delay(clock_t ticks);

 #define	CPU_SEQID	(thr_self() & (max_ncpus - 1))

-#define	kcred		NULL
-#define	CRED()		NULL
+#ifndef ptob
+#define	ptob(x)		((x) * PAGESIZE)
+#endif

 extern uint64_t physmem;

@ -455,11 +524,31 @@ struct bootstat {
 	uint64_t st_size;
 };

+typedef struct ace_object {
+	uid_t		a_who;
+	uint32_t	a_access_mask;
+	uint16_t	a_flags;
+	/* presumably an ACE_*_OBJECT_ACE_TYPE value (see below) -- confirm */
+	uint16_t	a_type;
+	uint8_t		a_obj_type[16];		/* 16 raw bytes */
+	uint8_t		a_inherit_obj_type[16];	/* 16 raw bytes */
+} ace_object_t;
+
+
+#define	ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE	0x05
+#define	ACE_ACCESS_DENIED_OBJECT_ACE_TYPE	0x06
+#define	ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE	0x07
+#define	ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE	0x08
+
 extern struct _buf *kobj_open_file(char *name);
 extern int kobj_read_file(struct _buf *file, char *buf, unsigned size,
    unsigned off);
 extern void kobj_close_file(struct _buf *file);
 extern int kobj_get_filesize(struct _buf *file, uint64_t *size);
+extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
+extern int zfs_secpolicy_rename_perms(const char *from, const char *to,
+    cred_t *cr);
+extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
+extern zoneid_t getzoneid(void);
 /* Random compatibility stuff. */
 #define	lbolt	(gethrtime() >> 23)
 #define	lbolt64	(gethrtime() >> 23)
@ -482,18 +571,32 @@ struct file {
 #define	FCREAT	O_CREAT
 #define	FOFFMAX	0x0

+/* SID stuff */
+typedef struct ksiddomain {	/* built by ksid_lookupdomain() */
+	uint_t	kd_ref;		/* NOTE(review): not set in libzpool -- confirm */
+	uint_t	kd_len;		/* NOTE(review): not set in libzpool -- confirm */
+	char	*kd_name;	/* copy of the domain name; see ksiddomain_rele() */
+} ksiddomain_t;
+
+ksiddomain_t *ksid_lookupdomain(const char *);
+void ksiddomain_rele(ksiddomain_t *);
+
 #define	SX_SYSINIT(name, lock, desc)

 #define	SYSCTL_DECL(...)
 #define	SYSCTL_NODE(...)
 #define	SYSCTL_INT(...)
+#define	SYSCTL_UINT(...)
 #define	SYSCTL_ULONG(...)
+#define	SYSCTL_QUAD(...)
 #ifdef TUNABLE_INT
 #undef TUNABLE_INT
 #undef TUNABLE_ULONG
+#undef TUNABLE_QUAD
 #endif
 #define	TUNABLE_INT(...)
 #define	TUNABLE_ULONG(...)
+#define	TUNABLE_QUAD(...)

 /* Errors */

--- a/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@ -20,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

@ -177,6 +176,9 @@ taskq_create(const char *name, int nthreads, pri_t pri,
 	int t;

 	rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL);
+	mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL);
 	tq->tq_flags = flags | TASKQ_ACTIVE;
 	tq->tq_active = nthreads;
 	tq->tq_nthreads = nthreads;
@ -230,6 +232,9 @@ taskq_destroy(taskq_t *tq)
 	kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t));

 	rw_destroy(&tq->tq_threadlock);
+	mutex_destroy(&tq->tq_lock);
+	cv_destroy(&tq->tq_dispatch_cv);
+	cv_destroy(&tq->tq_wait_cv);

 	kmem_free(tq, sizeof (taskq_t));
 }
--- a/cddl/contrib/opensolaris/lib/libzpool/common/util.c
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/util.c
@ -19,12 +19,10 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <assert.h>
 #include <sys/zfs_context.h>
 #include <sys/avl.h>
@ -67,46 +65,58 @@ nicenum(uint64_t num, char *buf)
 }

 static void
-show_vdev_stats(const char *desc, nvlist_t *nv, int indent)
+show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent)
 {
+	vdev_stat_t *vs;
+	vdev_stat_t v0 = { 0 };
+	uint64_t sec;
+	uint64_t is_log = 0;
 	nvlist_t **child;
 	uint_t c, children;
-	vdev_stat_t *vs;
-	uint64_t sec;
 	char used[6], avail[6];
 	char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6];
+	char *prefix = "";

-	if (indent == 0) {
-		(void) printf("                     "
+	if (indent == 0 && desc != NULL) {
+		(void) printf("                           "
 		    " capacity   operations   bandwidth  ---- errors ----\n");
-		(void) printf("description          "
+		(void) printf("description                "
 		    "used avail  read write  read write  read write cksum\n");
 	}

-	VERIFY(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
-	    (uint64_t **)&vs, &c) == 0);
+	if (desc != NULL) {
+		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);

-	sec = MAX(1, vs->vs_timestamp / NANOSEC);
+		if (is_log)
+			prefix = "log ";

-	nicenum(vs->vs_alloc, used);
-	nicenum(vs->vs_space - vs->vs_alloc, avail);
-	nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops);
-	nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops);
-	nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes);
-	nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes);
-	nicenum(vs->vs_read_errors, rerr);
-	nicenum(vs->vs_write_errors, werr);
-	nicenum(vs->vs_checksum_errors, cerr);
+		if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+		    (uint64_t **)&vs, &c) != 0)
+			vs = &v0;

-	(void) printf("%*s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n",
-	    indent, "",
-	    indent - 19 - (vs->vs_space ? 0 : 12), desc,
-	    vs->vs_space ? 6 : 0, vs->vs_space ? used : "",
-	    vs->vs_space ? 6 : 0, vs->vs_space ? avail : "",
-	    rops, wops, rbytes, wbytes, rerr, werr, cerr);
+		sec = MAX(1, vs->vs_timestamp / NANOSEC);

-	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) != 0)
+		nicenum(vs->vs_alloc, used);
+		nicenum(vs->vs_space - vs->vs_alloc, avail);
+		nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops);
+		nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops);
+		nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes);
+		nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes);
+		nicenum(vs->vs_read_errors, rerr);
+		nicenum(vs->vs_write_errors, werr);
+		nicenum(vs->vs_checksum_errors, cerr);
+
+		(void) printf("%*s%s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n",
+		    indent, "",
+		    prefix,
+		    indent + strlen(prefix) - 25 - (vs->vs_space ? 0 : 12),
+		    desc,
+		    vs->vs_space ? 6 : 0, vs->vs_space ? used : "",
+		    vs->vs_space ? 6 : 0, vs->vs_space ? avail : "",
+		    rops, wops, rbytes, wbytes, rerr, werr, cerr);
+	}
+
+	if (nvlist_lookup_nvlist_array(nv, ctype, &child, &children) != 0)
 		return;

 	for (c = 0; c < children; c++) {
@ -120,7 +130,7 @@ show_vdev_stats(const char *desc, nvlist_t *nv, int indent)
 		(void) strcpy(tname, cname);
 		if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0)
 			tname[strlen(tname)] = '0' + np;
-		show_vdev_stats(tname, cnv, indent + 2);
+		show_vdev_stats(tname, ctype, cnv, indent + 2);
 		free(tname);
 	}
 }
@ -131,14 +141,16 @@ show_pool_stats(spa_t *spa)
 	nvlist_t *config, *nvroot;
 	char *name;

-	spa_config_enter(spa, RW_READER, FTAG);
-	config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
-	spa_config_exit(spa, FTAG);
+	VERIFY(spa_get_stats(spa_name(spa), &config, NULL, 0) == 0);

 	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);
 	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
 	    &name) == 0);

-	show_vdev_stats(name, nvroot, 0);
+	show_vdev_stats(name, ZPOOL_CONFIG_CHILDREN, nvroot, 0);
+	show_vdev_stats(NULL, ZPOOL_CONFIG_L2CACHE, nvroot, 0);
+	show_vdev_stats(NULL, ZPOOL_CONFIG_SPARES, nvroot, 0);
+
+	nvlist_free(config);
 }
--- a/cddl/lib/libzfs/Makefile
+++ b/cddl/lib/libzfs/Makefile
@ -10,14 +10,17 @@ DPADD=	${LIBUTIL}
 LDADD=	-lutil

 SRCS=	deviceid.c \
-	mnttab.c \
-	mkdirp.c \
-	zmount.c \
 	fsshare.c \
+	mkdirp.c \
+	mnttab.c \
+	zmount.c \
 	zone.c

-SRCS+=	zfs_namecheck.c \
+SRCS+=	zfs_deleg.c \
+	zfs_namecheck.c \
 	zfs_prop.c \
+	zpool_prop.c \
+	zprop_common.c \
 	libzfs_dataset.c \
 	libzfs_util.c \
 	libzfs_graph.c \
@ -26,6 +29,7 @@ SRCS+=	zfs_namecheck.c \
 	libzfs_changelist.c \
 	libzfs_config.c \
 	libzfs_import.c \
+	libzfs_sendrecv.c \
 	libzfs_status.c

 CFLAGS+= -DZFS_NO_ACL
--- a/cddl/lib/libzpool/Makefile
+++ b/cddl/lib/libzpool/Makefile
@ -18,6 +18,8 @@ ATOMIC_SRCS=	atomic.S
 .PATH: ${.CURDIR}/../../../sys/cddl/compat/opensolaris/kern
 ATOMIC_SRCS=	opensolaris_atomic.c
 .endif
+# UNICODE_SRCS
+.PATH: ${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/unicode

 LIB=		zpool

@ -25,9 +27,13 @@ ZFS_COMMON_SRCS= ${ZFS_COMMON_OBJS:C/.o$/.c/} vdev_file.c
 ZFS_SHARED_SRCS= ${ZFS_SHARED_OBJS:C/.o$/.c/}
 KERNEL_SRCS=	kernel.c taskq.c util.c
 LIST_SRCS=	list.c
+UNICODE_SRCS=	u8_textprep.c

 SRCS=		${ZFS_COMMON_SRCS} ${ZFS_SHARED_SRCS} \
-		${KERNEL_SRCS} ${LIST_SRCS} ${ATOMIC_SRCS}
+		${KERNEL_SRCS} ${LIST_SRCS} ${ATOMIC_SRCS} \
+		${UNICODE_SRCS}
+
+CFLAGS+=	-std=c99

 CFLAGS+=	-I${.CURDIR}/../../../sys/cddl/compat/opensolaris
 CFLAGS+=	-I${.CURDIR}/../../../cddl/compat/opensolaris/include
@ -40,6 +46,13 @@ CFLAGS+=	-I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
 CFLAGS+=	-I${.CURDIR}/../../../cddl/contrib/opensolaris/head
 CFLAGS+=	-I${.CURDIR}/../../../cddl/lib/libumem
 CFLAGS+=	-I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
+# XXX: pthread doesn't have a mutex_owned() equivalent, so we need to look
+#      into libthr private structures. That's sooo evil, but it's only
+#      needed by the ZFS debugging tools.
+CFLAGS+=	-DWANTS_MUTEX_OWNED
+CFLAGS+=	-I${.CURDIR}/../../../lib/libpthread/thread
+CFLAGS+=	-I${.CURDIR}/../../../lib/libpthread/sys
+CFLAGS+=	-I${.CURDIR}/../../../lib/libthr/arch/${MACHINE_ARCH}/include

 DPADD=		${LIBPTHREAD} ${LIBZ}
 LDADD=		-lpthread -lz
--- a/cddl/sbin/zpool/Makefile
+++ b/cddl/sbin/zpool/Makefile
@ -1,10 +1,11 @@
 # $FreeBSD$

-.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/zpool
+.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/zpool \
+	${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs

 PROG=	zpool
 MAN=	zpool.8
-SRCS=	zpool_main.c zpool_vdev.c zpool_iter.c zpool_util.c
+SRCS=	zpool_main.c zpool_vdev.c zpool_iter.c zpool_util.c zfs_comutil.c

 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
 CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/include
@ -15,6 +16,7 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libumem/common
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
--- a/cddl/usr.bin/Makefile
+++ b/cddl/usr.bin/Makefile
@ -7,9 +7,11 @@ SUBDIR= \
 	ctfdump \
 	ctfmerge \
 	sgsmsg \
+	${_zinject} \
 	${_ztest}

 .if ${MK_ZFS} != "no"
+_zinject= zinject
 .if ${MK_LIBTHR} != "no"
 _ztest=	ztest
 .endif
--- a/cddl/usr.bin/zinject/Makefile
+++ b/cddl/usr.bin/zinject/Makefile
@ -0,0 +1,25 @@
+# $FreeBSD$
+
+# Build glue for zinject; the sources live in the OpenSolaris contrib tree.
+.PATH: ${.CURDIR}/../../contrib/opensolaris/cmd/zinject
+
+PROG=	zinject
+SRCS=	zinject.c translate.c
+NO_MAN=
+
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
+CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/include
+CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/lib/libumem
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzfs/common
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzpool/common
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libnvpair
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/head
+CFLAGS+= -I${.CURDIR}/../../lib/libumem
+
+# Each library is listed once, in the same order as LDADD.
+DPADD=	${LIBAVL} ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBUUTIL} \
+	${LIBZFS} ${LIBZPOOL}
+LDADD=	-lavl -lgeom -lm -lnvpair -lumem -luutil -lzfs -lzpool
+
+.include <bsd.prog.mk>
--- a/cddl/usr.bin/ztest/Makefile
+++ b/cddl/usr.bin/ztest/Makefile
@ -1,18 +1,20 @@
 # $FreeBSD$

-.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/ztest
+.PATH: ${.CURDIR}/../..//contrib/opensolaris/cmd/ztest

 PROG=	ztest
 NO_MAN=

+CFLAGS+= -std=c99
+
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
-CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/include
-CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/lib/libumem
-CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
+CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/include
+CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/lib/libumem
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzpool/common
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
-CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/head
 CFLAGS+= -I${.CURDIR}/../../lib/libumem

 DPADD=	${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBZPOOL} \
--- a/cddl/usr.sbin/zdb/Makefile
+++ b/cddl/usr.sbin/zdb/Makefile
@ -6,9 +6,14 @@ PROG=	zdb
 MAN=	zdb.8
 SRCS=	zdb.c zdb_il.c

+CFLAGS+= -std=c99
+
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
 CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/include
 CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/lib/libumem
+CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
+CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common
+CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
@ -16,8 +21,8 @@ CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
 CFLAGS+= -I${.CURDIR}/../../lib/libumem

-DPADD=	${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBZPOOL} \
-	${LIBPTHREAD} ${LIBZ} ${LIBAVL}
-LDADD=	-lm -lnvpair -lumem -lzpool -lpthread -lz -lavl
+DPADD=	${LIBAVL} ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBPTHREAD} ${LIBUMEM} \
+	${LIBUUTIL} ${LIBZ} ${LIBZFS} ${LIBZPOOL}
+LDADD=	-lavl -lgeom -lm -lnvpair -lpthread -lumem -luutil -lz -lzfs -lzpool

 .include <bsd.prog.mk>
--- a/lib/libc/gen/getvfsbyname.3
+++ b/lib/libc/gen/getvfsbyname.3
@ -68,7 +68,7 @@ flag bits, as described below
 .Pp
 The flags are defined as follows:
 .Pp
-.Bl -tag -width VFCF_SYNTHETIC -compact
+.Bl -tag -width VFCF_DELEGADMIN -compact
 .It Dv VFCF_STATIC
 statically compiled into kernel
 .It Dv VFCF_NETWORK
@ -85,7 +85,12 @@ stores file names as Unicode
 can be mounted from within a jail if
 .Va security.jail.mount_allowed
 sysctl is set to
-.Dv 1 .
+.Dv 1
+.It Dv VFCF_DELEGADMIN
+supports delegated administration if
+.Va vfs.usermount
+sysctl is set to
+.Dv 1
 .El
 .Sh RETURN VALUES
 .Rv -std getvfsbyname
--- a/share/man/man9/VFS_SET.9
+++ b/share/man/man9/VFS_SET.9
@ -57,7 +57,7 @@ as the event handler.
 Possible values for the
 .Fa flags
 argument are:
-.Bl -hang -width ".Dv VFCF_SYNTHETIC"
+.Bl -hang -width ".Dv VFCF_DELEGADMIN"
 .It Dv VFCF_STATIC
 File system should be statically available in the kernel.
 .It Dv VFCF_NETWORK
@ -71,10 +71,15 @@ Loopback file system layer.
 .It Dv VFCF_UNICODE
 File names are stored as Unicode.
 .It Dv VFCF_JAIL
-can be mounted from within a jail if
+Can be mounted from within a jail if
 .Va security.jail.mount_allowed
 sysctl is set to
 .Dv 1 .
+.It Dv VFCF_DELEGADMIN
+Supports delegated administration if
+.Va vfs.usermount
+sysctl is set to
+.Dv 1 .
 .El
 .Sh PSEUDOCODE
 .Bd -literal
--- a/sys/boot/Makefile
+++ b/sys/boot/Makefile
@ -26,6 +26,10 @@ SUBDIR+=		ofw
 SUBDIR+=		uboot
 .endif

+.if defined(LOADER_ZFS_SUPPORT)
+SUBDIR+=		zfs
+.endif
+
 # Pick the machine-dependent subdir based on the target architecture.
 ADIR=			${MACHINE:S/amd64/i386/:S/sun4v/sparc64/}
 .if exists(${.CURDIR}/${ADIR}/.)
--- a/sys/boot/common/bootstrap.h
+++ b/sys/boot/common/bootstrap.h
@ -43,6 +43,7 @@ struct devdesc
 #define DEVT_DISK	1
 #define DEVT_NET	2
 #define	DEVT_CD		3
+#define DEVT_ZFS	4
    int			d_unit;
 };

--- a/sys/boot/i386/Makefile
+++ b/sys/boot/i386/Makefile
@ -1,7 +1,7 @@
 # $FreeBSD$

-SUBDIR=		mbr pmbr boot0 boot0sio btx boot2 cdboot gptboot kgzldr \
-		libi386 libfirewire loader
+SUBDIR=		mbr pmbr boot0 boot0sio btx boot2 cdboot gptboot zfsboot \
+		kgzldr libi386 libfirewire loader

 # special boot programs, 'self-extracting boot2+loader'
 SUBDIR+=	pxeldr
--- a/sys/boot/i386/libi386/bootinfo32.c
+++ b/sys/boot/i386/libi386/bootinfo32.c
@ -183,6 +183,7 @@ bi_load32(char *args, int *howtop, int *bootdevp, vm_offset_t *bip, vm_offset_t
 	break;

    case DEVT_NET:
+    case DEVT_ZFS:
 	    break;
 	    
    default:
--- a/sys/boot/i386/libi386/devicename.c
+++ b/sys/boot/i386/libi386/devicename.c
@ -167,6 +167,7 @@ i386_parsedev(struct i386_devdesc **dev, const char *devspec, const char **path)

    case DEVT_CD:
    case DEVT_NET:
+    case DEVT_ZFS:
 	unit = 0;

 	if (*np && (*np != ':')) {
@ -238,6 +239,7 @@ i386_fmtdev(void *vdev)
 	break;

    case DEVT_NET:
+    case DEVT_ZFS:
 	sprintf(buf, "%s%d:", dev->d_dev->dv_name, dev->d_unit);
 	break;
    }
--- a/sys/boot/i386/loader/Makefile
+++ b/sys/boot/i386/loader/Makefile
@ -17,6 +17,12 @@ CFLAGS+=	-DLOADER_FIREWIRE_SUPPORT
 LIBFIREWIRE=	${.OBJDIR}/../libfirewire/libfirewire.a
 .endif

+# Put LOADER_ZFS_SUPPORT=yes in /etc/make.conf for ZFS support
+.if defined(LOADER_ZFS_SUPPORT)
+CFLAGS+=	-DLOADER_ZFS_SUPPORT
+LIBZFS=		${.OBJDIR}/../../zfs/libzfsboot.a
+.endif
+
 # Enable PXE TFTP or NFS support, not both.
 .if defined(LOADER_TFTP_SUPPORT)
 CFLAGS+=	-DLOADER_TFTP_SUPPORT
@ -98,8 +104,8 @@ FILES+=	loader.rc
 # XXX crt0.o needs to be first for pxeboot(8) to work
 OBJS=	${BTXCRT} 

-DPADD=	${LIBFICL} ${LIBFIREWIRE} ${LIBI386} ${LIBSTAND}
-LDADD=	${LIBFICL} ${LIBFIREWIRE} ${LIBI386} -lstand
+DPADD=	${LIBFICL} ${LIBFIREWIRE} ${LIBZFS} ${LIBI386} ${LIBSTAND}
+LDADD=	${LIBFICL} ${LIBFIREWIRE} ${LIBZFS} ${LIBI386} -lstand

 .include <bsd.prog.mk>

--- a/sys/boot/i386/loader/conf.c
+++ b/sys/boot/i386/loader/conf.c
@ -50,6 +50,10 @@ __FBSDID("$FreeBSD$");
 extern struct devsw fwohci;
 #endif

+#if defined(LOADER_ZFS_SUPPORT)
+extern struct devsw zfs_dev;
+#endif
+
 /* Exported for libstand */
 struct devsw *devsw[] = {
    &bioscd,
@ -59,16 +63,26 @@ struct devsw *devsw[] = {
 #endif
 #if defined(LOADER_FIREWIRE_SUPPORT)
    &fwohci,
+#endif
+#if defined(LOADER_ZFS_SUPPORT)
+    &zfs_dev,
 #endif
    NULL
 };

+#if defined(LOADER_ZFS_SUPPORT)
+extern struct fs_ops zfs_fsops;
+#endif
+
 struct fs_ops *file_system[] = {
    &ufs_fsops,
    &ext2fs_fsops,
    &dosfs_fsops,
    &cd9660_fsops,
    &splitfs_fsops,
+#if defined(LOADER_ZFS_SUPPORT)
+    &zfs_fsops,
+#endif
 #ifdef LOADER_GZIP_SUPPORT
    &gzipfs_fsops,
 #endif
--- a/sys/boot/i386/loader/main.c
+++ b/sys/boot/i386/loader/main.c
@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");

 #define	KARGS_FLAGS_CD		0x1
 #define	KARGS_FLAGS_PXE		0x2
+#define	KARGS_FLAGS_ZFS		0x4

 /* Arguments passed in from the boot1/boot2 loader */
 static struct 
@ -51,8 +52,13 @@ static struct
    u_int32_t	howto;
    u_int32_t	bootdev;
    u_int32_t	bootflags;
-    u_int32_t	pxeinfo;
-    u_int32_t	res2;
+    union {
+	struct {
+	    u_int32_t	pxeinfo;
+	    u_int32_t	res2;
+	};
+	uint64_t	zfspool;
+    };
    u_int32_t	bootinfo;
 } *kargs;

@ -96,7 +102,7 @@ main(void)
     */
    bios_getmem();

-#if defined(LOADER_BZIP2_SUPPORT) || defined(LOADER_FIREWIRE_SUPPORT)
+#if defined(LOADER_BZIP2_SUPPORT) || defined(LOADER_FIREWIRE_SUPPORT) || defined(LOADER_ZFS_SUPPORT)
    heap_top = PTOV(memtop_copyin);
    memtop_copyin -= 0x300000;
    heap_bottom = PTOV(memtop_copyin);
@ -145,6 +151,14 @@ main(void)
 	    bc_add(initial_bootdev);
    }

+    archsw.arch_autoload = i386_autoload;
+    archsw.arch_getdev = i386_getdev;
+    archsw.arch_copyin = i386_copyin;
+    archsw.arch_copyout = i386_copyout;
+    archsw.arch_readin = i386_readin;
+    archsw.arch_isainb = isa_inb;
+    archsw.arch_isaoutb = isa_outb;
+
    /*
     * March through the device switch probing for things.
     */
@ -172,14 +186,6 @@ main(void)
    
    bios_getsmap();

-    archsw.arch_autoload = i386_autoload;
-    archsw.arch_getdev = i386_getdev;
-    archsw.arch_copyin = i386_copyin;
-    archsw.arch_copyout = i386_copyout;
-    archsw.arch_readin = i386_readin;
-    archsw.arch_isainb = isa_inb;
-    archsw.arch_isaoutb = isa_outb;
-
    interact();			/* doesn't return */

    /* if we ever get here, it is an error */
@ -252,6 +258,29 @@ extract_currdev(void)
 	       i386_setcurrdev, env_nounset);
    env_setenv("loaddev", EV_VOLATILE, i386_fmtdev(&new_currdev), env_noset,
 	       env_nounset);
+
+#ifdef LOADER_ZFS_SUPPORT
+    /*
+     * If we were started from a ZFS-aware boot2, we can work out
+     * which ZFS pool we are booting from.
+     */
+    if (kargs->bootflags & KARGS_FLAGS_ZFS) {
+	/*
+	 * Dig out the pool guid and convert it to a 'unit number'
+	 */
+	uint64_t guid;
+	int unit;
+	char devname[32];
+	extern int zfs_guid_to_unit(uint64_t);
+
+	guid = kargs->zfspool;
+	unit = zfs_guid_to_unit(guid);
+	if (unit >= 0) {
+	    sprintf(devname, "zfs%d", unit);
+	    setenv("currdev", devname, 1);
+	}
+    }
+#endif
 }

 COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot);
--- a/sys/boot/i386/zfsboot/Makefile
+++ b/sys/boot/i386/zfsboot/Makefile
@ -0,0 +1,108 @@
+# $FreeBSD$
+
+.PATH:		${.CURDIR}/../boot2
+
+FILES=		zfsboot
+
+NM?=		nm
+
+# A value of 0x80 enables LBA support.
+BOOT_BOOT1_FLAGS?=	0x80
+
+BOOT_COMCONSOLE_PORT?= 0x3f8
+BOOT_COMCONSOLE_SPEED?= 9600
+B2SIOFMT?=	0x3
+
+REL1=	0x700
+ORG1=	0x7c00
+ORG2=	0x2000
+
+CFLAGS=	-Os -g \
+	-fno-guess-branch-probability \
+	-fomit-frame-pointer \
+	-fno-unit-at-a-time \
+	-mno-align-long-strings \
+	-mrtd \
+	-mno-mmx -mno-3dnow -mno-sse -mno-sse2 -mno-sse3 \
+	-DBOOT2 \
+	-DFLAGS=${BOOT_BOOT1_FLAGS} \
+	-DSIOPRT=${BOOT_COMCONSOLE_PORT} \
+	-DSIOFMT=${B2SIOFMT} \
+	-DSIOSPD=${BOOT_COMCONSOLE_SPEED} \
+	-I${.CURDIR}/../../zfs \
+	-I${.CURDIR}/../../../cddl/boot/zfs \
+	-I${.CURDIR}/../btx/lib -I. \
+	-I${.CURDIR}/../boot2 \
+	-Wall -Waggregate-return -Wbad-function-cast -Wcast-align \
+	-Wmissing-declarations -Wmissing-prototypes -Wnested-externs \
+	-Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings \
+	-Winline --param max-inline-insns-single=100
+
+LDFLAGS=-static -N --gc-sections
+
+# Pick up ../Makefile.inc early.
+.include <bsd.init.mk>
+
+CLEANFILES=	zfsboot
+
+zfsboot: zfsboot1 zfsboot2
+	cat zfsboot1 zfsboot2 > zfsboot
+
+CLEANFILES+=	zfsboot1 zfsldr.out zfsldr.o
+
+zfsboot1: zfsldr.out
+	objcopy -S -O binary zfsldr.out ${.TARGET}
+
+zfsldr.out: zfsldr.o
+	${LD} ${LDFLAGS} -e start -Ttext ${ORG1} -o ${.TARGET} zfsldr.o
+
+CLEANFILES+=	zfsboot2 zfsboot.ld zfsboot.ldr zfsboot.bin zfsboot.out \
+		zfsboot.o zfsboot.s zfsboot.s.tmp zfsboot.h sio.o
+
+# We currently allow 32768 bytes for zfsboot - in practice it could be
+# any size up to 3.5Mb but keeping it fixed size simplifies zfsldr.
+# 
+BOOT2SIZE=	32768
+
+zfsboot2: zfsboot.ld
+	@set -- `ls -l zfsboot.ld`; x=$$((${BOOT2SIZE}-$$5)); \
+	    echo "$$x bytes available"; test $$x -ge 0
+	dd if=zfsboot.ld of=${.TARGET} obs=${BOOT2SIZE} conv=osync
+
+zfsboot.ld: zfsboot.ldr zfsboot.bin ${BTXKERN}
+	btxld -v -E ${ORG2} -f bin -b ${BTXKERN} -l zfsboot.ldr \
+	    -o ${.TARGET} -P 1 zfsboot.bin
+
+zfsboot.ldr:
+	cp /dev/null ${.TARGET}
+
+zfsboot.bin: zfsboot.out
+	objcopy -S -O binary zfsboot.out ${.TARGET}
+
+zfsboot.out: ${BTXCRT} zfsboot.o sio.o
+	${LD} ${LDFLAGS} -Ttext ${ORG2} -o ${.TARGET} ${.ALLSRC}
+
+zfsboot.o: zfsboot.s
+
+SRCS=	zfsboot.c zfsboot.h
+
+zfsboot.s: zfsboot.c zfsboot.h ${.CURDIR}/../../zfs/zfsimpl.c
+	${CC} ${CFLAGS} -S -o zfsboot.s.tmp ${.CURDIR}/zfsboot.c
+	sed -e '/align/d' -e '/nop/d' < zfsboot.s.tmp > zfsboot.s
+	rm -f zfsboot.s.tmp
+
+zfsboot.h: zfsldr.out
+	${NM} -t d ${.ALLSRC} | awk '/([0-9])+ T xread/ \
+	    { x = $$1 - ORG1; \
+	    printf("#define XREADORG %#x\n", REL1 + x) }' \
+	    ORG1=`printf "%d" ${ORG1}` \
+	    REL1=`printf "%d" ${REL1}` > ${.TARGET}
+
+.if ${MACHINE_ARCH} == "amd64"
+beforedepend zfsboot.s: machine
+CLEANFILES+=	machine
+machine:
+	ln -sf ${.CURDIR}/../../../i386/include machine
+.endif
+
+.include <bsd.prog.mk>
--- a/sys/boot/i386/zfsboot/zfsboot.c
+++ b/sys/boot/i386/zfsboot/zfsboot.c
@ -0,0 +1,944 @@
+/*-
+ * Copyright (c) 1998 Robert Nordier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are freely
+ * permitted provided that the above copyright notice and this
+ * paragraph and the following disclaimer are duplicated in all
+ * such forms.
+ *
+ * This software is provided "AS IS" and without any express or
+ * implied warranties, including, without limitation, the implied
+ * warranties of merchantability and fitness for a particular
+ * purpose.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/diskmbr.h>
+#include <sys/reboot.h>
+#include <sys/queue.h>
+
+#include <machine/bootinfo.h>
+#include <machine/elf.h>
+
+#include <stdarg.h>
+#include <stddef.h>
+
+#include <a.out.h>
+
+#include <btxv86.h>
+
+#include "zfsboot.h"
+#include "lib.h"
+
+#define IO_KEYBOARD	1
+#define IO_SERIAL	2
+
+#define SECOND		18	/* Circa that many ticks in a second. */
+
+#define RBX_ASKNAME	0x0	/* -a */
+#define RBX_SINGLE	0x1	/* -s */
+/* 0x2 is reserved for log2(RB_NOSYNC). */
+/* 0x3 is reserved for log2(RB_HALT). */
+/* 0x4 is reserved for log2(RB_INITNAME). */
+#define RBX_DFLTROOT	0x5	/* -r */
+#define RBX_KDB 	0x6	/* -d */
+/* 0x7 is reserved for log2(RB_RDONLY). */
+/* 0x8 is reserved for log2(RB_DUMP). */
+/* 0x9 is reserved for log2(RB_MINIROOT). */
+#define RBX_CONFIG	0xa	/* -c */
+#define RBX_VERBOSE	0xb	/* -v */
+#define RBX_SERIAL	0xc	/* -h */
+#define RBX_CDROM	0xd	/* -C */
+/* 0xe is reserved for log2(RB_POWEROFF). */
+#define RBX_GDB 	0xf	/* -g */
+#define RBX_MUTE	0x10	/* -m */
+/* 0x11 is reserved for log2(RB_SELFTEST). */
+/* 0x12 is reserved for boot programs. */
+/* 0x13 is reserved for boot programs. */
+#define RBX_PAUSE	0x14	/* -p */
+#define RBX_QUIET	0x15	/* -q */
+#define RBX_NOINTR	0x1c	/* -n */
+/* 0x1d is reserved for log2(RB_MULTIPLE) and is just misnamed here. */
+#define RBX_DUAL	0x1d	/* -D */
+/* 0x1f is reserved for log2(RB_BOOTINFO). */
+
+/* pass: -a, -s, -r, -d, -c, -v, -h, -C, -g, -m, -p, -D */
+#define RBX_MASK	(OPT_SET(RBX_ASKNAME) | OPT_SET(RBX_SINGLE) | \
+			OPT_SET(RBX_DFLTROOT) | OPT_SET(RBX_KDB ) | \
+			OPT_SET(RBX_CONFIG) | OPT_SET(RBX_VERBOSE) | \
+			OPT_SET(RBX_SERIAL) | OPT_SET(RBX_CDROM) | \
+			OPT_SET(RBX_GDB ) | OPT_SET(RBX_MUTE) | \
+			OPT_SET(RBX_PAUSE) | OPT_SET(RBX_DUAL))
+
+/* Hint to loader that we came from ZFS */
+#define	KARGS_FLAGS_ZFS		0x4
+
+#define PATH_CONFIG	"/boot.config"
+#define PATH_BOOT3	"/boot/loader"
+#define PATH_KERNEL	"/boot/kernel/kernel"
+
+#define ARGS		0x900
+#define NOPT		14
+#define NDEV		3
+#define MEM_BASE	0x12
+#define MEM_EXT 	0x15
+#define V86_CY(x)	((x) & 1)
+#define V86_ZR(x)	((x) & 0x40)
+
+#define DRV_HARD	0x80
+#define DRV_MASK	0x7f
+
+#define TYPE_AD		0
+#define TYPE_DA		1
+#define TYPE_MAXHARD	TYPE_DA
+#define TYPE_FD		2
+
+#define OPT_SET(opt)	(1 << (opt))
+#define OPT_CHECK(opt)	((opts) & OPT_SET(opt))
+
+extern uint32_t _end;
+
+static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
+static const unsigned char flags[NOPT] = {
+    RBX_DUAL,
+    RBX_SERIAL,
+    RBX_ASKNAME,
+    RBX_CDROM,
+    RBX_CONFIG,
+    RBX_KDB,
+    RBX_GDB,
+    RBX_MUTE,
+    RBX_NOINTR,
+    RBX_PAUSE,
+    RBX_QUIET,
+    RBX_DFLTROOT,
+    RBX_SINGLE,
+    RBX_VERBOSE
+};
+
+static const char *const dev_nm[NDEV] = {"ad", "da", "fd"};
+static const unsigned char dev_maj[NDEV] = {30, 4, 2};
+
+struct dsk {
+    unsigned drive;	/* BIOS drive number (DRV_HARD bit set for hard disks) */
+    unsigned type;	/* TYPE_* value; main() uses it to index dev_maj[] */
+    unsigned unit;	/* drive number with the DRV_HARD bit masked off */
+    unsigned slice;	/* slice number fed to MAKEBOOTDEV() by main() */
+    unsigned part;	/* partition number; main() always sets this to 0 */
+    unsigned start;	/* first sector of the slice being probed, 0 = whole disk */
+    int init;		/* cleared by main(); other users are outside this view */
+};
+static char cmd[512];
+static char kname[1024];
+static uint32_t opts;
+static int comspeed = SIOSPD;
+static struct bootinfo bootinfo;
+static uint32_t bootdev;
+static uint8_t ioctrl = IO_KEYBOARD;
+
+/*
+ * Buffers that must not span a 64k boundary.  main() points dmadat at a
+ * 64k-aligned address, so neither 8k member below can cross one.
+ */
+#define READ_BUF_SIZE	8192
+struct dmadat {
+	char rdbuf[READ_BUF_SIZE];	/* for reading large things */
+	char secbuf[READ_BUF_SIZE];	/* for MBR/disklabel */
+};
+static struct dmadat *dmadat;
+
+void exit(int);
+static void load(void);
+static int parse(void);
+static void printf(const char *,...);
+static void putchar(int);
+static uint32_t memsize(void);
+static int drvread(struct dsk *, void *, unsigned, unsigned);
+static int keyhit(unsigned);
+static int xputc(int);
+static int xgetc(int);
+static int getc(int);
+
+static void memcpy(void *, const void *, int);
+
+/*
+ * Minimal byte-wise copy for the boot environment: moves len bytes from
+ * src to dst, lowest address first.  Unlike the libc routine it returns
+ * nothing and takes an int length.
+ */
+static void
+memcpy(void *dst, const void *src, int len)
+{
+    const char *from;
+    char *to;
+
+    for (from = src, to = dst; len != 0; len--)
+	*to++ = *from++;
+}
+
+/*
+ * Copy the NUL-terminated string src, terminator included, to dst.
+ * The caller guarantees dst is large enough; nothing is returned.
+ */
+static void
+strcpy(char *dst, const char *src)
+{
+    char c;
+
+    do {
+	c = *src++;
+	*dst++ = c;
+    } while (c != 0);
+}
+
+/*
+ * Append the NUL-terminated string src to the end of the string in dst.
+ * The caller guarantees dst has room; nothing is returned.
+ */
+static void
+strcat(char *dst, const char *src)
+{
+    char *tail;
+
+    /* Find the terminator of the existing string. */
+    for (tail = dst; *tail != 0; tail++)
+	;
+    for (; *src != 0; src++)
+	*tail++ = *src;
+    *tail = 0;
+}
+
+/*
+ * Compare two NUL-terminated strings.  Returns zero when they are equal,
+ * otherwise the difference between the first pair of differing bytes,
+ * both taken as unsigned char.
+ */
+static int
+strcmp(const char *s1, const char *s2)
+{
+    while (*s1 != 0 && *s1 == *s2) {
+	s1++;
+	s2++;
+    }
+    return (unsigned char)*s1 - (unsigned char)*s2;
+}
+
+/*
+ * Return a pointer to the first occurrence of ch in s, or 0 if there is
+ * none.  The terminator itself is never matched, so looking for '\0'
+ * yields 0 (this differs from the libc routine).
+ */
+static const char *
+strchr(const char *s, char ch)
+{
+    while (*s != 0) {
+	if (*s == ch)
+	    return s;
+	s++;
+    }
+    return 0;
+}
+
+/*
+ * Compare the first n bytes of two buffers.  Returns zero when they all
+ * match, otherwise the difference between the first pair of differing
+ * bytes, both taken as unsigned char.
+ */
+static int
+memcmp(const void *p1, const void *p2, size_t n)
+{
+    const char *a = (const char *) p1;
+    const char *b = (const char *) p2;
+
+    while (n > 0) {
+	if (*a != *b)
+	    return (unsigned char)*a - (unsigned char)*b;
+	a++;
+	b++;
+	n--;
+    }
+    return 0;
+}
+
+/*
+ * Fill the n bytes starting at p with val.  Returns nothing.
+ */
+static void
+memset(void *p, char val, size_t n)
+{
+    char *bytes = (char *) p;
+    size_t i;
+
+    for (i = 0; i < n; i++)
+	bytes[i] = val;
+}
+
+/*
+ * Trivial bump allocator for the boot block.  The arena starts right
+ * after the dmadat buffers and ends at 640k; memory is handed out
+ * sequentially and there is no way to free it.  On exhaustion a message
+ * is printed and the machine spins forever, so a caller never sees a
+ * null pointer.
+ */
+static void *
+malloc(size_t n)
+{
+	static char *heap_next;
+	static char *heap_end;
+	char *block;
+
+	/* Set the arena up lazily, on the first call. */
+	if (heap_next == 0) {
+		heap_end = (char *) (640*1024);
+		heap_next = (char *) dmadat + sizeof(*dmadat);
+	}
+
+	if (heap_next + n > heap_end) {
+		printf("malloc failure\n");
+		for (;;)
+		    ;
+	}
+
+	block = heap_next;
+	heap_next = block + n;
+	return block;
+}
+
+/*
+ * Return the number of bytes in s before its NUL terminator.
+ */
+static size_t
+strlen(const char *s)
+{
+	const char *end;
+
+	for (end = s; *end != 0; end++)
+		;
+	return (size_t)(end - s);
+}
+
+/*
+ * Return a copy of the string s in memory obtained from the local
+ * malloc(), which halts instead of returning a null pointer, so the
+ * result is not checked here.
+ */
+static char *
+strdup(const char *s)
+{
+	size_t size;
+	char *copy;
+
+	size = strlen(s) + 1;		/* room for the terminator */
+	copy = malloc(size);
+	strcpy(copy, s);
+	return copy;
+}
+
+#include "zfsimpl.c"
+
+/*
+ * Read from a dnode (which must be from a ZPL filesystem).
+ *
+ * Reads up to size bytes at offset *offp into start, clamped to the file
+ * size recorded in the znode stored in the dnode's bonus buffer, and
+ * advances *offp by the amount read.
+ *
+ * Returns the number of bytes read (0 when *offp is at or beyond the end
+ * of the file) or -1 if dnode_read() fails.
+ */
+static int
+zfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
+{
+	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
+	size_t n;
+	int rc;
+
+	/*
+	 * Nothing to read at or past end of file.  Without this check the
+	 * subtraction below would wrap and request a huge read.
+	 */
+	if ((uint64_t)*offp >= zp->zp_size)
+		return (0);
+
+	/* Clamp to the bytes remaining, without overflowing offset + size. */
+	n = size;
+	if (n > zp->zp_size - *offp)
+		n = zp->zp_size - *offp;
+
+	rc = dnode_read(spa, dnode, *offp, start, n);
+	if (rc)
+		return (-1);
+	*offp += n;
+
+	return (n);
+}
+
+/*
+ * Current ZFS pool
+ */
+spa_t *spa;
+
+/*
+ * A wrapper for drvread that doesn't have to worry about whether the
+ * caller's buffer crosses a 64k boundary: every transfer goes through
+ * dmadat->rdbuf and is then copied out to buf.
+ *
+ * priv is the struct dsk describing the drive; off and bytes must both be
+ * multiples of DEV_BSIZE.  Returns 0 on success and -1 on a misaligned
+ * request or a failed drvread().  The vdev argument is not used.
+ */
+static int
+vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
+{
+	struct dsk *dsk = (struct dsk *) priv;
+	char *out = buf;
+	unsigned int lba, nb;
+	size_t chunk;
+
+	if ((off & (DEV_BSIZE - 1)) != 0 || (bytes & (DEV_BSIZE - 1)) != 0)
+		return -1;
+
+	for (lba = off / DEV_BSIZE; bytes > 0; lba += nb) {
+		/* Move at most one bounce buffer's worth per iteration. */
+		nb = bytes / DEV_BSIZE;
+		if (nb > READ_BUF_SIZE / DEV_BSIZE)
+			nb = READ_BUF_SIZE / DEV_BSIZE;
+		if (drvread(dsk, dmadat->rdbuf, lba, nb) != 0)
+			return -1;
+		chunk = nb * DEV_BSIZE;
+		memcpy(out, dmadat->rdbuf, chunk);
+		out += chunk;
+		bytes -= chunk;
+	}
+
+	return 0;
+}
+
+/*
+ * Read exactly nbyte bytes from dnode on the current pool at *offp.
+ * Returns 0 on success; any short or failed read prints a message and
+ * returns -1.
+ */
+static int
+xfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
+{
+    size_t got;
+
+    got = (size_t)zfs_read(spa, dnode, offp, buf, nbyte);
+    if (got == nbyte)
+	return 0;
+
+    printf("Invalid %s\n", "format");
+    return -1;
+}
+
+/*
+ * Issue BIOS interrupt MEM_EXT with 0x8800 in EAX and return whatever
+ * the BIOS leaves in EAX; main() stores the result as the extended
+ * memory size in bootinfo.
+ */
+static inline uint32_t
+memsize(void)
+{
+    v86.eax = 0x8800;
+    v86.addr = MEM_EXT;
+    v86int();
+    return (v86.eax);
+}
+
+/*
+ * Read one line from the console into cmd, echoing as it goes.
+ * Backspace and DEL erase the previous character, CR or LF terminates
+ * the line, and characters that do not fit are echoed but dropped.
+ */
+static inline void
+getstr(void)
+{
+    char *const limit = cmd + sizeof(cmd) - 1;	/* keep room for the NUL */
+    char *cur = cmd;
+    int c;
+
+    for (;;) {
+	c = xgetc(0);
+	if (c == 0)
+	    continue;
+	if (c == '\177' || c == '\b') {
+	    if (cur > cmd) {
+		cur--;
+		printf("\b \b");
+	    }
+	    continue;
+	}
+	if (c == '\n' || c == '\r') {
+	    *cur = 0;
+	    return;
+	}
+	if (cur < limit)
+	    *cur++ = c;
+	putchar(c);
+    }
+}
+
+/*
+ * Emit one character through BIOS interrupt 0x10: 0x0e goes in AH, the
+ * low byte of c in AL, and 0x7 in EBX.
+ */
+static inline void
+putc(int c)
+{
+    v86.ebx = 0x7;
+    v86.eax = 0xe00 | (c & 0xff);
+    v86.addr = 0x10;
+    v86int();
+}
+
+/*
+ * Try to detect a device supported by the legacy int13 BIOS.
+ *
+ * Calls interrupt 0x13 with 0x800 in EAX and the drive number in EDX.
+ * Returns 1 when the drive looks usable and 0 otherwise.
+ *
+ * NOTE(review): the unit test below is "!=" while the accompanying
+ * remark reads "unit # OK"; if DL holds a drive count, ">" may have been
+ * intended - confirm before changing.
+ */
+static int
+int13probe(int drive)
+{
+    v86.ctl = V86_FLAGS;
+    v86.addr = 0x13;
+    v86.eax = 0x800;
+    v86.edx = drive;
+    v86int();
+
+    if ((v86.efl & 0x1) != 0)				/* carry set */
+	return (0);
+    if ((v86.edx & 0xff) == (drive & DRV_MASK))	/* unit # not OK */
+	return (0);
+    if ((v86.ecx & 0x3f) == 0)				/* absurd sector size */
+	return (0);					/* skip device */
+    return (1);
+}
+
+/*
+ * Look for ZFS vdevs on one BIOS drive.
+ *
+ * dsk describes the drive.  If spap is non-null it is passed to
+ * vdev_probe() so the pool that is found can be reported back; only the
+ * first successful slice probe gets it.
+ */
+static void
+probe_drive(struct dsk *dsk, spa_t **spap)
+{
+    struct dos_partition *table, *slice;
+    struct dsk *fresh;
+    char *mbr;
+
+    if (!int13probe(dsk->drive))
+	return;
+
+    /*
+     * A vdev covering the whole disk ends the search.  Otherwise read
+     * the MBR and probe every slice in turn.
+     */
+    if (vdev_probe(vdev_read, dsk, spap) == 0)
+	return;
+
+    mbr = dmadat->secbuf;
+    dsk->start = 0;
+    if (drvread(dsk, mbr, DOSBBSECTOR, 1))
+	return;
+    table = (void *)(mbr + DOSPARTOFF);
+
+    for (slice = table; slice < table + NDOSPART; slice++) {
+	if (slice->dp_typ == 0)
+	    continue;
+	dsk->start = slice->dp_start;
+	if (vdev_probe(vdev_read, dsk, spap) != 0)
+	    continue;
+
+	/*
+	 * We record only the first pool we find (we will try to boot
+	 * from that one), so later probes must not overwrite it.
+	 */
+	spap = 0;
+
+	/*
+	 * This slice had a vdev.  We need a new dsk structure now,
+	 * since the vdev owns the current one.
+	 */
+	fresh = malloc(sizeof(struct dsk));
+	*fresh = *dsk;
+	dsk = fresh;
+    }
+}
+
+/*
+ * Boot block entry point.
+ *
+ * Sets up the DMA buffers and bootinfo, probes the boot drive and the
+ * other BIOS hard drives for ZFS pools, mounts the chosen pool, applies
+ * /boot.config if present, then tries to load /boot/loader (falling back
+ * to the kernel) before dropping to the interactive "boot:" prompt.
+ * Never returns.
+ */
+int
+main(void)
+{
+    int autoboot, i;
+    dnode_phys_t dn;
+    off_t off;
+    struct dsk *dsk;
+
+    /* Place the DMA buffers on the first 64k boundary past the image. */
+    dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
+    v86.ctl = V86_FLAGS;
+
+    /* Describe the drive named in the argument area at ARGS. */
+    dsk = malloc(sizeof(struct dsk));
+    dsk->drive = *(uint8_t *)PTOV(ARGS);
+    dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
+    dsk->unit = dsk->drive & DRV_MASK;
+    dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
+    dsk->part = 0;
+    dsk->start = 0;
+    dsk->init = 0;
+
+    bootinfo.bi_version = BOOTINFO_VERSION;
+    bootinfo.bi_size = sizeof(bootinfo);
+    bootinfo.bi_basemem = 0;	/* XXX will be filled by loader or kernel */
+    bootinfo.bi_extmem = memsize();
+    bootinfo.bi_memsizes_valid++;
+    bootinfo.bi_bios_dev = dsk->drive;
+
+    /* This statement used to end in a stray comma operator. */
+    bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
+			  dsk->slice, dsk->unit, dsk->part);
+
+    autoboot = 1;
+
+    zfs_init();
+
+    /*
+     * Probe the boot drive first - we will try to boot from whatever
+     * pool we find on that drive.
+     */
+    probe_drive(dsk, &spa);
+
+    /*
+     * Probe the rest of the drives that the bios knows about. This
+     * will find any other available pools and it may fill in missing
+     * vdevs for the boot pool.
+     */
+    for (i = 0; i < 4; i++) {
+	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
+	    continue;
+
+	dsk = malloc(sizeof(struct dsk));
+	dsk->drive = i | DRV_HARD;
+	/* Always a hard disk here; the old "drive & TYPE_AD" masked with 0. */
+	dsk->type = TYPE_AD;
+	dsk->unit = i;
+	dsk->slice = 0;
+	dsk->part = 0;
+	dsk->start = 0;
+	dsk->init = 0;
+	probe_drive(dsk, 0);
+    }
+
+    /*
+     * If we didn't find a pool on the boot drive, default to the
+     * first pool we found, if any.
+     */
+    if (!spa) {
+	spa = STAILQ_FIRST(&zfs_pools);
+	if (!spa) {
+	    printf("No ZFS pools located, can't boot\n");
+	    for (;;)
+		;
+	}
+    }
+
+    zfs_mount_pool(spa);
+
+    /*
+     * Process configuration file.  The file is normally much shorter than
+     * cmd, so use zfs_read(), which accepts short reads, rather than
+     * xfsread(), which would report "Invalid format" for them.  Reading
+     * at most sizeof(cmd) - 1 bytes keeps the terminator that the static
+     * buffer's zero initialisation provides.
+     */
+    if (zfs_lookup(spa, PATH_CONFIG, &dn) == 0) {
+	off = 0;
+	if (zfs_read(spa, &dn, &off, cmd, sizeof(cmd) - 1) < 0)
+	    *cmd = 0;
+    }
+
+    if (*cmd) {
+	if (parse())
+	    autoboot = 0;
+	if (!OPT_CHECK(RBX_QUIET))
+	    printf("%s: %s", PATH_CONFIG, cmd);
+	/* Do not process this command twice */
+	*cmd = 0;
+    }
+
+    /*
+     * Try to exec stage 3 boot loader. If interrupted by a keypress,
+     * or in case of failure, try to load a kernel directly instead.
+     */
+
+    if (autoboot && !*kname) {
+	memcpy(kname, PATH_BOOT3, sizeof(PATH_BOOT3));
+	if (!keyhit(3*SECOND)) {
+	    load();
+	    memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
+	}
+    }
+
+    /* Present the user with the boot2 prompt. */
+
+    for (;;) {
+	if (!autoboot || !OPT_CHECK(RBX_QUIET))
+	    printf("\nFreeBSD/i386 boot\n"
+		   "Default: %s:%s\n"
+		   "boot: ",
+		   spa->spa_name, kname);
+	if (ioctrl & IO_SERIAL)
+	    sio_flush();
+	if (!autoboot || keyhit(5*SECOND))
+	    getstr();
+	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
+	    putchar('\n');
+	autoboot = 0;
+	if (parse())
+	    putchar('\a');
+	else
+	    load();
+    }
+}
+
+/*
+ * XXX - Needed for btxld to link the boot2 binary; do not remove.
+ * The body is intentionally empty: the argument is ignored and the
+ * function simply returns.
+ */
+void
+exit(int x)
+{
+}
+
+/*
+ * Load the file named by kname from the current pool and transfer
+ * control to it via __exec().
+ *
+ * Two image formats are accepted: a.out with ZMAGIC (fmt 0) and ELF
+ * (fmt 1).  The function returns without executing anything if the
+ * lookup fails, a read comes up short, or the header is of neither kind.
+ */
+static void
+load(void)
+{
+    union {
+	struct exec ex;
+	Elf32_Ehdr eh;
+    } hdr;
+    static Elf32_Phdr ep[2];
+    static Elf32_Shdr es[2];
+    caddr_t p;
+    dnode_phys_t dn;
+    off_t off;
+    uint32_t addr, x;
+    int fmt, i, j;
+
+    if (zfs_lookup(spa, kname, &dn)) {
+	return;
+    }
+    /* Read enough of the file to cover either header type. */
+    off = 0;
+    if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
+	return;
+    if (N_GETMAGIC(hdr.ex) == ZMAGIC)
+	fmt = 0;
+    else if (IS_ELF(hdr.eh))
+	fmt = 1;
+    else {
+	printf("Invalid %s\n", "format");
+	return;
+    }
+    if (fmt == 0) {
+	/* a.out: text is read starting at file offset PAGE_SIZE. */
+	addr = hdr.ex.a_entry & 0xffffff;
+	p = PTOV(addr);
+	off = PAGE_SIZE;
+	if (xfsread(&dn, &off, p, hdr.ex.a_text))
+	    return;
+	/* Data follows the text, which is rounded up to a page in memory. */
+	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
+	if (xfsread(&dn, &off, p, hdr.ex.a_data))
+	    return;
+	/* Skip data and page-rounded bss; symbol info is placed after them. */
+	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
+	bootinfo.bi_symtab = VTOP(p);
+	/* Store the symbol table size ahead of the table itself. */
+	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
+	p += sizeof(hdr.ex.a_syms);
+	if (hdr.ex.a_syms) {
+	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
+		return;
+	    p += hdr.ex.a_syms;
+	    /*
+	     * The next int in the file is a length that counts itself;
+	     * read it, then read the remaining x bytes that follow.
+	     */
+	    if (xfsread(&dn, &off, p, sizeof(int)))
+		return;
+	    x = *(uint32_t *)p;
+	    p += sizeof(int);
+	    x -= sizeof(int);
+	    if (xfsread(&dn, &off, p, x))
+		return;
+	    p += x;
+	}
+    } else {
+	/* ELF: collect up to two PT_LOAD program headers into ep[]. */
+	off = hdr.eh.e_phoff;
+	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
+	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
+		return;
+	    if (ep[j].p_type == PT_LOAD)
+		j++;
+	}
+	/*
+	 * NOTE(review): both ep[] entries are used even if fewer than two
+	 * PT_LOAD headers were found above - confirm this is acceptable.
+	 */
+	for (i = 0; i < 2; i++) {
+	    p = PTOV(ep[i].p_paddr & 0xffffff);
+	    off = ep[i].p_offset;
+	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
+		return;
+	}
+	/* Symbol info goes after the second segment's page-rounded memsz. */
+	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
+	bootinfo.bi_symtab = VTOP(p);
+	/*
+	 * Only when exactly two section headers follow the one at
+	 * e_shstrndx are they read and their contents copied, each
+	 * preceded by its sh_size.
+	 */
+	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
+	    off = hdr.eh.e_shoff + sizeof(es[0]) *
+		(hdr.eh.e_shstrndx + 1);
+	    if (xfsread(&dn, &off, &es, sizeof(es)))
+		return;
+	    for (i = 0; i < 2; i++) {
+		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
+		p += sizeof(es[i].sh_size);
+		off = es[i].sh_offset;
+		if (xfsread(&dn, &off, p, es[i].sh_size))
+		    return;
+		p += es[i].sh_size;
+	    }
+	}
+	addr = hdr.eh.e_entry & 0xffffff;
+    }
+    bootinfo.bi_esymtab = VTOP(p);
+    bootinfo.bi_kernelname = VTOP(kname);
+    /*
+     * Hand over to the loaded image.  KARGS_FLAGS_ZFS tells the loader we
+     * came from ZFS; the pool guid is passed as two 32-bit halves, low
+     * word first.
+     */
+    __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
+	   bootdev,
+	   KARGS_FLAGS_ZFS,
+	   (uint32_t) spa->spa_guid,
+	   (uint32_t) (spa->spa_guid >> 32),
+	   VTOP(&bootinfo));
+}
+
+/*
+ * Parse the command line held in cmd[], one whitespace-separated word at a
+ * time.  Words are NUL-terminated in place, so cmd[] is modified.
+ *
+ *   -<flags>     toggle the boot options listed in optstr[]; 'P' probes for
+ *                a keyboard, 'S<digits>' sets the serial speed (comspeed).
+ *   ?<path>      list the ZFS directory <path>.
+ *   status       print the status of all pools.
+ *   [pool:]name  optionally switch to another pool, then record the kernel
+ *                name in kname[].
+ *
+ * Returns 0 when the line was accepted.  Returns -1 on an unknown option,
+ * an unknown pool, an over-long name, and also after the '?' and 'status'
+ * commands, which are handled here and must not lead to a load.
+ */
+static int
+parse(void)
+{
+    char *arg = cmd;
+    char *ep, *p, *q;
+    const char *cp;
+    int c, i, j;
+
+    while ((c = *arg++)) {
+	if (c == ' ' || c == '\t' || c == '\n')
+	    continue;
+	/* Find the end of this word and terminate it in place. */
+	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
+	ep = p;
+	if (*p)
+	    *p++ = 0;
+	if (c == '-') {
+	    while ((c = *arg++)) {
+		if (c == 'P') {
+		    /* No keyboard flagged: fall back to dual console. */
+		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
+			cp = "yes";
+		    } else {
+			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
+			cp = "no";
+		    }
+		    printf("Keyboard: %s\n", cp);
+		    continue;
+		} else if (c == 'S') {
+		    j = 0;
+		    while ((unsigned int)(i = *arg++ - '0') <= 9)
+			j = j * 10 + i;
+		    /* i == -'0' means the digits ran up to the NUL. */
+		    if (j > 0 && i == -'0') {
+			comspeed = j;
+			break;
+		    }
+		    /* Fall through to error below ('S' not in optstr[]). */
+		}
+		for (i = 0; c != optstr[i]; i++)
+		    if (i == NOPT - 1)
+			return -1;
+		opts ^= OPT_SET(flags[i]);
+	    }
+	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
+		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
+	    if (ioctrl & IO_SERIAL)
+		sio_init(115200 / comspeed);
+	} else if (c == '?') {
+	    /*
+	     * This used to be a plain "if", which let an option word drop
+	     * into the kernel-name branch below.  That was harmless only
+	     * because arg is an empty string at that point.
+	     */
+	    dnode_phys_t dn;
+
+	    if (zfs_lookup(spa, arg, &dn) == 0) {
+		zap_list(spa, &dn);
+	    }
+	    return -1;
+	} else {
+	    arg--;
+
+	    /*
+	     * Report pool status if the command is 'status'. Let's
+	     * hope no-one wants to load /status as a kernel.
+	     */
+	    if (!strcmp(arg, "status")) {
+		spa_all_status();
+		return -1;
+	    }
+
+	    /*
+	     * If there is a colon, switch pools.
+	     */
+	    q = (char *) strchr(arg, ':');
+	    if (q) {
+		spa_t *newspa;
+
+		*q++ = 0;
+		newspa = spa_find_by_name(arg);
+		if (newspa) {
+		    spa = newspa;
+		    zfs_mount_pool(spa);
+		} else {
+		    printf("\nCan't find ZFS pool %s\n", arg);
+		    return -1;
+		}
+		arg = q;
+	    }
+	    /* Copy the name, including its NUL, if one was given. */
+	    if ((i = ep - arg)) {
+		if ((size_t)i >= sizeof(kname))
+		    return -1;
+		memcpy(kname, arg, i + 1);
+	    }
+	}
+	arg = p;
+    }
+    return 0;
+}
+
+/*
+ * Minimal printf for the boot code.  Supported conversions:
+ *
+ *   %c   character
+ *   %s   string, with an optional decimal field width and '-' flag
+ *   %u   unsigned decimal
+ *
+ * Any other character after '%' is printed as-is.
+ *
+ * NOTE(review): the padding direction for %s is the reverse of standard
+ * printf: with '-' the blanks are written before the string, without it
+ * after the string.  It is left unchanged here because existing callers
+ * may depend on it.
+ */
+static void
+printf(const char *fmt,...)
+{
+    va_list ap;
+    char buf[10];	/* enough for the digits of a 32-bit unsigned */
+    char *s;
+    unsigned u;
+    int c;
+    int minus;
+    int prec;
+    int len;
+    int pad;
+
+    va_start(ap, fmt);
+    while ((c = *fmt++)) {
+	if (c == '%') {
+	    minus = 0;
+	    prec = 0;
+	nextfmt:
+	    c = *fmt++;
+	    switch (c) {
+	    case '\0':
+		/*
+		 * Format ended inside a conversion: stop here instead of
+		 * stepping past the terminating NUL.
+		 */
+		va_end(ap);
+		return;
+	    case '-':
+		minus = 1;
+		goto nextfmt;
+	    case '0':
+	    case '1':
+	    case '2':
+	    case '3':
+	    case '4':
+	    case '5':
+	    case '6':
+	    case '7':
+	    case '8':
+	    case '9':
+		prec = 10 * prec + (c - '0');
+		goto nextfmt;
+	    case 'c':
+		putchar(va_arg(ap, int));
+		continue;
+	    case 's':
+		s = va_arg(ap, char *);
+		if (prec) {
+		    len = strlen(s);
+		    if (len < prec)
+			pad = prec - len;
+		    else
+			pad = 0;
+		    if (minus)
+			while (pad--)
+			    putchar(' ');
+		    for (; *s; s++)
+			putchar(*s);
+		    if (!minus)
+			while (pad--)
+			    putchar(' ');
+		} else {
+		    for (; *s; s++)
+			putchar(*s);
+		}
+		continue;
+	    case 'u':
+		u = va_arg(ap, unsigned);
+		/* Generate the digits in reverse, then emit them backwards. */
+		s = buf;
+		do
+		    *s++ = '0' + u % 10U;
+		while (u /= 10U);
+		while (s > buf)
+		    putchar(*--s);
+		continue;
+	    }
+	}
+	putchar(c);
+    }
+    va_end(ap);
+    return;
+}
+
+/*
+ * Write one character through xputc(), sending a carriage return ahead of
+ * every newline.
+ */
+static void
+putchar(int c)
+{
+    if (c == '\n')
+	xputc('\r');
+    xputc(c);
+}
+
+/*
+ * Read nblk blocks starting at block lba (relative to dsk->start) from
+ * drive dsk->drive into buf, by calling the xread routine at XREADORG
+ * through the v86 interface.
+ *
+ * Unless RBX_QUIET is set, each call also advances a one-character
+ * progress spinner on the console.
+ *
+ * Returns 0 on success; on failure prints the error code and block
+ * number and returns -1.
+ */
+static int
+drvread(struct dsk *dsk, void *buf, unsigned lba, unsigned nblk)
+{
+    /* The four bytes are '-', '\\', '|', '/'; rotated one byte per call. */
+    static unsigned c = 0x2d5c7c2f;
+
+    lba += dsk->start;
+    if (!OPT_CHECK(RBX_QUIET))
+	printf("%c\b", c = c << 8 | c >> 24);
+    v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS;
+    v86.addr = XREADORG;		/* call to xread in boot1 */
+    v86.es = VTOPSEG(buf);
+    v86.eax = lba;			/* low half of the block number */
+    v86.ebx = VTOPOFF(buf);
+    v86.ecx = lba >> 16;		/* high half of the block number */
+    v86.edx = nblk << 8 | dsk->drive;	/* count in %dh, drive in %dl */
+    v86int();
+    v86.ctl = V86_FLAGS;
+    /* Carry set on return signals failure; the code is in bits 8-15 of eax. */
+    if (V86_CY(v86.efl)) {
+	printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba);
+	return -1;
+    }
+    return 0;
+}
+
+/*
+ * Poll for input for up to `ticks' counts of the 32-bit counter read from
+ * physical address 0x46c (presumably the BIOS timer tick count - TODO
+ * confirm).
+ *
+ * Returns 1 as soon as xgetc(1) reports pending input.  Returns 0 when
+ * the interval expires, when the counter is seen to go backwards, or
+ * immediately when RBX_NOINTR is set.
+ */
+static int
+keyhit(unsigned ticks)
+{
+    uint32_t t0, t1;
+
+    if (OPT_CHECK(RBX_NOINTR))
+	return 0;
+    t0 = 0;
+    for (;;) {
+	if (xgetc(1))
+	    return 1;
+	t1 = *(uint32_t *)PTOV(0x46c);
+	/* Latch the start time on the first pass (0 means "not set yet"). */
+	if (!t0)
+	    t0 = t1;
+	if (t1 < t0 || t1 >= t0 + ticks)
+	    return 0;
+    }
+}
+
+/*
+ * Write character c to every console enabled in ioctrl: putc() when
+ * IO_KEYBOARD is set, sio_putc() when IO_SERIAL is set.  Returns c.
+ */
+static int
+xputc(int c)
+{
+    if (ioctrl & IO_KEYBOARD)
+	putc(c);
+    if (ioctrl & IO_SERIAL)
+	sio_putc(c);
+    return c;
+}
+
+/*
+ * Console input across the devices enabled in ioctrl.
+ *
+ * fn != 0: poll once; return 1 if a character is pending, else 0.
+ * fn == 0: wait until a character is available and return it.
+ *
+ * Always returns 0 when RBX_NOINTR is set.  The keyboard is checked
+ * before the serial port.
+ */
+static int
+xgetc(int fn)
+{
+    if (OPT_CHECK(RBX_NOINTR))
+	return 0;
+    for (;;) {
+	if (ioctrl & IO_KEYBOARD && getc(1))
+	    return fn ? 1 : getc(0);
+	if (ioctrl & IO_SERIAL && sio_ischar())
+	    return fn ? 1 : sio_getc();
+	/* Poll mode: nothing pending on either device. */
+	if (fn)
+	    return 0;
+    }
+}
+
+/*
+ * Keyboard access through software interrupt 0x16, with fn placed in the
+ * high byte of the low word of eax.
+ *
+ * fn == 0: return the low byte of eax after the call.
+ * fn != 0: return non-zero only if the zero flag is clear after the call
+ *          and the low byte of eax is non-zero.
+ */
+static int
+getc(int fn)
+{
+    /*
+     * The extra comparison against zero is an attempt to work around
+     * what appears to be a bug in QEMU and Bochs. Both emulators
+     * sometimes report a key-press with scancode one and ascii zero
+     * when no such key is pressed in reality. As far as I can tell,
+     * this only happens shortly after a reboot.
+     */
+    v86.addr = 0x16;
+    v86.eax = fn << 8;
+    v86int();
+    return fn == 0 ? v86.eax & 0xff : (!V86_ZR(v86.efl) && (v86.eax & 0xff));
+}
--- a/sys/boot/i386/zfsboot/zfsldr.S
+++ b/sys/boot/i386/zfsboot/zfsldr.S
@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 1998 Robert Nordier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are freely
+ * permitted provided that the above copyright notice and this
+ * paragraph and the following disclaimer are duplicated in all
+ * such forms.
+ *
+ * This software is provided "AS IS" and without any express or
+ * implied warranties, including, without limitation, the implied
+ * warranties of merchantability and fitness for a particular
+ * purpose.
+ *
+ * $FreeBSD$
+ */
+
+/* Memory Locations */
+		.set MEM_REL,0x700		# Relocation address
+		.set MEM_ARG,0x900		# Arguments
+		.set MEM_ORG,0x7c00		# Origin
+		.set MEM_BUF,0x8000		# Load area
+		.set MEM_BTX,0x9000		# BTX start
+		.set MEM_JMP,0x9010		# BTX entry point
+		.set MEM_USR,0xa000		# Client start
+		.set BDA_BOOT,0x472		# Boot howto flag
+	
+/* Partition Constants */
+		.set PRT_OFF,0x1be		# Partition offset
+		.set PRT_NUM,0x4		# Partitions
+		.set PRT_BSD,0xa5		# Partition type
+
+/* Flag Bits */
+		.set FL_PACKET,0x80		# Packet mode
+
+/* Misc. Constants */
+		.set SIZ_PAG,0x1000		# Page size
+		.set SIZ_SEC,0x200		# Sector size
+
+		.set NSECT,0x40
+		.globl start
+		.globl xread
+		.code16
+
+start:		jmp main			# Start recognizably
+
+/*
+ * This is the start of a standard BIOS Parameter Block (BPB). Most bootable
+ * FAT disks have this at the start of their MBR. While normal BIOS's will
+ * work fine without this section, IBM's El Torito emulation "fixes" up the
+ * BPB by writing into the memory copy of the MBR. Rather than have data
+ * written into our xread routine, we'll define a BPB to work around it.
+ * The data marked with (T) indicates a field required for a ThinkPad to
+ * recognize the disk and (W) indicates fields written from IBM BIOS code.
+ * The use of the BPB is based on what OpenBSD and NetBSD implemented in
+ * their boot code but the required fields were determined by trial and error.
+ *
+ * Note: If additional space is needed in boot1, one solution would be to
+ * move the "prompt" message data (below) to replace the OEM ID.
+ */
+		.org 0x03, 0x00
+oemid:		.space 0x08, 0x00	# OEM ID
+
+		.org 0x0b, 0x00
+bpb:		.word   512		# sector size (T)
+		.byte	0		# sectors/cluster
+		.word	0		# reserved sectors
+		.byte	0		# number of FATs
+		.word	0		# root entries
+		.word	0		# small sectors
+		.byte	0		# media type (W)
+		.word	0		# sectors/fat
+		.word	18		# sectors per track (T)
+		.word	2		# number of heads (T)
+		.long	0		# hidden sectors (W)
+		.long	0		# large sectors
+
+		.org 0x24, 0x00
+ebpb:		.byte	0		# BIOS physical drive number (W)
+
+		.org 0x25,0x90
+/*
+ * Trampoline used by boot2 to call read to read data from the disk via
+ * the BIOS.  Call with:
+ *
+ * %cx:%ax	- long    - LBA to read in
+ * %es:(%bx)	- caddr_t - buffer to read data into
+ * %dl		- byte    - drive to read from
+ * %dh		- byte    - num sectors to read
+ */
+
+xread:		push %ss			# Address
+		pop %ds				#  data
+/*
+ * Setup an EDD disk packet and pass it to read
+ */
+xread.1:					# Starting
+		pushl $0x0			#  absolute
+		push %cx			#  block
+		push %ax			#  number
+		push %es			# Address of
+		push %bx			#  transfer buffer
+		xor %ax,%ax			# Number of
+		movb %dh,%al			#  blocks to
+		push %ax			#  transfer
+		push $0x10			# Size of packet
+		mov %sp,%bp			# Packet pointer
+		callw read			# Read from disk
+		lea 0x10(%bp),%sp		# Clear stack
+		lret				# To far caller
+/*
+ * Load the rest of boot2 and BTX up, copy the parts to the right locations,
+ * and start it all up.
+ */
+
+/*
+ * Setup the segment registers to flat addressing (segment 0) and setup the
+ * stack to end just below the start of our code.
+ */
+main:		cld				# String ops inc
+		xor %cx,%cx			# Zero
+		mov %cx,%es			# Address
+		mov %cx,%ds			#  data
+		mov %cx,%ss			# Set up
+		mov $start,%sp			#  stack
+/*
+ * Relocate ourself to MEM_REL.  Since %cx == 0, the inc %ch sets
+ * %cx == 0x100.
+ */
+		mov %sp,%si			# Source
+		mov $MEM_REL,%di		# Destination
+		incb %ch			# Word count
+		rep				# Copy
+		movsw				#  code
+/*
+ * If we are on a hard drive, then load the MBR and look for the first
+ * FreeBSD slice.  We use the fake partition entry below that points to
+ * the MBR when we call nread.  The first pass looks for the first active
+ * FreeBSD slice.  The second pass looks for the first non-active FreeBSD
+ * slice if the first one fails.
+ */
+		mov $part4,%si			# Partition
+		cmpb $0x80,%dl			# Hard drive?
+		jb main.4			# No
+		movb $0x1,%dh			# Block count
+		callw nread			# Read MBR
+		mov $0x1,%cx	 		# Two passes
+main.1: 	mov $MEM_BUF+PRT_OFF,%si	# Partition table
+		movb $0x1,%dh			# Partition
+main.2: 	cmpb $PRT_BSD,0x4(%si)		# Our partition type?
+		jne main.3			# No
+		jcxz main.5			# If second pass
+		testb $0x80,(%si)		# Active?
+		jnz main.5			# Yes
+main.3: 	add $0x10,%si	 		# Next entry
+		incb %dh			# Partition
+		cmpb $0x1+PRT_NUM,%dh		# In table?
+		jb main.2			# Yes
+		dec %cx				# Do two
+		jcxz main.1			#  passes
+/*
+ * If we get here, we didn't find any FreeBSD slices at all, so print an
+ * error message and die.
+ */
+		mov $msg_part,%si		# Message
+		jmp error			# Error
+/*
+ * Floppies use partition 0 of drive 0.
+ */
+main.4: 	xor %dx,%dx			# Partition:drive
+
+/*
+ * Ok, we have a slice and drive in %dx now, so use that to locate and
+ * load boot2.  %si references the start of the slice we are looking
+ * for, so go ahead and load up the 64 sectors starting at sector 1024
+ * (i.e. after the two vdev labels).  We don't have to do anything fancy
+ * here to allow for an extra copy of boot1 and a partition table
+ * (compare to this section of the UFS bootstrap) so we just load it
+ * all at 0x8000. The first part of boot2 is BTX, which wants to run
+ * at 0x9000. The boot2.bin binary starts right after the end of BTX,
+ * so we have to figure out where the start of it is and then move the
+ * binary to 0xc000. After we have moved the client, we relocate BTX
+ * itself to 0x9000 - doing it in this order means that none of the
+ * memcpy regions overlap which would corrupt the copy.  Normally, BTX
+ * clients start at MEM_USR, or 0xa000, but when we use btxld to
+ * create boot2, we use an entry point of 0x2000.  That entry point is
+ * relative to MEM_USR; thus boot2.bin starts at 0xc000.
+ *
+ * The load area and the target area for the client overlap so we have
+ * to use a decrementing string move. We also play segment register
+ * games with the destination address for the move so that the client
+ * can be larger than 16k (which would overflow the zero segment since
+ * the client starts at 0xc000). Relocating BTX is easy since the load
+ * area and target area do not overlap.
+ */
+main.5: 	mov %dx,MEM_ARG			# Save args
+		movb $NSECT,%dh			# Sector count
+		movw $1024,%ax			# Offset to boot2
+		callw nread.1			# Read disk
+main.6:		mov $MEM_BUF,%si		# BTX (before reloc)
+		mov 0xa(%si),%bx		# Get BTX length and set
+		mov $NSECT*SIZ_SEC-1,%di	# Size of load area (less one)
+		mov %di,%si			# End of load
+		add $MEM_BUF,%si		#  area
+		sub %bx,%di			# End of client, 0xc000 rel
+		mov %di,%cx			# Size of
+		inc %cx				#  client
+		mov $(MEM_USR+2*SIZ_PAG)>>4,%dx	# Segment
+		mov %dx,%es			#   addressing 0xc000
+		std				# Move with decrement
+		rep				# Relocate
+		movsb				#  client
+		mov %ds,%dx			# Back to
+		mov %dx,%es			#  zero segment
+		mov $MEM_BUF,%si		# BTX (before reloc)
+		mov $MEM_BTX,%di		# BTX
+		mov %bx,%cx			# Get BTX length
+		cld				# Increment this time
+		rep				# Relocate
+		movsb				#  BTX
+
+/*
+ * Enable A20 so we can access memory above 1 meg.
+ * Use the zero-valued %cx as a timeout for embedded hardware which does not
+ * have a keyboard controller.
+ */
+seta20: 	cli				# Disable interrupts
+seta20.1:	dec %cx				# Timeout?
+		jz seta20.3			# Yes
+		inb $0x64,%al			# Get status
+		testb $0x2,%al			# Busy?
+		jnz seta20.1			# Yes
+		movb $0xd1,%al			# Command: Write
+		outb %al,$0x64			#  output port
+seta20.2:	inb $0x64,%al			# Get status
+		testb $0x2,%al			# Busy?
+		jnz seta20.2			# Yes
+		movb $0xdf,%al			# Enable
+		outb %al,$0x60			#  A20
+seta20.3:	sti				# Enable interrupts
+
+		jmp start+MEM_JMP-MEM_ORG	# Start BTX
+
+
+/*
+ * Trampoline used to call read from within boot1.
+ */
+nread:		xor %ax,%ax			# Sector offset in partition
+nread.1:	mov $MEM_BUF,%bx		# Transfer buffer
+		add 0x8(%si),%ax		# Get
+		mov 0xa(%si),%cx		#  LBA
+		push %cs			# Read from
+		callw xread.1	 		#  disk
+		jnc return			# If success, return
+		mov $msg_read,%si		# Otherwise, set the error
+						#  message and fall through to
+						#  the error routine
+/*
+ * Print out the error message pointed to by %ds:(%si) followed
+ * by a prompt, wait for a keypress, and then reboot the machine.
+ */
+error:		callw putstr			# Display message
+		mov $prompt,%si			# Display
+		callw putstr			#  prompt
+		xorb %ah,%ah			# BIOS: Get
+		int $0x16			#  keypress
+		movw $0x1234, BDA_BOOT		# Do a warm boot
+		ljmp $0xffff,$0x0		# reboot the machine
+/*
+ * Display a null-terminated string using the BIOS output.
+ */
+putstr.0:	mov $0x7,%bx	 		# Page:attribute
+		movb $0xe,%ah			# BIOS: Display
+		int $0x10			#  character
+putstr: 	lodsb				# Get char
+		testb %al,%al			# End of string?
+		jne putstr.0			# No
+
+/*
+ * Overused return code.  ereturn is used to return an error from the
+ * read function.  Since we assume putstr succeeds, we (ab)use the
+ * same code when we return from putstr.
+ */
+ereturn:	movb $0x1,%ah			# Invalid
+		stc				#  argument
+return: 	retw				# To caller
+/*
+ * Reads sectors from the disk.  If EDD is enabled, then check if it is
+ * installed and use it if it is.  If it is not installed or not enabled, then
+ * fall back to using CHS.  Since we use a LBA, if we are using CHS, we have to
+ * fetch the drive parameters from the BIOS and divide it out ourselves.
+ * Call with:
+ *
+ * %dl	- byte     - drive number
+ * stack - 16 bytes - EDD Packet (0x10 bytes, as built by xread.1)
+ */
+read:		testb $FL_PACKET,%cs:MEM_REL+flags-start # LBA support enabled?
+		jz read.1			# No, use CHS
+		cmpb $0x80,%dl			# Hard drive?
+		jb read.1			# No, use CHS
+		mov $0x55aa,%bx			# Magic
+		push %dx			# Save
+		movb $0x41,%ah			# BIOS: Check
+		int $0x13			#  extensions present
+		pop %dx				# Restore
+		jc read.1			# If error, use CHS
+		cmp $0xaa55,%bx			# Magic?
+		jne read.1			# No, so use CHS
+		testb $0x1,%cl			# Packet interface?
+		jz read.1			# No, so use CHS
+		mov %bp,%si			# Disk packet
+		movb $0x42,%ah			# BIOS: Extended
+		int $0x13			#  read
+		retw				# To caller
+#if 0	
+read.1:	 	push %dx			# Save
+		movb $0x8,%ah			# BIOS: Get drive
+		int $0x13			#  parameters
+		movb %dh,%ch			# Max head number
+		pop %dx				# Restore
+		jc return			# If error
+		andb $0x3f,%cl			# Sectors per track
+		jz ereturn			# If zero
+		cli				# Disable interrupts
+		mov 0x8(%bp),%eax		# Get LBA
+		push %dx			# Save
+		movzbl %cl,%ebx			# Divide by
+		xor %edx,%edx			#  sectors
+		div %ebx			#  per track
+		movb %ch,%bl			# Max head number
+		movb %dl,%ch			# Sector number
+		inc %bx				# Divide by
+		xorb %dl,%dl			#  number
+		div %ebx			#  of heads
+		movb %dl,%bh			# Head number
+		pop %dx				# Restore
+		cmpl $0x3ff,%eax		# Cylinder number supportable?
+		sti				# Enable interrupts
+		ja ereturn			# No, return an error
+		xchgb %al,%ah			# Set up cylinder
+		rorb $0x2,%al			#  number
+		orb %ch,%al			# Merge
+		inc %ax				#  sector
+		xchg %ax,%cx	 		#  number
+		movb %bh,%dh			# Head number
+		subb %ah,%al			# Sectors this track
+		mov 0x2(%bp),%ah		# Blocks to read
+		cmpb %ah,%al			# To read
+		jb read.2			#  this
+#ifdef	TRACK_AT_A_TIME
+		movb %ah,%al			#  track
+#else
+		movb $1,%al			#  one sector
+#endif
+read.2: 	mov $0x5,%di	 		# Try count
+read.3: 	les 0x4(%bp),%bx		# Transfer buffer
+		push %ax			# Save
+		movb $0x2,%ah			# BIOS: Read
+		int $0x13			#  from disk
+		pop %bx				# Restore
+		jnc read.4			# If success
+		dec %di				# Retry?
+		jz read.6			# No
+		xorb %ah,%ah			# BIOS: Reset
+		int $0x13			#  disk system
+		xchg %bx,%ax	 		# Block count
+		jmp read.3			# Continue
+read.4: 	movzbw %bl,%ax	 		# Sectors read
+		add %ax,0x8(%bp)		# Adjust
+		jnc read.5			#  LBA,
+		incw 0xa(%bp)	 		#  transfer
+read.5: 	shlb %bl			#  buffer
+		add %bl,0x5(%bp)		#  pointer,
+		sub %al,0x2(%bp)		#  block count
+		ja read.1			# If not done
+read.6: 	retw				# To caller
+#else
+read.1:		mov $msg_chs,%si
+		jmp error
+msg_chs:	.asciz "CHS not supported"
+#endif
+
+/* Messages */
+
+msg_read:	.asciz "Read"
+msg_part:	.asciz "Boot"
+
+prompt: 	.asciz " error\r\n"
+
+flags:		.byte FLAGS			# Flags
+
+		.org PRT_OFF,0x90
+
+/* Partition table */
+
+		.fill 0x30,0x1,0x0
+part4:		.byte 0x80, 0x00, 0x01, 0x00
+		.byte 0xa5, 0xfe, 0xff, 0xff
+		.byte 0x00, 0x00, 0x00, 0x00
+		.byte 0x50, 0xc3, 0x00, 0x00	# 50000 sectors long, bleh
+
+		.word 0xaa55			# Magic number
--- a/sys/boot/zfs/Makefile
+++ b/sys/boot/zfs/Makefile
@ -0,0 +1,29 @@
+# $FreeBSD$
+
+# Builds the zfsboot library from zfs.c (which itself includes zfsimpl.c).
+LIB=		zfsboot
+INTERNALLIB=
+
+SRCS+=		zfs.c
+
+# Header search paths: the common boot code, the tree two levels up,
+# the current directory, libstand, and the CDDL-derived ZFS boot sources.
+CFLAGS+=	-I${.CURDIR}/../common -I${.CURDIR}/../.. -I.
+CFLAGS+=	-I${.CURDIR}/../../../lib/libstand
+CFLAGS+=	-I${.CURDIR}/../../cddl/boot/zfs
+
+# XXX need arch-specific bootstrap CFLAGS here
+#
+# Freestanding build with MMX/3DNow!/SSE code generation disabled.
+CFLAGS+=	-ffreestanding -mpreferred-stack-boundary=2 \
+		-mno-mmx -mno-3dnow -mno-sse -mno-sse2 -mno-sse3
+
+CFLAGS+=	-Wformat -Wall
+
+# On amd64, point a local "machine" symlink at the i386 headers (found via
+# the -I. above) and remove it again on clean.
+.if ${MACHINE_ARCH} == "amd64"
+CLEANFILES+=    machine
+machine:
+	ln -sf ${.CURDIR}/../../../i386/include machine
+.endif
+
+.include <bsd.lib.mk>
+
+# The symlink must exist before dependencies are generated or objects built.
+.if ${MACHINE_ARCH} == "amd64"
+beforedepend ${OBJS}: machine
+.endif
--- a/sys/boot/zfs/zfs.c
+++ b/sys/boot/zfs/zfs.c
@ -0,0 +1,514 @@
+/*-
+ * Copyright (c) 2007 Doug Rabson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	$FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ *	Stand-alone file reading package.
+ */
+
+#include <sys/param.h>
+#include <sys/disklabel.h>
+#include <sys/time.h>
+#include <sys/queue.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stand.h>
+#include <bootstrap.h>
+
+#include "zfsimpl.c"
+
+static int	zfs_open(const char *path, struct open_file *f);
+static int	zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
+static int	zfs_close(struct open_file *f);
+static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
+static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
+static int	zfs_stat(struct open_file *f, struct stat *sb);
+static int	zfs_readdir(struct open_file *f, struct dirent *d);
+
+struct devsw zfs_dev;
+
+/*
+ * File system operations table for ZFS.  The initialisers are positional:
+ * the name string followed by the open, close, read, write, seek, stat
+ * and readdir handlers defined in this file.
+ */
+struct fs_ops zfs_fsops = {
+	"zfs",
+	zfs_open,
+	zfs_close,
+	zfs_read,
+	zfs_write,
+	zfs_seek,
+	zfs_stat,
+	zfs_readdir
+};
+
+/*
+ * In-core open file.  Allocated and zeroed by zfs_open() and stored in
+ * the open_file's f_fsdata.  The three zap fields are only filled in by
+ * zfs_readdir().
+ */
+struct file {
+	off_t		f_seekp;	/* seek pointer */
+	dnode_phys_t	f_dnode;	/* dnode found by zfs_lookup() */
+	uint64_t	f_zap_type;	/* zap type for readdir */
+	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
+	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
+};
+
+/*
+ * Open a file.
+ *
+ * Mounts the pool attached to f (f->f_devdata), allocates the per-file
+ * state and looks up upath in it.  On success the state is left in
+ * f->f_fsdata with the seek pointer at 0.
+ *
+ * Returns 0 on success, EINVAL if f is not on the zfs device, ENOMEM if
+ * the per-file state cannot be allocated, EIO if the root object set is
+ * not a ZFS file system, or the error from zfs_mount_pool()/zfs_lookup().
+ * On any failure f->f_fsdata is left NULL.
+ */
+static int
+zfs_open(const char *upath, struct open_file *f)
+{
+	spa_t *spa = (spa_t *) f->f_devdata;
+	struct file *fp;
+	int rc;
+
+	if (f->f_dev != &zfs_dev)
+		return (EINVAL);
+
+	rc = zfs_mount_pool(spa);
+	if (rc)
+		return (rc);
+
+	/* allocate file system specific data structure */
+	fp = malloc(sizeof(*fp));
+	if (fp == NULL)
+		return (ENOMEM);
+	bzero(fp, sizeof(*fp));
+	f->f_fsdata = (void *)fp;
+
+	if (spa->spa_root_objset.os_type != DMU_OST_ZFS) {
+		printf("Unexpected object set type %lld\n",
+		    spa->spa_root_objset.os_type);
+		rc = EIO;
+		goto out;
+	}
+
+	rc = zfs_lookup(spa, upath, &fp->f_dnode);
+	if (rc)
+		goto out;
+
+	fp->f_seekp = 0;
+out:
+	if (rc) {
+		f->f_fsdata = NULL;
+		free(fp);
+	}
+	return (rc);
+}
+
+/*
+ * Close a file: reset the dnode cache marker and release the per-file
+ * state, including the zap leaf buffer that zfs_readdir() may have
+ * allocated.  Safe to call when no state is attached.  Always returns 0.
+ */
+static int
+zfs_close(struct open_file *f)
+{
+	struct file *fp = (struct file *)f->f_fsdata;
+
+	dnode_cache_obj = 0;
+	f->f_fsdata = NULL;
+	if (fp == NULL)
+		return (0);
+
+	/* zfs_open() zeroes the structure, so this is NULL unless set. */
+	if (fp->f_zap_leaf != NULL)
+		free(fp->f_zap_leaf);
+	free(fp);
+	return (0);
+}
+
+/*
+ * Copy a portion of a file into kernel memory.
+ * Cross block boundaries when necessary.
+ *
+ * Reads up to `size' bytes at the current seek pointer into `start',
+ * clamped to the file size recorded in the znode, and advances the seek
+ * pointer by the number of bytes read.  If resid is not NULL it receives
+ * the number of requested bytes that were not read.
+ *
+ * A seek pointer at or beyond end of file (or negative) reads nothing and
+ * is not an error.  Returns 0 on success or the error from dnode_read().
+ */
+static int
+zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
+{
+	spa_t *spa = (spa_t *) f->f_devdata;
+	struct file *fp = (struct file *)f->f_fsdata;
+	const znode_phys_t *zp = (const znode_phys_t *) fp->f_dnode.dn_bonus;
+	size_t n;
+	int rc;
+
+	/*
+	 * Clamp to what is left in the file.  The position is tested first
+	 * so that "size - position" can never wrap when the seek pointer
+	 * is already past the end.
+	 */
+	n = size;
+	if (fp->f_seekp < 0 || (uint64_t)fp->f_seekp >= zp->zp_size)
+		n = 0;
+	else if (n > zp->zp_size - (uint64_t)fp->f_seekp)
+		n = zp->zp_size - (uint64_t)fp->f_seekp;
+
+	if (n > 0) {
+		rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
+		if (rc)
+			return (rc);
+		fp->f_seekp += n;
+	}
+	if (resid)
+		*resid = size - n;
+
+	return (0);
+}
+
+/*
+ * Don't be silly - the bootstrap has no business writing anything.
+ * All arguments are ignored; always returns EROFS.
+ */
+static int
+zfs_write(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
+{
+
+	return (EROFS);
+}
+
+/*
+ * Reposition the seek pointer of an open file.
+ *
+ * SEEK_SET sets it to offset, SEEK_CUR adds offset to it, and SEEK_END
+ * sets it to the file size MINUS offset (note the sign).  Any other
+ * `where' sets errno to EINVAL and returns -1 without moving the pointer.
+ * Returns the new seek pointer on success.
+ */
+static off_t
+zfs_seek(struct open_file *f, off_t offset, int where)
+{
+	struct file *fp = (struct file *)f->f_fsdata;
+	znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus;
+	off_t newpos;
+
+	if (where == SEEK_SET) {
+		newpos = offset;
+	} else if (where == SEEK_CUR) {
+		newpos = fp->f_seekp + offset;
+	} else if (where == SEEK_END) {
+		newpos = zp->zp_size - offset;
+	} else {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	fp->f_seekp = newpos;
+	return (newpos);
+}
+
+/*
+ * Fill in the mode, owner, group and size fields of *sb from the znode
+ * stored in the open file's dnode bonus area.  No other field of *sb is
+ * touched.  Always returns 0.
+ */
+static int
+zfs_stat(struct open_file *f, struct stat *sb)
+{
+	struct file *of = (struct file *)f->f_fsdata;
+	znode_phys_t *znode = (znode_phys_t *) of->f_dnode.dn_bonus;
+
+	sb->st_size = znode->zp_size;
+	sb->st_gid = znode->zp_gid;
+	sb->st_uid = znode->zp_uid;
+	sb->st_mode = znode->zp_mode;
+
+	return (0);
+}
+
+/*
+ * Return the next entry of an open directory in *d.
+ *
+ * The first call (seek pointer 0) reads the zap type word.  For a micro
+ * zap the seek pointer then walks the entry array as a byte offset.  For
+ * a fat zap the number of leaf blocks is read, one leaf block at a time
+ * is kept in fp->f_zap_leaf, and the seek pointer encodes the leaf's byte
+ * offset plus a chunk index in its low bits.
+ *
+ * Returns 0 with *d filled in, ENOENT at the end of the directory,
+ * ENOTDIR if the file is not a directory, ENOMEM if the leaf buffer
+ * cannot be allocated, or the error from dnode_read().
+ */
+static int
+zfs_readdir(struct open_file *f, struct dirent *d)
+{
+	spa_t *spa = (spa_t *) f->f_devdata;
+	struct file *fp = (struct file *)f->f_fsdata;
+	znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus;
+	mzap_ent_phys_t mze;
+	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
+	int rc;
+
+	/* The file type lives in the top four bits of the mode. */
+	if ((zp->zp_mode >> 12) != 0x4) {
+		return (ENOTDIR);
+	}
+
+	/*
+	 * If this is the first read, get the zap type.
+	 */
+	if (fp->f_seekp == 0) {
+		rc = dnode_read(spa, &fp->f_dnode,
+				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
+		if (rc)
+			return (rc);
+
+		if (fp->f_zap_type == ZBT_MICRO) {
+			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
+		} else {
+			rc = dnode_read(spa, &fp->f_dnode,
+					offsetof(zap_phys_t, zap_num_leafs),
+					&fp->f_num_leafs,
+					sizeof(fp->f_num_leafs));
+			if (rc)
+				return (rc);
+
+			/*
+			 * Reuse the leaf buffer if the directory is being
+			 * re-read from the start; allocate it otherwise.
+			 */
+			if (fp->f_zap_leaf == NULL) {
+				fp->f_zap_leaf =
+				    (zap_leaf_phys_t *)malloc(bsize);
+				if (fp->f_zap_leaf == NULL)
+					return (ENOMEM);
+			}
+
+			/* The first leaf is the block after the header. */
+			rc = dnode_read(spa, &fp->f_dnode,
+					bsize,
+					fp->f_zap_leaf,
+					bsize);
+			if (rc)
+				return (rc);
+
+			/*
+			 * Only move off position 0 once the leaf is valid,
+			 * so a failed first call is retried from scratch.
+			 */
+			fp->f_seekp = bsize;
+		}
+	}
+
+	if (fp->f_zap_type == ZBT_MICRO) {
+	mzap_next:
+		if (fp->f_seekp >= bsize)
+			return (ENOENT);
+
+		rc = dnode_read(spa, &fp->f_dnode,
+				fp->f_seekp, &mze, sizeof(mze));
+		if (rc)
+			return (rc);
+		fp->f_seekp += sizeof(mze);
+
+		/* An empty name marks an unused slot. */
+		if (!mze.mze_name[0])
+			goto mzap_next;
+
+		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
+		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
+		strcpy(d->d_name, mze.mze_name);
+		d->d_namlen = strlen(d->d_name);
+		return (0);
+	} else {
+		zap_leaf_t zl;
+		zap_leaf_chunk_t *zc, *nc;
+		int chunk;
+		size_t namelen, copied;
+		char *p;
+		uint64_t value;
+
+		/*
+		 * Initialise this so we can use the ZAP size
+		 * calculating macros.
+		 */
+		zl.l_bs = ilog2(bsize);
+		zl.l_phys = fp->f_zap_leaf;
+
+		/*
+		 * Figure out which chunk we are currently looking at
+		 * and consider seeking to the next leaf. We use the
+		 * low bits of f_seekp as a simple chunk index.
+		 */
+	fzap_next:
+		chunk = fp->f_seekp & (bsize - 1);
+		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
+			fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize;
+			chunk = 0;
+
+			/*
+			 * Check for EOF and read the new leaf.
+			 */
+			if (fp->f_seekp >= bsize * fp->f_num_leafs)
+				return (ENOENT);
+
+			rc = dnode_read(spa, &fp->f_dnode,
+					fp->f_seekp,
+					fp->f_zap_leaf,
+					bsize);
+			if (rc)
+				return (rc);
+		}
+
+		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
+		fp->f_seekp++;
+		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
+			goto fzap_next;
+
+		namelen = zc->l_entry.le_name_length;
+		if (namelen > sizeof(d->d_name))
+			namelen = sizeof(d->d_name);
+		copied = namelen;
+
+		/*
+		 * Paste the name back together.
+		 */
+		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
+		p = d->d_name;
+		while (namelen > 0) {
+			int len;
+			len = namelen;
+			if (len > ZAP_LEAF_ARRAY_BYTES)
+				len = ZAP_LEAF_ARRAY_BYTES;
+			memcpy(p, nc->l_array.la_array, len);
+			p += len;
+			namelen -= len;
+			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
+		}
+		/*
+		 * Terminate right after the copied bytes as well as at the
+		 * end of the buffer, so the result is a valid string even
+		 * if the stored length does not include a NUL (whether it
+		 * does is not visible here - TODO confirm).
+		 */
+		if (copied < sizeof(d->d_name))
+			d->d_name[copied] = 0;
+		d->d_name[sizeof(d->d_name) - 1] = 0;
+
+		/*
+		 * Assume the first eight bytes of the value are
+		 * a uint64_t.
+		 */
+		value = fzap_leaf_value(&zl, zc);
+
+		d->d_fileno = ZFS_DIRENT_OBJ(value);
+		d->d_type = ZFS_DIRENT_TYPE(value);
+		d->d_namlen = strlen(d->d_name);
+
+		return (0);
+	}
+}
+
+/*
+ * Read callback handed to vdev_probe(): read `size' bytes at byte
+ * `offset' into buf from the file descriptor carried in priv.  The vdev
+ * argument is not used.
+ *
+ * Returns 0 when exactly `size' bytes were read, and EIO if the seek
+ * fails or the read fails or comes up short.
+ */
+static int
+vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size)
+{
+	ssize_t got;
+	int fd;
+
+	fd = (uintptr_t) priv;
+	/* Do not read from the wrong place if the seek did not work. */
+	if (lseek(fd, offset, SEEK_SET) == -1)
+		return (EIO);
+	got = read(fd, buf, size);
+	if (got < 0 || (size_t)got != size)
+		return (EIO);
+	return (0);
+}
+
+/*
+ * Convert a pool guid to a 'unit number' suitable for use with zfs_dev_open.
+ *
+ * The unit number is the zero-based position of the pool in the zfs_pools
+ * list.  Returns -1 if no pool in the list has the given guid.
+ */
+int
+zfs_guid_to_unit(uint64_t guid)
+{
+	spa_t *pool;
+	int idx = 0;
+
+	for (pool = STAILQ_FIRST(&zfs_pools); pool != NULL;
+	    pool = STAILQ_NEXT(pool, spa_link)) {
+		if (pool->spa_guid == guid)
+			return (idx);
+		idx++;
+	}
+	return (-1);
+}
+
+/*
+ * Device init hook: initialise the ZFS code, then probe disk0..disk31 and
+ * slices 1-4 of every disk that opens, looking for vdevs.  Slices are not
+ * tried for a disk that fails to open.  Always returns 0.
+ */
+static int
+zfs_dev_init(void) 
+{
+	char name[512];
+	int disk, part;
+	int handle;
+
+	/*
+	 * Open all the disks we can find and see if we can reconstruct
+	 * ZFS pools from them. Bogusly assumes that the disks are named
+	 * diskN or diskNsM.
+	 */
+	zfs_init();
+	disk = 0;
+	while (disk < 32 /* XXX */) {
+		sprintf(name, "disk%d:", disk);
+		handle = open(name, O_RDONLY);
+		if (handle != -1) {
+			/*
+			 * If we find a vdev, the zfs code will eat the fd,
+			 * otherwise we close it.
+			 */
+			if (vdev_probe(vdev_read,
+			    (void*) (uintptr_t) handle, 0))
+				close(handle);
+
+			part = 1;
+			while (part <= 4) {
+				sprintf(name, "disk%ds%d:", disk, part);
+				handle = open(name, O_RDONLY);
+				if (handle != -1 && vdev_probe(vdev_read,
+				    (void*) (uintptr_t) handle, 0))
+					close(handle);
+				part++;
+			}
+		}
+		disk++;
+	}
+
+	return (0);
+}
+
+/*
+ * Print information about ZFS pools
+ *
+ * With verbose set, prints the full status of every pool through
+ * spa_all_status().  Otherwise writes one "zfsN:   <pool name>" line per
+ * pool through the pager, N being the pool's position in zfs_pools.
+ */
+static void
+zfs_dev_print(int verbose)
+{
+	spa_t *spa;
+	char line[80];
+	int unit;
+
+	if (verbose) {
+		spa_all_status();
+		return;
+	}
+	unit = 0;
+	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
+		/*
+		 * Only the short, fixed-size prefix is formatted into
+		 * line[]; the pool name is passed to the pager directly so
+		 * that a long name cannot overflow the buffer.
+		 */
+		sprintf(line, "    zfs%d:   ", unit);
+		pager_output(line);
+		pager_output(spa->spa_name);
+		pager_output("\n");
+		unit++;
+	}
+}
+
+/*
+ * Attempt to open the pool described by (dev) for use by (f).
+ *
+ * The single variadic argument is a struct devdesc pointer.  Its d_unit
+ * selects the pool by position in zfs_pools, and the descriptor is freed
+ * here.  Returns 0 with f->f_devdata pointing at the pool, or ENXIO if
+ * there is no pool at that position.
+ */
+static int 
+zfs_dev_open(struct open_file *f, ...)
+{
+	va_list		ap;
+	struct devdesc	*desc;
+	int		wanted, pos;
+	spa_t		*pool;
+
+	va_start(ap, f);
+	desc = va_arg(ap, struct devdesc*);
+	va_end(ap);
+
+	/*
+	 * We mostly ignore the stuff that devopen sends us. For now,
+	 * use the unit to find a pool - later we will override the
+	 * devname parsing so that we can name a pool and a fs within
+	 * the pool.
+	 */
+	wanted = desc->d_unit;
+	free(desc);
+
+	/* Walk the list until the wanted position or the end is reached. */
+	pool = STAILQ_FIRST(&zfs_pools);
+	for (pos = 0; pool != NULL && pos != wanted; pos++)
+		pool = STAILQ_NEXT(pool, spa_link);
+	if (pool == NULL)
+		return (ENXIO);
+
+	f->f_devdata = pool;
+	return (0);
+}
+
+/*
+ * Detach the pool from (f) by clearing f->f_devdata.  The pool itself is
+ * not released.  Always returns 0.
+ */
+static int 
+zfs_dev_close(struct open_file *f)
+{
+
+	f->f_devdata = NULL;
+	return (0);
+}
+
+/*
+ * Block I/O entry point of the device switch.  Not implemented for ZFS:
+ * all arguments are ignored and ENOSYS is returned.
+ */
+static int 
+zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
+{
+
+	return (ENOSYS);
+}
+
+/*
+ * Device switch entry for ZFS pools.  zfs_open() compares a file's f_dev
+ * against the address of this structure.  There is no ioctl support
+ * (noioctl) and no cleanup hook.
+ */
+struct devsw zfs_dev = {
+	.dv_name = "zfs", 
+	.dv_type = DEVT_ZFS, 
+	.dv_init = zfs_dev_init,
+	.dv_strategy = zfs_dev_strategy, 
+	.dv_open = zfs_dev_open, 
+	.dv_close = zfs_dev_close, 
+	.dv_ioctl = noioctl,
+	.dv_print = zfs_dev_print,
+	.dv_cleanup = NULL
+};
--- a/sys/boot/zfs/zfsimpl.c
+++ b/sys/boot/zfs/zfsimpl.c
--- a/sys/cddl/boot/zfs/README
+++ b/sys/cddl/boot/zfs/README
@ -0,0 +1,14 @@
+$FreeBSD$
+
+This directory contains various files derived from CDDL sources that
+are used by the ZFS bootstrap:
+
+    fletcher.c			checksum support
+    sha256.c			checksum support
+    lzjb.c			compression support
+    zfssubr.c			mostly checksum and compression support
+    zfsimpl.h			mostly describing the physical layout
+
+The files fletcher.c, lzjb.c and sha256.c are largely identical to the
+ZFS base code (with write support removed) and could be shared but
+that might complicate future imports from OpenSolaris.
--- a/sys/cddl/boot/zfs/fletcher.c
+++ b/sys/cddl/boot/zfs/fletcher.c
@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*#pragma ident	"%Z%%M%	%I%	%E% SMI"*/
+
+/*
+ * Fletcher-2 checksum of (size) bytes at (buf), read as host-order 64-bit
+ * words.  Two interleaved lanes are maintained: even-indexed words feed
+ * (a0, b0) and odd-indexed words feed (a1, b1).  The result is stored in
+ * (zcp) in the order a0, a1, b0, b1.
+ */
+static void
+fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	const uint64_t *word = buf;
+	const uint64_t *const stop = word + (size / sizeof (uint64_t));
+	uint64_t a0 = 0, a1 = 0, b0 = 0, b1 = 0;
+
+	/* Words are consumed in pairs, one per lane. */
+	while (word < stop) {
+		a0 += word[0];
+		a1 += word[1];
+		b0 += a0;
+		b1 += a1;
+		word += 2;
+	}
+
+	ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
+}
+
+/*
+ * Fletcher-4 checksum of (size) bytes at (buf), read as host-order 32-bit
+ * words.  Four 64-bit running sums are kept, each accumulating the one
+ * before it; they are stored in (zcp) in the order a, b, c, d.
+ */
+static void
+fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	const uint32_t *word = buf;
+	const uint32_t *const stop = word + (size / sizeof (uint32_t));
+	uint64_t a = 0, b = 0, c = 0, d = 0;
+
+	while (word < stop) {
+		a += *word++;
+		b += a;
+		c += b;
+		d += c;
+	}
+
+	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+}
--- a/sys/cddl/boot/zfs/lzjb.c
+++ b/sys/cddl/boot/zfs/lzjb.c
@ -0,0 +1,74 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*#pragma ident	"%Z%%M%	%I%	%E% SMI"*/
+
+/*
+ * We keep our own copy of this algorithm for 2 main reasons:
+ * 	1. If we didn't, anyone modifying common/os/compress.c would
+ *         directly break our on disk format
+ * 	2. Our version of lzjb does not have a number of checks that the
+ *         common/os version needs and uses
+ * In particular, we are adding the "feature" that compress() can
+ * take a destination buffer size and return -1 if the data will not
+ * compress to d_len or less.
+ */
+
+#define	MATCH_BITS	6
+#define	MATCH_MIN	3
+#define	MATCH_MAX	((1 << MATCH_BITS) + (MATCH_MIN - 1))
+#define	OFFSET_MASK	((1 << (16 - MATCH_BITS)) - 1)
+#define	LEMPEL_SIZE	256
+
+/*
+ * Decompress an LZJB stream.
+ *
+ * Reads from (s_start) and fills exactly (d_len) bytes at (d_start).
+ * (s_len) and (n) are accepted to match the common decompress signature
+ * but are not used.  Returns 0 once the destination is full, or -1 if a
+ * match refers to data before the start of the destination buffer.
+ *
+ * NOTE(review): src is never checked against s_len, so a truncated or
+ * corrupt stream could be read past its end - confirm that callers only
+ * pass verified data.
+ */
+/*ARGSUSED*/
+static int
+lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
+{
+	unsigned char *src = s_start;
+	unsigned char *dst = d_start;
+	unsigned char *d_end = (unsigned char *)d_start + d_len;
+	unsigned char *cpy, copymap = 0;
+	/* Start on the last bit so the first iteration loads a copymap. */
+	int copymask = 1 << (NBBY - 1);
+
+	while (dst < d_end) {
+		/* Every NBBY items a fresh control byte is fetched. */
+		if ((copymask <<= 1) == (1 << NBBY)) {
+			copymask = 1;
+			copymap = *src++;
+		}
+		if (copymap & copymask) {
+			/*
+			 * Match: two source bytes encode a length (top
+			 * MATCH_BITS bits, biased by MATCH_MIN) and a
+			 * backwards offset (remaining bits).
+			 */
+			int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN;
+			int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK;
+			src += 2;
+			if ((cpy = dst - offset) < (unsigned char *)d_start)
+				return (-1);
+			/* Byte-wise copy; the match is clipped at d_end. */
+			while (--mlen >= 0 && dst < d_end)
+				*dst++ = *cpy++;
+		} else {
+			/* Literal: copy one byte straight through. */
+			*dst++ = *src++;
+		}
+	}
+	return (0);
+}
--- a/sys/cddl/boot/zfs/sha256.c
+++ b/sys/cddl/boot/zfs/sha256.c
@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*#pragma ident	"%Z%%M%	%I%	%E% SMI"*/
+
+/*
+ * SHA-256 checksum, as specified in FIPS 180-2, available at:
+ * http://csrc.nist.gov/cryptval
+ *
+ * This is a very compact implementation of SHA-256.
+ * It is designed to be simple and portable, not to be fast.
+ */
+
+/*
+ * The literal definitions according to FIPS180-2 would be:
+ *
+ * 	Ch(x, y, z)     (((x) & (y)) ^ ((~(x)) & (z)))
+ * 	Maj(x, y, z)    (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
+ *
+ * We use logical equivalents which require one less op.
+ */
+#define	Ch(x, y, z)	((z) ^ ((x) & ((y) ^ (z))))
+#define	Maj(x, y, z)	(((x) & (y)) ^ ((z) & ((x) ^ (y))))
+#define	Rot32(x, s)	(((x) >> s) | ((x) << (32 - s)))
+#define	SIGMA0(x)	(Rot32(x, 2) ^ Rot32(x, 13) ^ Rot32(x, 22))
+#define	SIGMA1(x)	(Rot32(x, 6) ^ Rot32(x, 11) ^ Rot32(x, 25))
+#define	sigma0(x)	(Rot32(x, 7) ^ Rot32(x, 18) ^ ((x) >> 3))
+#define	sigma1(x)	(Rot32(x, 17) ^ Rot32(x, 19) ^ ((x) >> 10))
+
+static const uint32_t SHA256_K[64] = {
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/*
+ * Apply the SHA-256 compression function to one 64-byte block.
+ *
+ * (H) holds the eight 32-bit chaining values and is updated in place;
+ * (cp) points at the 64 input bytes, which are read as big-endian words.
+ */
+static void
+SHA256Transform(uint32_t *H, const uint8_t *cp)
+{
+	uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64];
+
+	/*
+	 * Widen each byte before shifting: a plain uint8_t is promoted to
+	 * int, and shifting a value >= 0x80 left by 24 would overflow it.
+	 */
+	for (t = 0; t < 16; t++, cp += 4)
+		W[t] = ((uint32_t)cp[0] << 24) | ((uint32_t)cp[1] << 16) |
+		    ((uint32_t)cp[2] << 8) | (uint32_t)cp[3];
+
+	/* Expand the 16 input words into the 64-entry message schedule. */
+	for (t = 16; t < 64; t++)
+		W[t] = sigma1(W[t - 2]) + W[t - 7] +
+		    sigma0(W[t - 15]) + W[t - 16];
+
+	a = H[0]; b = H[1]; c = H[2]; d = H[3];
+	e = H[4]; f = H[5]; g = H[6]; h = H[7];
+
+	for (t = 0; t < 64; t++) {
+		T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t];
+		T2 = SIGMA0(a) + Maj(a, b, c);
+		h = g; g = f; f = e; e = d + T1;
+		d = c; c = b; b = a; a = T1 + T2;
+	}
+
+	H[0] += a; H[1] += b; H[2] += c; H[3] += d;
+	H[4] += e; H[5] += f; H[6] += g; H[7] += h;
+}
+
+/*
+ * Compute the SHA-256 digest of (size) bytes at (buf) and store it in
+ * (zcp) as four 64-bit words, each built from two consecutive 32-bit
+ * state words (high word first).
+ */
+static void
+zio_checksum_SHA256(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	uint32_t H[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+	    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
+	const uint8_t *data = buf;
+	uint8_t pad[128];
+	uint64_t whole = size - (size & 63);	/* bytes in full blocks */
+	uint64_t off;
+	int padsize = size & 63;
+	int i;
+
+	/* A 64-bit offset keeps the walk correct for sizes above INT_MAX. */
+	for (off = 0; off < whole; off += 64)
+		SHA256Transform(H, data + off);
+
+	/*
+	 * The partial final block is the bytes that follow the last
+	 * whole block, not the first bytes of the buffer.
+	 */
+	for (i = 0; i < padsize; i++)
+		pad[i] = data[whole + i];
+
+	/* Append 0x80, then zeroes up to 8 bytes short of a block. */
+	for (pad[padsize++] = 0x80; (padsize & 63) != 56; padsize++)
+		pad[padsize] = 0;
+
+	/* Message length in bits, big-endian. */
+	for (i = 0; i < 8; i++)
+		pad[padsize++] = (size << 3) >> (56 - 8 * i);
+
+	for (i = 0; i < padsize; i += 64)
+		SHA256Transform(H, pad + i);
+
+	ZIO_SET_CHECKSUM(zcp,
+	    (uint64_t)H[0] << 32 | H[1],
+	    (uint64_t)H[2] << 32 | H[3],
+	    (uint64_t)H[4] << 32 | H[5],
+	    (uint64_t)H[6] << 32 | H[7]);
+}
--- a/sys/cddl/boot/zfs/zfsimpl.h
+++ b/sys/cddl/boot/zfs/zfsimpl.h
--- a/sys/cddl/boot/zfs/zfssubr.c
+++ b/sys/cddl/boot/zfs/zfssubr.c
@ -0,0 +1,193 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+static uint64_t zfs_crc64_table[256];
+
+/*
+ * Build the 256-entry CRC-64 lookup table used by zap_hash().
+ *
+ * The table is generated at most once: entry 128 of a finished table
+ * equals ZFS_CRC64_POLY (0x80 is shifted down to 1 in seven steps and the
+ * eighth step yields the polynomial), so that entry doubles as the
+ * "already initialised" marker.
+ */
+static void
+zfs_init_crc(void)
+{
+	int i, j;
+	uint64_t *ct;
+
+	/*
+	 * Calculate the crc64 table (used for the zap hash
+	 * function).
+	 */
+	if (zfs_crc64_table[128] != ZFS_CRC64_POLY) {
+		memset(zfs_crc64_table, 0, sizeof(zfs_crc64_table));
+		/*
+		 * For each byte value, run eight shift steps; a step XORs
+		 * in the polynomial when the bit shifted out is set
+		 * (-(x & 1) is all-ones in that case, zero otherwise).
+		 */
+		for (i = 0; i < 256; i++)
+			for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
+				*ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
+	}
+}
+
+/*
+ * Checksum function that ignores its input and always produces an
+ * all-zero checksum in (zcp).
+ */
+static void
+zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+
+	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
+}
+
+/*
+ * Signature for checksum functions.
+ */
+typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp);
+
+/*
+ * Information about each checksum function.
+ */
+typedef struct zio_checksum_info {
+	zio_checksum_t	*ci_func[2]; /* checksum function for each byteorder */
+	int		ci_correctable;	/* number of correctable bits	*/
+	int		ci_zbt;		/* uses zio block tail?	*/
+	const char	*ci_name;	/* descriptive name */
+} zio_checksum_info_t;
+
+#include "fletcher.c"
+#include "sha256.c"
+
+/*
+ * Checksum vectors.  Each row is { ci_func[2], ci_correctable, ci_zbt,
+ * ci_name }.  Only the first function slot is ever filled in here (apart
+ * from "off"), and rows with a NULL first slot cannot be verified by
+ * zio_checksum_error().
+ * NOTE(review): rows are presumably indexed by the on-disk checksum
+ * identifier - confirm against the enum in zfsimpl.h.
+ */
+static zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
+	{{NULL,			NULL},			0, 0,	"inherit"},
+	{{NULL,			NULL},			0, 0,	"on"},
+	{{zio_checksum_off,	zio_checksum_off},	0, 0,	"off"},
+	{{zio_checksum_SHA256,	NULL},			1, 1,	"label"},
+	{{zio_checksum_SHA256,	NULL},			1, 1,	"gang_header"},
+	{{fletcher_2_native,	NULL},			0, 1,	"zilog"},
+	{{fletcher_2_native,	NULL},			0, 0,	"fletcher2"},
+	{{fletcher_4_native,	NULL},			1, 0,	"fletcher4"},
+	{{zio_checksum_SHA256,	NULL},			1, 0,	"SHA256"},
+};
+
+/*
+ * Common signature for all zio compress/decompress functions.
+ */
+typedef size_t zio_compress_func_t(void *src, void *dst,
+    size_t s_len, size_t d_len, int);
+typedef int zio_decompress_func_t(void *src, void *dst,
+    size_t s_len, size_t d_len, int);
+
+/*
+ * Information about each compression function.
+ */
+typedef struct zio_compress_info {
+	zio_compress_func_t	*ci_compress;	/* compression function */
+	zio_decompress_func_t	*ci_decompress;	/* decompression function */
+	int			ci_level;	/* level parameter */
+	const char		*ci_name;	/* algorithm name */
+} zio_compress_info_t;
+
+#include "lzjb.c"
+
+/*
+ * Compression vectors.  Each row is { ci_compress, ci_decompress,
+ * ci_level, ci_name }.  No compress functions are provided, and lzjb is
+ * the only algorithm with a decompress function; zio_decompress_data()
+ * reports every other row as unsupported.
+ */
+static zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
+	{NULL,			NULL,			0,	"inherit"},
+	{NULL,			NULL,			0,	"on"},
+	{NULL,			NULL,			0,	"uncompressed"},
+	{NULL,			lzjb_decompress,	0,	"lzjb"},
+	{NULL,			NULL,			0,	"empty"},
+	{NULL,			NULL,			1,	"gzip-1"},
+	{NULL,			NULL,			2,	"gzip-2"},
+	{NULL,			NULL,			3,	"gzip-3"},
+	{NULL,			NULL,			4,	"gzip-4"},
+	{NULL,			NULL,			5,	"gzip-5"},
+	{NULL,			NULL,			6,	"gzip-6"},
+	{NULL,			NULL,			7,	"gzip-7"},
+	{NULL,			NULL,			8,	"gzip-8"},
+	{NULL,			NULL,			9,	"gzip-9"},
+};
+
+/*
+ * Verify the checksum of the block described by (bp) against (data).
+ *
+ * Returns 0 if the checksum matches, EINVAL if the block names a checksum
+ * type that is out of range or has no function in zio_checksum_table, and
+ * EIO on a mismatch.
+ *
+ * For checksum types that use a block tail (ci_zbt), the expected value
+ * is taken from the tail at the end of (data); the tail's checksum field
+ * is temporarily replaced by the block pointer's checksum while the
+ * function runs and is restored afterwards.
+ */
+static int
+zio_checksum_error(const blkptr_t *bp, void *data)
+{
+	zio_cksum_t zc = bp->blk_cksum;
+	unsigned int checksum = BP_GET_CHECKSUM(bp);
+	uint64_t size = BP_GET_PSIZE(bp);
+	zio_block_tail_t *zbt;
+	zio_checksum_info_t *ci;
+	zio_cksum_t actual_cksum, expected_cksum;
+
+	/*
+	 * Validate the index before forming a pointer into the table, so
+	 * that a bad on-disk value never yields an out-of-range pointer.
+	 */
+	if (checksum >= ZIO_CHECKSUM_FUNCTIONS)
+		return (EINVAL);
+	ci = &zio_checksum_table[checksum];
+	if (ci->ci_func[0] == NULL)
+		return (EINVAL);
+
+	if (ci->ci_zbt) {
+		/* The tail occupies the last bytes of the block. */
+		zbt = (zio_block_tail_t *)((char *)data + size) - 1;
+		expected_cksum = zbt->zbt_cksum;
+		zbt->zbt_cksum = zc;
+		ci->ci_func[0](data, size, &actual_cksum);
+		zbt->zbt_cksum = expected_cksum;
+		zc = expected_cksum;
+	} else {
+		/* ASSERT(!BP_IS_GANG(bp)); */
+		ci->ci_func[0](data, size, &actual_cksum);
+	}
+
+	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, zc)) {
+		/*printf("ZFS: read checksum failed\n");*/
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Decompress (srcsize) bytes at (src) into (destsize) bytes at (dest)
+ * using the algorithm selected by (cpfunc).
+ *
+ * Returns the decompress function's result, or EIO (after printing a
+ * message) if (cpfunc) is outside the table or names an algorithm for
+ * which no decompress function is available.
+ */
+static int
+zio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
+	void *dest, uint64_t destsize)
+{
+	zio_compress_info_t *ci;
+
+	/*
+	 * cpfunc is not validated by anything visible in this file, so
+	 * range-check it here instead of indexing the table blindly.
+	 */
+	if (cpfunc < 0 || cpfunc >= ZIO_COMPRESS_FUNCTIONS ||
+	    zio_compress_table[cpfunc].ci_decompress == NULL) {
+		printf("ZFS: unsupported compression algorithm %d\n", cpfunc);
+		return (EIO);
+	}
+	ci = &zio_compress_table[cpfunc];
+
+	return (ci->ci_decompress(src, dest, srcsize, destsize, ci->ci_level));
+}
+
+/*
+ * Hash (name) for a ZAP lookup: a table-driven CRC-64 over the bytes of
+ * the NUL-terminated string, seeded with (salt).
+ *
+ * The commented-out assertions inherited from the original code expect
+ * a non-zero salt and an initialised zfs_crc64_table.
+ */
+static uint64_t
+zap_hash(uint64_t salt, const char *name)
+{
+	const uint8_t *p = (const uint8_t *)name;
+	uint64_t h = salt;
+
+	/*ASSERT(h != 0);*/
+	/*ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);*/
+	while (*p != '\0') {
+		h = (h >> 8) ^ zfs_crc64_table[(h ^ *p) & 0xFF];
+		p++;
+	}
+
+	/*
+	 * Keep only the top ZAP_HASHBITS bits, since the remaining bits of
+	 * the cookie are needed for the collision differentiator.  We
+	 * MUST use the high bits, since those are the ones that we first
+	 * pay attention to when choosing the bucket.
+	 */
+	return (h & ~((1ULL << (64 - ZAP_HASHBITS)) - 1));
+}
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_atomic.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_atomic.c
@ -61,6 +61,15 @@ atomic_add_64(volatile uint64_t *target, int64_t delta)
 	*target += delta;
 	mtx_unlock(&atomic_mtx);
 }
+
+/*
+ * Decrement the 64-bit value at (target) by one while holding
+ * atomic_mtx.
+ */
+void
+atomic_dec_64(volatile uint64_t *target)
+{
+	mtx_lock(&atomic_mtx);
+	*target = *target - 1;
+	mtx_unlock(&atomic_mtx);
+}
 #endif

 uint64_t
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
@ -94,7 +94,7 @@ zfs_kmem_free(void *buf, size_t size __unused)
 {
 #ifdef KMEM_DEBUG
 	if (buf == NULL) {
-		printf("%s: attempt to free NULL\n",__func__);
+		printf("%s: attempt to free NULL\n", __func__);
 		return;
 	}
 	struct kmem_item *i;
@ -156,7 +156,7 @@ kmem_cache_create(char *name, size_t bufsize, size_t align,
 	cache->kc_constructor = constructor;
 	cache->kc_destructor = destructor;
 	cache->kc_private = private;
-#ifdef _KERNEL
+#if defined(_KERNEL) && !defined(KMEM_DEBUG)
 	cache->kc_zone = uma_zcreate(cache->kc_name, bufsize,
 	    constructor != NULL ? kmem_std_constructor : NULL,
 	    destructor != NULL ? kmem_std_destructor : NULL,
@ -171,23 +171,23 @@ kmem_cache_create(char *name, size_t bufsize, size_t align,
 void
 kmem_cache_destroy(kmem_cache_t *cache)
 {
+#if defined(_KERNEL) && !defined(KMEM_DEBUG)
 	uma_zdestroy(cache->kc_zone);
+#endif
 	kmem_free(cache, sizeof(*cache));
 }

 void *
 kmem_cache_alloc(kmem_cache_t *cache, int flags)
 {
-#ifdef _KERNEL
+#if defined(_KERNEL) && !defined(KMEM_DEBUG)
 	return (uma_zalloc_arg(cache->kc_zone, cache, flags));
 #else
 	void *p;

 	p = kmem_alloc(cache->kc_size, flags);
-	if (p != NULL) {
-		kmem_std_constructor(p, cache->kc_size, cache->kc_private,
-		    flags);
-	}
+	if (p != NULL && cache->kc_constructor != NULL)
+		kmem_std_constructor(p, cache->kc_size, cache, flags);
 	return (p);
 #endif
 }
@ -195,10 +195,11 @@ kmem_cache_alloc(kmem_cache_t *cache, int flags)
 void
 kmem_cache_free(kmem_cache_t *cache, void *buf)
 {
-#ifdef _KERNEL
+#if defined(_KERNEL) && !defined(KMEM_DEBUG)
 	uma_zfree_arg(cache->kc_zone, buf, cache);
 #else
-	kmem_std_destructor(buf, cache->kc_size, cache->kc_private);
+	if (cache->kc_destructor != NULL)
+		kmem_std_destructor(buf, cache->kc_size, cache);
 	kmem_free(buf, cache->kc_size);
 #endif
 }
@ -207,7 +208,9 @@ kmem_cache_free(kmem_cache_t *cache, void *buf)
 void
 kmem_cache_reap_now(kmem_cache_t *cache)
 {
+#ifndef KMEM_DEBUG
 	zone_drain(cache->kc_zone);
+#endif
 }

 void
@ -253,6 +256,8 @@ kmem_show(void *dummy __unused)
 		printf("KMEM_DEBUG: Leaked elements:\n\n");
 		LIST_FOREACH(i, &kmem_items, next) {
 			printf("address=%p\n", i);
+			stack_print_ddb(&i->stack);
+			printf("\n");
 		}
 	}
 	mtx_unlock(&kmem_items_mtx);
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c
@ -0,0 +1,112 @@
+/*-
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+ 
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/pathname.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+
+/*
+ * Look up (dirname) without an explicit starting vnode; a thin wrapper
+ * that forwards to lookupnameat() with a NULL startvp.
+ *
+ * NOTE(review): lookupnameat() as written calls vref()/VOP_ISLOCKED()
+ * on startvp unconditionally - confirm that a NULL startvp is really
+ * safe on this path.
+ */
+int
+lookupname(char *dirname, enum uio_seg seg, enum symfollow follow,
+    vnode_t **dirvpp, vnode_t **compvpp)
+{
+
+	return (lookupnameat(dirname, seg, follow, dirvpp, compvpp, NULL));
+}
+
+/*
+ * Look up (dirname) relative to (startvp) using namei().
+ *
+ * Returning the parent directory is not supported: (dirvpp) must be
+ * NULL.  The vnode found is stored in (*compvpp) and namei()'s error
+ * code is returned.  (*compvpp) is written even when namei() fails, so
+ * callers should check the return value before using it.
+ *
+ * (startvp) is unlocked for the duration of the lookup and relocked
+ * with its previous lock type before returning.
+ */
+int
+lookupnameat(char *dirname, enum uio_seg seg, enum symfollow follow,
+    vnode_t **dirvpp, vnode_t **compvpp, vnode_t *startvp)
+{
+	struct nameidata nd;
+	int error, ltype;
+
+	ASSERT(dirvpp == NULL);
+
+	/* NOTE(review): this reference is presumably consumed by namei(). */
+	vref(startvp);
+	/* Remember how startvp was locked so it can be restored below. */
+	ltype = VOP_ISLOCKED(startvp);
+	VOP_UNLOCK(startvp, 0);
+	NDINIT_ATVP(&nd, LOOKUP, LOCKLEAF | MPSAFE | follow, seg, dirname,
+	    startvp, curthread);
+	error = namei(&nd);
+	*compvpp = nd.ni_vp;
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	vn_lock(startvp, ltype | LK_RETRY);
+	return (error);
+}
+
+/*
+ * Follow a chain of mount points starting at (*cvpp).
+ *
+ * While the current vnode has a file system mounted on it, it is
+ * replaced by the root vnode of that file system, obtained from
+ * VFS_ROOT() with lock type (lktype).  On success (*cvpp) is updated to
+ * the final vnode and 0 is returned; otherwise the VFS_ROOT() error is
+ * returned.
+ *
+ * NOTE(review): on the error path the reference on the previous vnode
+ * has already been dropped and (*cvpp) is left unchanged - confirm what
+ * callers expect in that case.
+ */
+int
+traverse(vnode_t **cvpp, int lktype)
+{
+	kthread_t *td = curthread;
+	vnode_t *cvp;
+	vnode_t *tvp;
+	vfs_t *vfsp;
+	int error;
+
+	cvp = *cvpp;
+	tvp = NULL;
+
+	/*
+	 * If this vnode is mounted on, then we transparently indirect
+	 * to the vnode which is the root of the mounted file system.
+	 * Before we do this we must check that an unmount is not in
+	 * progress on this vnode.
+	 */
+
+	for (;;) {
+		/*
+		 * Reached the end of the mount chain?
+		 */
+		vfsp = vn_mountedvfs(cvp);
+		if (vfsp == NULL)
+			break;
+		/*
+		 * tvp is NULL for *cvpp vnode, which we can't unlock.
+		 */
+		if (tvp != NULL)
+			vput(cvp);
+		else
+			vrele(cvp);
+
+		/*
+		 * The read lock must be held across the call to VFS_ROOT() to
+		 * prevent a concurrent unmount from destroying the vfs.
+		 */
+		error = VFS_ROOT(vfsp, lktype, &tvp, td);
+		if (error != 0)
+			return (error);
+		cvp = tvp;
+	}
+
+	*cvpp = cvp;
+	return (0);
+}
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_misc.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_misc.c
@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/libkern.h>
+#include <sys/limits.h>
 #include <sys/misc.h>
 #include <sys/sunddi.h>

@ -39,18 +40,31 @@ struct opensolaris_utsname utsname = {
 	.nodename = hostname
 };

+/*
+ * Solaris-style strtol(): parse (str) in the given (base) and store the
+ * value in (*result).  If (nptr) is not NULL it receives a pointer to
+ * the first character that was not consumed.
+ *
+ * Returns 0 on success, EINVAL if no characters could be converted, and
+ * ERANGE if the value equals LONG_MIN or LONG_MAX (the values strtol()
+ * clamps to on overflow; a genuine LONG_MIN/LONG_MAX input cannot be
+ * told apart from that here).
+ */
+int
+ddi_strtol(const char *str, char **nptr, int base, long *result)
+{
+	char *end;
+
+	*result = strtol(str, &end, base);
+	if (nptr != NULL)
+		*nptr = end;
+	/*
+	 * A value of 0 is only an error when nothing was consumed;
+	 * testing the value itself would reject a perfectly valid "0".
+	 */
+	if (end == str)
+		return (EINVAL);
+	if (*result == LONG_MIN || *result == LONG_MAX)
+		return (ERANGE);
+	return (0);
+}
+
 int
 ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result)
 {
-	char *end;

 	if (str == hw_serial) {
 		*result = hostid;
 		return (0);
 	}

-	*result = strtoul(str, &end, base);
+	*result = strtoul(str, nptr, base);
 	if (*result == 0)
 		return (EINVAL);
+	else if (*result == ULONG_MAX)
+		return (ERANGE);
 	return (0);
 }
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_policy.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_policy.c
@ -30,9 +30,20 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/priv.h>
 #include <sys/vnode.h>
+#include <sys/mntent.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
+#include <sys/jail.h>
 #include <sys/policy.h>
+#include <sys/zfs_vfsops.h>
+
+/*
+ * Policy check for NFS operations.  Currently this is just a PRIV_ROOT
+ * check on (cred); returns the priv_check_cred() result.
+ */
+int
+secpolicy_nfs(struct ucred *cred)
+{
+	int error;
+
+	/* TODO: Change PRIV_ROOT! */
+	error = priv_check_cred(cred, PRIV_ROOT, 0);
+	return (error);
+}

 int
 secpolicy_zfs(struct ucred *cred)
@ -62,16 +73,32 @@ secpolicy_fs_unmount(struct ucred *cred, struct mount *vfsp __unused)
 	return (priv_check_cred(cred, PRIV_VFS_UNMOUNT, 0));
 }

+/*
+ * Decide whether (cred) may act as the owner of the file system (mp).
+ *
+ * When zfs_super_owner is set, a credential whose uid matches the uid
+ * the file system was mounted with is accepted outright, provided it is
+ * either not jailed or belongs to the same prison as the mount
+ * credential.  In every other case the answer is the result of the
+ * PRIV_VFS_MOUNT_OWNER privilege check.  Returns 0 if allowed.
+ */
+int
+secpolicy_fs_owner(struct mount *mp, struct ucred *cred)
+{
+
+	if (!zfs_super_owner)
+		goto check_priv;
+	if (cred->cr_uid != mp->mnt_cred->cr_uid)
+		goto check_priv;
+	if (jailed(cred) && cred->cr_prison != mp->mnt_cred->cr_prison)
+		goto check_priv;
+	return (0);
+
+check_priv:
+	return (priv_check_cred(cred, PRIV_VFS_MOUNT_OWNER, 0));
+}
+
 /*
 * This check is done in kern_link(), so we could just return 0 here.
 */
 extern int hardlink_check_uid;
 int
-secpolicy_basic_link(struct ucred *cred)
+secpolicy_basic_link(struct vnode *vp, struct ucred *cred)
 {

 	if (!hardlink_check_uid)
 		return (0);
+	if (secpolicy_fs_owner(vp->v_mount, cred) == 0)
+		return (0);
 	return (priv_check_cred(cred, PRIV_VFS_LINK, 0));
 }

@ -83,9 +110,11 @@ secpolicy_vnode_stky_modify(struct ucred *cred)
 }

 int
-secpolicy_vnode_remove(struct ucred *cred)
+secpolicy_vnode_remove(struct vnode *vp, struct ucred *cred)
 {

+	if (secpolicy_fs_owner(vp->v_mount, cred) == 0)
+		return (0);
 	return (priv_check_cred(cred, PRIV_VFS_ADMIN, 0));
 }

@ -94,9 +123,11 @@ secpolicy_vnode_access(struct ucred *cred, struct vnode *vp, uint64_t owner,
    accmode_t accmode)
 {

-	if ((accmode & VREAD) && priv_check_cred(cred, PRIV_VFS_READ, 0) != 0) {
+	if (secpolicy_fs_owner(vp->v_mount, cred) == 0)
+		return (0);
+
+	if ((accmode & VREAD) && priv_check_cred(cred, PRIV_VFS_READ, 0) != 0)
 		return (EACCES);
-	}
 	if ((accmode & VWRITE) &&
 	    priv_check_cred(cred, PRIV_VFS_WRITE, 0) != 0) {
 		return (EACCES);
@ -116,11 +147,13 @@ secpolicy_vnode_access(struct ucred *cred, struct vnode *vp, uint64_t owner,
 }

 int
-secpolicy_vnode_setdac(struct ucred *cred, uid_t owner)
+secpolicy_vnode_setdac(struct vnode *vp, struct ucred *cred, uid_t owner)
 {

 	if (owner == cred->cr_uid)
 		return (0);
+	if (secpolicy_fs_owner(vp->v_mount, cred) == 0)
+		return (0);
 	return (priv_check_cred(cred, PRIV_VFS_ADMIN, 0));
 }

@ -148,7 +181,7 @@ secpolicy_vnode_setattr(struct ucred *cred, struct vnode *vp, struct vattr *vap,
 		 * In the specific case of creating a set-uid root
 		 * file, we need even more permissions.
 		 */
-		error = secpolicy_vnode_setdac(cred, ovap->va_uid);
+		error = secpolicy_vnode_setdac(vp, cred, ovap->va_uid);
 		if (error)
 			return (error);
 		error = secpolicy_setid_setsticky_clear(vp, vap, ovap, cred);
@ -158,7 +191,7 @@ secpolicy_vnode_setattr(struct ucred *cred, struct vnode *vp, struct vattr *vap,
 		vap->va_mode = ovap->va_mode;
 	}
 	if (mask & (AT_UID | AT_GID)) {
-		error = secpolicy_vnode_setdac(cred, ovap->va_uid);
+		error = secpolicy_vnode_setdac(vp, cred, ovap->va_uid);
 		if (error)
 			return (error);

@ -170,14 +203,16 @@ secpolicy_vnode_setattr(struct ucred *cred, struct vnode *vp, struct vattr *vap,
 		if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) ||
 		    ((mask & AT_GID) && vap->va_gid != ovap->va_gid &&
 		     !groupmember(vap->va_gid, cred))) {
-			error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
-			if (error)
-				return (error);
+			if (secpolicy_fs_owner(vp->v_mount, cred) != 0) {
+				error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
+				if (error)
+					return (error);
+			}
 		}

 		if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) ||
 		    ((mask & AT_GID) && vap->va_gid != ovap->va_gid)) {
-			secpolicy_setid_clear(vap, cred);
+			secpolicy_setid_clear(vap, vp, cred);
 		}
 	}
 	if (mask & (AT_ATIME | AT_MTIME)) {
@ -189,7 +224,7 @@ secpolicy_vnode_setattr(struct ucred *cred, struct vnode *vp, struct vattr *vap,
 		 * If times is non-NULL, ... The caller must be the owner of
 		 * the file or be the super-user.
 		 */
-		error = secpolicy_vnode_setdac(cred, ovap->va_uid);
+		error = secpolicy_vnode_setdac(vp, cred, ovap->va_uid);
 		if (error && (vap->va_vaflags & VA_UTIMES_NULL))
 			error = unlocked_access(node, VWRITE, cred);
 		if (error)
@ -206,25 +241,33 @@ secpolicy_vnode_create_gid(struct ucred *cred)
 }

 int
-secpolicy_vnode_setids_setgids(struct ucred *cred, gid_t gid)
+secpolicy_vnode_setids_setgids(struct vnode *vp, struct ucred *cred, gid_t gid)
 {

-	if (!groupmember(gid, cred))
-		return (priv_check_cred(cred, PRIV_VFS_SETGID, 0));
-	return (0);
+	if (groupmember(gid, cred))
+		return (0);
+	if (secpolicy_fs_owner(vp->v_mount, cred) == 0)
+		return (0);
+	return (priv_check_cred(cred, PRIV_VFS_SETGID, 0));
 }

 int
-secpolicy_vnode_setid_retain(struct ucred *cred, boolean_t issuidroot __unused)
+secpolicy_vnode_setid_retain(struct vnode *vp, struct ucred *cred,
+    boolean_t issuidroot __unused)
 {

+	if (secpolicy_fs_owner(vp->v_mount, cred) == 0)
+		return (0);
 	return (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0));
 }

 void
-secpolicy_setid_clear(struct vattr *vap, struct ucred *cred)
+secpolicy_setid_clear(struct vattr *vap, struct vnode *vp, struct ucred *cred)
 {

+	if (secpolicy_fs_owner(vp->v_mount, cred) == 0)
+		return;
+
 	if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0) {
 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) {
 			vap->va_mask |= AT_MODE;
@ -239,6 +282,9 @@ secpolicy_setid_setsticky_clear(struct vnode *vp, struct vattr *vap,
 {
        int error;

+	if (secpolicy_fs_owner(vp->v_mount, cred) == 0)
+		return (0);
+
 	/*
 	 * Privileged processes may set the sticky bit on non-directories,
 	 * as well as set the setgid bit on a file with a group that the process
@ -253,9 +299,61 @@ secpolicy_setid_setsticky_clear(struct vnode *vp, struct vattr *vap,
 	 * group-id bit.
 	 */
 	if ((vap->va_mode & S_ISGID) != 0) {
-		error = secpolicy_vnode_setids_setgids(cred, ovap->va_gid);
+		error = secpolicy_vnode_setids_setgids(vp, cred, ovap->va_gid);
 		if (error)
 			return (error);
 	}
 	return (0);
 }
+
+/*
+ * Policy check for mounting a file system: requires PRIV_VFS_MOUNT.
+ * (mvp) and (vfsp) are not consulted.
+ */
+int
+secpolicy_fs_mount(cred_t *cr, vnode_t *mvp, struct mount *vfsp)
+{
+	int error;
+
+	error = priv_check_cred(cr, PRIV_VFS_MOUNT, 0);
+	return (error);
+}
+
+/*
+ * Check whether (cred) may act as the owner of a file whose owner uid
+ * is (owner).  Allowed if the uids match or if the credential passes
+ * secpolicy_fs_owner() for the vnode's mount; otherwise the result of
+ * the PRIV_VFS_MOUNT_OWNER privilege check is returned.
+ */
+int
+secpolicy_vnode_owner(struct vnode *vp, cred_t *cred, uid_t owner)
+{
+
+	if (owner == cred->cr_uid ||
+	    secpolicy_fs_owner(vp->v_mount, cred) == 0)
+		return (0);
+
+	/* XXX: vfs_suser()? */
+	return (priv_check_cred(cred, PRIV_VFS_MOUNT_OWNER, 0));
+}
+
+/*
+ * Policy check for changing file ownership.  Passes if (cred) satisfies
+ * secpolicy_fs_owner() for the vnode's mount, else falls back to the
+ * PRIV_VFS_CHOWN privilege check.  (check_self) is not consulted.
+ */
+int
+secpolicy_vnode_chown(struct vnode *vp, cred_t *cred, boolean_t check_self)
+{
+	int error;
+
+	error = secpolicy_fs_owner(vp->v_mount, cred);
+	if (error != 0)
+		error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
+	return (error);
+}
+
+/*
+ * Restrict the mount options of (vfsp) for credentials that lack
+ * PRIV_VFS_MOUNT_NONUSER: the mount is flagged VFS_NOSETUID and
+ * MNT_USER, the "setuid" option is cleared and "nosetuid" is set.
+ * Credentials that hold the privilege leave the mount untouched.
+ */
+void
+secpolicy_fs_mount_clearopts(cred_t *cr, struct mount *vfsp)
+{
+
+	if (priv_check_cred(cr, PRIV_VFS_MOUNT_NONUSER, 0) != 0) {
+		/*
+		 * NOTE(review): the option helpers run with the mount
+		 * interlock held - confirm they never sleep.
+		 */
+		MNT_ILOCK(vfsp);
+		vfsp->vfs_flag |= VFS_NOSETUID | MNT_USER;
+		vfs_clearmntopt(vfsp, MNTOPT_SETUID);
+		vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 0);
+		MNT_IUNLOCK(vfsp);
+	}
+}
+
+/*
+ * Check privileges for setting xvattr attributes.
+ *
+ * Only (cr) is examined: the caller needs PRIV_VFS_SYSFLAGS.  The
+ * attribute set, owner and vnode type arguments are not consulted.
+ */
+int
+secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, vtype_t vtype)
+{
+	int error;
+
+	error = priv_check_cred(cr, PRIV_VFS_SYSFLAGS, 0);
+	return (error);
+}
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
+#include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/cred.h>
 #include <sys/vfs.h>
@ -109,61 +110,13 @@ vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
 	return (error != 0 ? 0 : 1);
 }

-int
-traverse(vnode_t **cvpp, int lktype)
-{
-	kthread_t *td = curthread;
-	vnode_t *cvp;
-	vnode_t *tvp;
-	vfs_t *vfsp;
-	int error;
-
-	cvp = *cvpp;
-	tvp = NULL;
-
-	/*
-	 * If this vnode is mounted on, then we transparently indirect
-	 * to the vnode which is the root of the mounted file system.
-	 * Before we do this we must check that an unmount is not in
-	 * progress on this vnode.
-	 */
-
-	for (;;) {
-		/*
-		 * Reached the end of the mount chain?
-		 */
-		vfsp = vn_mountedvfs(cvp);
-		if (vfsp == NULL)
-			break;
-		/*
-		 * tvp is NULL for *cvpp vnode, which we can't unlock.
-		 */
-		if (tvp != NULL)
-			vput(cvp);
-		else
-			vrele(cvp);
-
-		/*
-		 * The read lock must be held across the call to VFS_ROOT() to
-		 * prevent a concurrent unmount from destroying the vfs.
-		 */
-		error = VFS_ROOT(vfsp, lktype, &tvp, td);
-		if (error != 0)
-			return (error);
-		cvp = tvp;
-	}
-
-	*cvpp = cvp;
-	return (0);
-}
-
 int
 domount(kthread_t *td, vnode_t *vp, const char *fstype, char *fspath,
    char *fspec, int fsflags)
 {
 	struct mount *mp;
 	struct vfsconf *vfsp;
-	struct ucred *newcr, *oldcr;
+	struct ucred *cr;
 	int error;

 	/*
@ -203,29 +156,31 @@ domount(kthread_t *td, vnode_t *vp, const char *fstype, char *fspath,

 	/*
 	 * Set the mount level flags.
-	 * crdup() can sleep, so do it before acquiring a mutex.
 	 */
-	newcr = crdup(kcred);
-	MNT_ILOCK(mp);
 	if (fsflags & MNT_RDONLY)
 		mp->mnt_flag |= MNT_RDONLY;
 	mp->mnt_flag &=~ MNT_UPDATEMASK;
 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS);
 	/*
 	 * Unprivileged user can trigger mounting a snapshot, but we don't want
-	 * him to unmount it, so we switch to privileged credentials.
+	 * him to unmount it, so we switch to the original mount's credentials.
 	 */
-	oldcr = mp->mnt_cred;
-	mp->mnt_cred = newcr;
+	crfree(mp->mnt_cred);
+	mp->mnt_cred = crdup(vp->v_mount->mnt_cred);
 	mp->mnt_stat.f_owner = mp->mnt_cred->cr_uid;
-	MNT_IUNLOCK(mp);
-	crfree(oldcr);
 	/*
 	 * Mount the filesystem.
 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 	 * get.  No freeing of cn_pnbuf.
 	 */
+	/*
+	 * XXX: This is evil, but we can't mount a snapshot as a regular user.
+	 * XXX: Is it safe when snapshot is mounted from within a jail?
+	 */
+	cr = td->td_ucred;
+	td->td_ucred = kcred;
 	error = VFS_MOUNT(mp, td);
+	td->td_ucred = cr;

 	if (!error) {
 		if (mp->mnt_opt != NULL)
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_zone.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_zone.c
@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/jail.h>
+#include <sys/osd.h>
 #include <sys/priv.h>
 #include <sys/zone.h>

@ -52,7 +53,7 @@ typedef struct zone_dataset {

 LIST_HEAD(zone_dataset_head, zone_dataset);

-static struct prison_service *zone_prison_service = NULL;
+static int zone_slot;

 int
 zone_dataset_attach(struct ucred *cred, const char *dataset, int jailid)
@ -60,7 +61,7 @@ zone_dataset_attach(struct ucred *cred, const char *dataset, int jailid)
 	struct zone_dataset_head *head;
 	zone_dataset_t *zd, *zd2;
 	struct prison *pr;
-	int error;
+	int dofree, error;

 	if ((error = priv_check_cred(cred, PRIV_ZFS_JAIL, 0)) != 0)
 		return (error);
@ -76,18 +77,33 @@ zone_dataset_attach(struct ucred *cred, const char *dataset, int jailid)
 		return (ENOENT);
 	}

-	head = prison_service_data_get(zone_prison_service, pr);
-	LIST_FOREACH(zd2, head, zd_next) {
-		if (strcmp(dataset, zd2->zd_dataset) == 0) {
-			free(zd, M_ZONES);
-			error = EEXIST;
-			goto failure;
+	head = osd_jail_get(pr, zone_slot);
+	if (head != NULL) {
+		dofree = 0;
+		LIST_FOREACH(zd2, head, zd_next) {
+			if (strcmp(dataset, zd2->zd_dataset) == 0) {
+				free(zd, M_ZONES);
+				error = EEXIST;
+				goto end;
+			}
 		}
+	} else {
+		dofree = 1;
+		prison_hold_locked(pr);
+		mtx_unlock(&pr->pr_mtx);
+		head = malloc(sizeof(*head), M_ZONES, M_WAITOK);
+		LIST_INIT(head);
+		mtx_lock(&pr->pr_mtx);
+		error = osd_jail_set(pr, zone_slot, head);
+		KASSERT(error == 0, ("osd_jail_set() failed (error=%d)", error));
 	}
 	strcpy(zd->zd_dataset, dataset);
 	LIST_INSERT_HEAD(head, zd, zd_next);
-failure:
-	mtx_unlock(&pr->pr_mtx);
+end:
+	if (dofree)
+		prison_free_locked(pr);
+	else
+		mtx_unlock(&pr->pr_mtx);
 	return (error);
 }

@ -107,16 +123,25 @@ zone_dataset_detach(struct ucred *cred, const char *dataset, int jailid)
 	sx_sunlock(&allprison_lock);
 	if (pr == NULL)
 		return (ENOENT);
-	head = prison_service_data_get(zone_prison_service, pr);
-	LIST_FOREACH(zd, head, zd_next) {
-		if (strcmp(dataset, zd->zd_dataset) == 0) {
-			LIST_REMOVE(zd, zd_next);
-			free(zd, M_ZONES);
-			goto success;
-		}
+	head = osd_jail_get(pr, zone_slot);
+	if (head == NULL) {
+		error = ENOENT;
+		goto end;
 	}
-	error = ENOENT;
-success:
+	LIST_FOREACH(zd, head, zd_next) {
+		if (strcmp(dataset, zd->zd_dataset) == 0)
+			break;
+	}
+	if (zd == NULL)
+		error = ENOENT;
+	else {
+		LIST_REMOVE(zd, zd_next);
+		free(zd, M_ZONES);
+		if (LIST_EMPTY(head))
+			osd_jail_del(pr, zone_slot);
+		error = 0;
+	}
+end:
 	mtx_unlock(&pr->pr_mtx);
 	return (error);
 }
@ -136,14 +161,16 @@ zone_dataset_visible(const char *dataset, int *write)

 	if (dataset[0] == '\0')
 		return (0);
-	if (INGLOBALZONE(curproc)) {
+	if (INGLOBALZONE(curthread)) {
 		if (write != NULL)
 			*write = 1;
 		return (1);
 	}
 	pr = curthread->td_ucred->cr_prison;
 	mtx_lock(&pr->pr_mtx);
-	head = prison_service_data_get(zone_prison_service, pr);
+	head = osd_jail_get(pr, zone_slot);
+	if (head == NULL)
+		goto end;

 	/*
 	 * Walk the list once, looking for datasets which match exactly, or
@ -188,49 +215,32 @@ zone_dataset_visible(const char *dataset, int *write)
 	return (ret);
 }

-static int
-zone_create(struct prison_service *psrv, struct prison *pr)
-{
-	struct zone_dataset_head *head;
-
-	head = malloc(sizeof(*head), M_ZONES, M_WAITOK);
-	LIST_INIT(head);
-	mtx_lock(&pr->pr_mtx);
-	prison_service_data_set(psrv, pr, head);
-	mtx_unlock(&pr->pr_mtx);
-	return (0);
-}
-
-static int
-zone_destroy(struct prison_service *psrv, struct prison *pr)
+static void
+zone_destroy(void *arg)
 {
 	struct zone_dataset_head *head;
 	zone_dataset_t *zd;

-	mtx_lock(&pr->pr_mtx);
-	head = prison_service_data_del(psrv, pr);
-	mtx_unlock(&pr->pr_mtx);
-	while ((zd = LIST_FIRST(head)) != NULL) {
-		LIST_REMOVE(zd, zd_next);
-		free(zd, M_ZONES);
-	}
-	free(head, M_ZONES);
-	return (0);
+	head = arg;
+        while ((zd = LIST_FIRST(head)) != NULL) {
+                LIST_REMOVE(zd, zd_next);
+                free(zd, M_ZONES);
+        }
+        free(head, M_ZONES);
 }

 static void
 zone_sysinit(void *arg __unused)
 {

-	zone_prison_service = prison_service_register("zfs", zone_create,
-	    zone_destroy);
+	zone_slot = osd_jail_register(zone_destroy);
 }

 static void
 zone_sysuninit(void *arg __unused)
 {

-	prison_service_deregister(zone_prison_service);
+	osd_jail_deregister(zone_slot);
 }

 SYSINIT(zone_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY, zone_sysinit, NULL);
--- a/sys/cddl/compat/opensolaris/sys/atomic.h
+++ b/sys/cddl/compat/opensolaris/sys/atomic.h
@ -38,6 +38,7 @@

 #ifndef __LP64__
 extern void atomic_add_64(volatile uint64_t *target, int64_t delta);
+extern void atomic_dec_64(volatile uint64_t *target);
 extern void *atomic_cas_ptr(volatile void *target, void *cmp,  void *newval);
 #endif
 #ifndef __sparc64__
@ -83,6 +84,14 @@ atomic_dec_32_nv(volatile uint32_t *target)
 	return (atomic_fetchadd_32(target, -1) - 1);
 }

+#ifdef __LP64__
+/*
+ * Decrement the 64-bit value at *target by one via atomic_subtract_64().
+ * This inline version exists only when __LP64__ is defined; otherwise the
+ * function is declared extern earlier in this header.
+ */
+static __inline void
+atomic_dec_64(volatile uint64_t *target)
+{
+	atomic_subtract_64(target, 1);
+}
+#endif
+
 static __inline void
 atomic_inc_32(volatile uint32_t *target)
 {
--- a/sys/cddl/compat/opensolaris/sys/callb.h
+++ b/sys/cddl/compat/opensolaris/sys/callb.h
@ -1,219 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * $FreeBSD$
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef	_SYS_CALLB_H
-#define	_SYS_CALLB_H
-
-#pragma ident	"@(#)callb.h	1.29	05/06/23 SMI"
-
-#include <sys/kcondvar.h>
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-/*
- * definitions of callback classes (c_class)
- *
- * Callbacks belong in the same class if (1) their callback routines
- * do the same kind of processing (ideally, using the same callback function)
- * and (2) they can/should be executed at the same time in a cpr
- * suspend/resume operation.
- *
- * Note: The DAEMON class, in particular, is for stopping kernel threads
- * and nothing else.  The CALLB_* macros below should be used to deal
- * with kernel threads, and the callback function should be callb_generic_cpr.
- * Another idiosyncrasy of the DAEMON class is that if a suspend operation
- * fails, some of the callback functions may be called with the RESUME
- * code which were never called with SUSPEND.  Not a problem currently,
- * but see bug 4201851.
- */
-#define	CB_CL_CPR_DAEMON	0
-#define	CB_CL_CPR_VM		1
-#define	CB_CL_CPR_CALLOUT	2
-#define	CB_CL_CPR_OBP		3
-#define	CB_CL_CPR_FB		4
-#define	CB_CL_PANIC		5
-#define	CB_CL_CPR_RPC		6
-#define	CB_CL_CPR_PROMPRINTF	7
-#define	CB_CL_UADMIN		8
-#define	CB_CL_CPR_PM		9
-#define	CB_CL_HALT		10
-#define	CB_CL_CPR_DMA		11
-#define	CB_CL_CPR_POST_USER	12
-#define	CB_CL_UADMIN_PRE_VFS    13
-#define	CB_CL_MDBOOT		CB_CL_UADMIN
-#define	CB_CL_ENTER_DEBUGGER	14
-#define	CB_CL_CPR_POST_KERNEL	15
-#define	NCBCLASS		16 /* CHANGE ME if classes are added/removed */
-
-/*
- * CB_CL_CPR_DAEMON class specific definitions are given below:
- */
-
-/*
- * code for CPR callb_execute_class
- */
-#define	CB_CODE_CPR_CHKPT	0
-#define	CB_CODE_CPR_RESUME	1
-
-typedef	void *		callb_id_t;
-/*
- * Per kernel thread structure for CPR daemon callbacks.
- * Must be protected by either a existing lock in the daemon or
- * a new lock created for such a purpose.
- */
-typedef struct callb_cpr {
-	kmutex_t	*cc_lockp;	/* lock to protect this struct */
-	char		cc_events;	/* various events for CPR */
-	callb_id_t	cc_id;		/* callb id address */
-	kcondvar_t	cc_callb_cv;	/* cv for callback waiting */
-	kcondvar_t	cc_stop_cv;	/* cv to checkpoint block */
-} callb_cpr_t;
-
-/*
- * cc_events definitions
- */
-#define	CALLB_CPR_START		1	/* a checkpoint request's started */
-#define	CALLB_CPR_SAFE		2	/* thread is safe for CPR */
-#define	CALLB_CPR_ALWAYS_SAFE	4	/* thread is ALWAYS safe for CPR */
-
-/*
- * Used when checking that all kernel threads are stopped.
- */
-#define	CALLB_MAX_RETRY		3	/* when waiting for kthread to sleep */
-#define	CALLB_THREAD_DELAY	10	/* ticks allowed to reach sleep */
-#define	CPR_KTHREAD_TIMEOUT_SEC	90	/* secs before callback times out -- */
-					/* due to pwr mgmt of disks, make -- */
-					/* big enough for worst spinup time */
-
-#ifdef  _KERNEL
-/*
- *
- * CALLB_CPR_INIT macro is used by kernel threads to add their entry to
- * the callback table and perform other initialization.  It automatically
- * adds the thread as being in the callback class CB_CL_CPR_DAEMON.
- *
- *	cp    - ptr to the callb_cpr_t structure for this kernel thread
- *
- *	lockp - pointer to mutex protecting the callb_cpr_t stuct
- *
- *	func  - pointer to the callback function for this kernel thread.
- *		It has the prototype boolean_t <func>(void *arg, int code)
- *		where: arg	- ptr to the callb_cpr_t structure
- *		       code	- not used for this type of callback
- *		returns: B_TRUE if successful; B_FALSE if unsuccessful.
- *
- *	name  - a string giving the name of the kernel thread
- *
- * Note: lockp is the lock to protect the callb_cpr_t (cp) structure
- * later on.  No lock held is needed for this initialization.
- */
-#define	CALLB_CPR_INIT(cp, lockp, func, name)	{			\
-		strlcpy(curthread->td_name, (name),			\
-		    sizeof(curthread->td_name));			\
-		strlcpy(curthread->td_proc->p_comm, (name),		\
-		    sizeof(curthread->td_proc->p_comm));		\
-		bzero((caddr_t)(cp), sizeof (callb_cpr_t));		\
-		(cp)->cc_lockp = lockp;					\
-		(cp)->cc_id = callb_add(func, (void *)(cp),		\
-			CB_CL_CPR_DAEMON, name);			\
-	}
-
-#ifndef __lock_lint
-#define	CALLB_CPR_ASSERT(cp)	ASSERT(MUTEX_HELD((cp)->cc_lockp));
-#else
-#define	CALLB_CPR_ASSERT(cp)
-#endif
-/*
- * Some threads (like the idle threads) do not adhere to the callback
- * protocol and are always considered safe.  Such threads must never exit.
- * They register their presence by calling this macro during their
- * initialization.
- *
- * Args:
- *	t	- thread pointer of the client kernel thread
- *	name	- a string giving the name of the kernel thread
- */
-#define	CALLB_CPR_INIT_SAFE(t, name) {					\
-		(void) callb_add_thread(callb_generic_cpr_safe,		\
-		(void *) &callb_cprinfo_safe, CB_CL_CPR_DAEMON,		\
-		    name, t);						\
-	}
-/*
- * The lock to protect cp's content must be held before
- * calling the following two macros.
- *
- * Any code region between CALLB_CPR_SAFE_BEGIN and CALLB_CPR_SAFE_END
- * is safe for checkpoint/resume.
- */
-#define	CALLB_CPR_SAFE_BEGIN(cp) { 			\
-		CALLB_CPR_ASSERT(cp)			\
-		(cp)->cc_events |= CALLB_CPR_SAFE;	\
-		if ((cp)->cc_events & CALLB_CPR_START)	\
-			cv_signal(&(cp)->cc_callb_cv);	\
-	}
-#define	CALLB_CPR_SAFE_END(cp, lockp) {				\
-		CALLB_CPR_ASSERT(cp)				\
-		while ((cp)->cc_events & CALLB_CPR_START)	\
-			cv_wait(&(cp)->cc_stop_cv, lockp);	\
-		(cp)->cc_events &= ~CALLB_CPR_SAFE;		\
-	}
-/*
- * cv_destroy is nop right now but may be needed in the future.
- */
-#define	CALLB_CPR_EXIT(cp) {				\
-		CALLB_CPR_ASSERT(cp)			\
-		(cp)->cc_events |= CALLB_CPR_SAFE;	\
-		if ((cp)->cc_events & CALLB_CPR_START)	\
-			cv_signal(&(cp)->cc_callb_cv);	\
-		mutex_exit((cp)->cc_lockp);		\
-		(void) callb_delete((cp)->cc_id);	\
-		cv_destroy(&(cp)->cc_callb_cv);		\
-		cv_destroy(&(cp)->cc_stop_cv);		\
-	}
-
-extern callb_cpr_t callb_cprinfo_safe;
-extern callb_id_t callb_add(boolean_t  (*)(void *, int), void *, int, char *);
-extern callb_id_t callb_add_thread(boolean_t (*)(void *, int),
-    void *, int, char *, kthread_id_t);
-extern int	callb_delete(callb_id_t);
-extern void	callb_execute(callb_id_t, int);
-extern void	*callb_execute_class(int, int);
-extern boolean_t callb_generic_cpr(void *, int);
-extern boolean_t callb_generic_cpr_safe(void *, int);
-extern boolean_t callb_is_stopped(kthread_id_t, caddr_t *);
-extern void	callb_lock_table(void);
-extern void	callb_unlock_table(void);
-#endif
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _SYS_CALLB_H */
--- a/sys/cddl/compat/opensolaris/sys/cred.h
+++ b/sys/cddl/compat/opensolaris/sys/cred.h
@ -30,12 +30,14 @@
 #define	_OPENSOLARIS_SYS_CRED_H_

 #include <sys/param.h>
-#include_next <sys/ucred.h>
-
-#ifdef _KERNEL
+#define	_WANT_UCRED
+#include <sys/ucred.h>
+#undef _WANT_UCRED

 typedef struct ucred cred_t;
+typedef struct ucred ucred_t;

+#ifdef _KERNEL
 #define	CRED()		(curthread->td_ucred)

 /*
@ -43,9 +45,14 @@ typedef struct ucred cred_t;
 */
 #define	kcred	(thread0.td_ucred)

-#define	crgetuid(cred)	((cred)->cr_uid)
-#define	crgetgid(cred)	((cred)->cr_gid)
-
-#endif	/* _KERNEL */
+#define	crgetuid(cred)		((cred)->cr_uid)
+#define	crgetgid(cred)		((cred)->cr_gid)
+#define	crgetgroups(cred)	((cred)->cr_groups)
+#define	crgetngroups(cred)	((cred)->cr_ngroups)
+#define	crgetsid(cred, i)	(NULL)
+#else	/* !_KERNEL */
+#define	kcred		NULL
+#define	CRED()		NULL
+#endif	/* !_KERNEL */

 #endif	/* _OPENSOLARIS_SYS_CRED_H_ */
--- a/sys/cddl/compat/opensolaris/sys/dnlc.h
+++ b/sys/cddl/compat/opensolaris/sys/dnlc.h
@ -35,6 +35,6 @@
 #define	dnlc_update(dvp, name, vp)	do { } while (0)
 #define	dnlc_remove(dvp, name)		do { } while (0)
 #define	dnlc_purge_vfsp(vfsp, count)	(0)
-#define	dnlc_reduce_cache(percent)	do { } while (0)
+#define	dnlc_reduce_cache(percent)	EVENTHANDLER_INVOKE(vfs_lowvnodes, (int)(intptr_t)(percent))

 #endif	/* !_OPENSOLARIS_SYS_DNLC_H_ */
--- a/sys/cddl/compat/opensolaris/sys/file.h
+++ b/sys/cddl/compat/opensolaris/sys/file.h
@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_FILE_H_
+#define	_OPENSOLARIS_SYS_FILE_H_
+
+#include_next <sys/file.h>
+
+#ifdef _KERNEL
+typedef	struct file	file_t;
+
+/*
+ * Look up the file behind descriptor fd on behalf of the current thread.
+ *
+ * A non-zero write selects fget_write(); zero selects fget_read().  Returns
+ * the file pointer when the chosen lookup returns 0, and NULL otherwise.
+ * NOTE(review): presumably the returned file is held and must be dropped
+ * with releasef() - confirm against fget_read()/fget_write().
+ */
+static __inline file_t *
+getf(int fd, int write)
+{
+	struct file *fp;
+
+	if (write && fget_write(curthread, fd, &fp) == 0)
+		return (fp);
+	else if (!write && fget_read(curthread, fd, &fp) == 0)
+		return (fp);
+	return (NULL);
+}
+
+/*
+ * Drop a file pointer by calling fdrop() on it for the current thread.
+ */
+static __inline void
+releasef(file_t *fp)
+{
+
+	fdrop(fp, curthread);
+}
+#endif	/* _KERNEL */
+
+#endif	/* !_OPENSOLARIS_SYS_FILE_H_ */
--- a/sys/cddl/compat/opensolaris/sys/kidmap.h
+++ b/sys/cddl/compat/opensolaris/sys/kidmap.h
@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_KIDMAP_H_
+#define	_OPENSOLARIS_SYS_KIDMAP_H_
+
+#include <sys/idmap.h>
+
+typedef int32_t	idmap_stat;
+typedef void	idmap_get_handle_t;
+
+#define	kidmap_get_create()		(NULL)
+#define	kidmap_get_destroy(hdl)		do { } while (0)
+#define	kidmap_get_mappings(hdl)	(NULL)
+
+#endif	/* _OPENSOLARIS_SYS_KIDMAP_H_ */
--- a/sys/cddl/compat/opensolaris/sys/kmem.h
+++ b/sys/cddl/compat/opensolaris/sys/kmem.h
@ -38,15 +38,16 @@
 #include <vm/vm_extern.h>

 #define	KM_SLEEP		M_WAITOK
+#define	KM_PUSHPAGE		M_WAITOK
 #define	KM_NOSLEEP		M_NOWAIT
 #define	KMC_NODEBUG		0

 typedef struct kmem_cache {
 	char		kc_name[32];
-#ifdef _KERNEL
+#if defined(_KERNEL) && !defined(KMEM_DEBUG)
 	uma_zone_t	kc_zone;
 #else
-	size_t		size;
+	size_t		kc_size;
 #endif
 	int		(*kc_constructor)(void *, void *, int);
 	void		(*kc_destructor)(void *, void *);
--- a/sys/cddl/compat/opensolaris/sys/misc.h
+++ b/sys/cddl/compat/opensolaris/sys/misc.h
@ -29,6 +29,13 @@
 #ifndef _OPENSOLARIS_SYS_MISC_H_
 #define	_OPENSOLARIS_SYS_MISC_H_

+#define	MAXUID	2147483647
+
+#define	SPEC_MAXOFFSET_T	OFF_MAX
+
+#define	_ACL_ACLENT_ENABLED	0x1
+#define	_ACL_ACE_ENABLED	0x2
+
 #define	_FIOFFS		(INT_MIN)
 #define	_FIOGDIO	(INT_MIN+1)
 #define	_FIOSDIO	(INT_MIN+2)
--- a/sys/cddl/compat/opensolaris/sys/mntent.h
+++ b/sys/cddl/compat/opensolaris/sys/mntent.h
@ -54,5 +54,7 @@
 #define	MNTOPT_EXEC	"exec"		/* enable executables */
 #define	MNTOPT_NOEXEC	"noexec"	/* disable executables */
 #define	MNTOPT_RESTRICT	"restrict"	/* restricted autofs mount */
+#define	MNTOPT_NBMAND	"nbmand"	/* allow non-blocking mandatory locks */
+#define	MNTOPT_NONBMAND	"nonbmand"	/* deny non-blocking mandatory locks */

 #endif	/* !_OPENSOLARIS_MNTENT_H_ */
--- a/sys/cddl/compat/opensolaris/sys/param.h
+++ b/sys/cddl/compat/opensolaris/sys/param.h
@ -34,4 +34,8 @@

 #define	PAGESIZE	PAGE_SIZE

+#ifdef _KERNEL
+#define	ptob(x)		((uint64_t)(x) << PAGE_SHIFT)
+#endif
+
 #endif
--- a/sys/cddl/compat/opensolaris/sys/pathname.h
+++ b/sys/cddl/compat/opensolaris/sys/pathname.h
@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_PATHNAME_H_
+#define	_OPENSOLARIS_SYS_PATHNAME_H_
+
+#ifdef _KERNEL
+
+#include <sys/param.h>
+#include <sys/vnode.h>
+
+typedef struct pathname {
+	char	*pn_buf;		/* underlying storage */
+	char	*pn_path;		/* remaining pathname */
+	size_t	pn_pathlen;		/* remaining length */
+	size_t	pn_bufsize;		/* total size of pn_buf */
+} pathname_t;
+
+#define	pn_alloc(pnp)	panic("pn_alloc() called")
+#define	pn_free(pnp)	panic("pn_free() called")
+
+int lookupname(char *, enum uio_seg, enum symfollow, vnode_t **, vnode_t **);
+int lookupnameat(char *, enum uio_seg, enum symfollow, vnode_t **, vnode_t **,
+    vnode_t *);
+int traverse(vnode_t **, int);
+
+#endif	/* _KERNEL */
+
+#endif	/* _OPENSOLARIS_SYS_PATHNAME_H_ */
--- a/sys/cddl/compat/opensolaris/sys/policy.h
+++ b/sys/cddl/compat/opensolaris/sys/policy.h
@ -33,30 +33,44 @@

 #ifdef _KERNEL

+#include <sys/vnode.h>
+
 struct mount;
 struct ucred;
 struct vattr;
 struct vnode;

-int	secpolicy_zfs(struct ucred  *cred);
-int	secpolicy_sys_config(struct ucred  *cred, int checkonly);
-int	secpolicy_zinject(struct ucred  *cred);
-int	secpolicy_fs_unmount(struct ucred  *cred, struct mount *vfsp);
-int	secpolicy_basic_link(struct ucred  *cred);
+int	secpolicy_nfs(struct ucred *cred);
+int	secpolicy_zfs(struct ucred *cred);
+int	secpolicy_sys_config(struct ucred *cred, int checkonly);
+int	secpolicy_zinject(struct ucred *cred);
+int	secpolicy_fs_unmount(struct ucred *cred, struct mount *vfsp);
+int	secpolicy_basic_link(struct vnode *vp, struct ucred *cred);
+int	secpolicy_vnode_owner(struct vnode *vp, cred_t *cred, uid_t owner);
+int	secpolicy_vnode_chown(struct vnode *vp, cred_t *cred,
+	    boolean_t check_self);
 int	secpolicy_vnode_stky_modify(struct ucred *cred);
-int	secpolicy_vnode_remove(struct ucred *cred);
+int	secpolicy_vnode_remove(struct vnode *vp, struct ucred *cred);
 int	secpolicy_vnode_access(struct ucred *cred, struct vnode *vp,
 	    uint64_t owner, accmode_t accmode);
-int	secpolicy_vnode_setdac(struct ucred *cred, uid_t owner);
+int	secpolicy_vnode_setdac(struct vnode *vp, struct ucred *cred,
+	    uid_t owner);
 int	secpolicy_vnode_setattr(struct ucred *cred, struct vnode *vp,
 	    struct vattr *vap, const struct vattr *ovap, int flags,
 	    int unlocked_access(void *, int, struct ucred *), void *node);
 int	secpolicy_vnode_create_gid(struct ucred *cred);
-int	secpolicy_vnode_setids_setgids(struct ucred *cred, gid_t gid);
-int	secpolicy_vnode_setid_retain(struct ucred *cred, boolean_t issuidroot);
-void	secpolicy_setid_clear(struct vattr *vap, struct ucred *cred);
+int	secpolicy_vnode_setids_setgids(struct vnode *vp, struct ucred *cred,
+	    gid_t gid);
+int	secpolicy_vnode_setid_retain(struct vnode *vp, struct ucred *cred,
+	    boolean_t issuidroot);
+void	secpolicy_setid_clear(struct vattr *vap, struct vnode *vp,
+	    struct ucred *cred);
 int	secpolicy_setid_setsticky_clear(struct vnode *vp, struct vattr *vap,
 	    const struct vattr *ovap, struct ucred *cred);
+int	secpolicy_fs_owner(struct mount *vfsp, struct ucred *cred);
+int	secpolicy_fs_mount(cred_t *cr, vnode_t *mvp, struct mount *vfsp);
+void	secpolicy_fs_mount_clearopts(cred_t *cr, struct mount *vfsp);
+int	secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, vtype_t vtype);

 #endif	/* _KERNEL */

--- a/sys/cddl/compat/opensolaris/sys/proc.h
+++ b/sys/cddl/compat/opensolaris/sys/proc.h
@ -54,12 +54,6 @@ typedef	struct thread	kthread_t;
 typedef struct thread	*kthread_id_t;
 typedef struct proc	proc_t;

-#if (KSTACK_PAGES * PAGE_SIZE) < 16384
-#define	ZFS_KSTACK_PAGES	(16384 / PAGE_SIZE)
-#else
-#define	ZFS_KSTACK_PAGES	0
-#endif
-
 static __inline kthread_t *
 thread_create(caddr_t stk, size_t stksize, void (*proc)(void *), void *arg,
    size_t len, proc_t *pp, int state, pri_t pri)
@ -71,11 +65,10 @@ thread_create(caddr_t stk, size_t stksize, void (*proc)(void *), void *arg,
 	 * Be sure there are no surprises.
 	 */
 	ASSERT(stk == NULL);
-	ASSERT(stksize == 0);
 	ASSERT(len == 0);
 	ASSERT(state == TS_RUN);

-	error = kproc_create(proc, arg, &p, 0, ZFS_KSTACK_PAGES,
+	error = kproc_create(proc, arg, &p, 0, stksize / PAGE_SIZE,
 	    "solthread %p", proc);
 	return (error == 0 ? FIRST_THREAD_IN_PROC(p) : NULL);
 }
--- a/sys/cddl/compat/opensolaris/sys/refstr.h
+++ b/sys/cddl/compat/opensolaris/sys/refstr.h
@ -0,0 +1,34 @@
+/*-
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_REFSTR_H_
+#define	_OPENSOLARIS_SYS_REFSTR_H_
+
+#define	refstr_value(str)	(str)
+
+#endif	/* _OPENSOLARIS_SYS_REFSTR_H_ */
--- a/sys/cddl/compat/opensolaris/sys/sid.h
+++ b/sys/cddl/compat/opensolaris/sys/sid.h
@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_SID_H_
+#define	_OPENSOLARIS_SYS_SID_H_
+
+typedef struct ksiddomain {
+	char	kd_name[16];	/* Domain part of SID */
+} ksiddomain_t;
+typedef void	ksid_t;
+
+/*
+ * Allocate a ksiddomain_t with kmem_alloc(KM_SLEEP) and return it.
+ *
+ * The domain argument is not consulted: kd_name is always set to "FreeBSD".
+ * The matching release routine in this header is ksiddomain_rele().
+ */
+static __inline ksiddomain_t *
+ksid_lookupdomain(const char *domain)
+{
+	ksiddomain_t *kd;
+
+	kd = kmem_alloc(sizeof(*kd), KM_SLEEP);
+	strlcpy(kd->kd_name, "FreeBSD", sizeof(kd->kd_name));
+	return (kd);
+}
+
+/*
+ * Free a ksiddomain_t with kmem_free(), passing sizeof(*kd) as the size.
+ */
+static __inline void
+ksiddomain_rele(ksiddomain_t *kd)
+{
+
+	kmem_free(kd, sizeof(*kd));
+}
+
+#endif	/* _OPENSOLARIS_SYS_SID_H_ */
--- a/Show more
+++ b/Show more