Merge r358179 through r358238.

PR: 244251
svn path=/projects/clang1000-import/; revision=358239
2024-07-23 03:06:48 +00:00 · 2020-02-22 09:58:11 +00:00 · 2020-02-22 09:58:11 +00:00 · 24a22d1d9c · 2020-12-20 02:59:44 +00:00
parent 6c140a7281 5a79fd8574
commit 24a22d1d9c
53 changed files with 1173 additions and 588 deletions
--- a/2
+++ b/2
@ -33,7 +33,7 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW:
 	using clang 3.5.0 or higher.

 20200220:
-	ncurses has been updated to a newer version (6.1-20200118). Given the ABI
+	ncurses has been updated to a newer version (6.2-20200215). Given the ABI
 	has changed, users will have to rebuild all the ports that are linked to
 	ncurses.

--- a/bin/sh/miscbltin.c
+++ b/bin/sh/miscbltin.c
@ -117,7 +117,7 @@ fdgetc(struct fdctx *fdc, char *c)
 static void
 fdctx_destroy(struct fdctx *fdc)
 {
-	size_t residue;
+	off_t residue;

 	if (fdc->buflen > 1) {
 	/*
--- a/lib/libc/sys/truncate.2
+++ b/lib/libc/sys/truncate.2
@ -28,7 +28,7 @@
 .\"     @(#)truncate.2	8.1 (Berkeley) 6/4/93
 .\" $FreeBSD$
 .\"
-.Dd May 4, 2015
+.Dd January 24, 2020
 .Dt TRUNCATE 2
 .Os
 .Sh NAME
@ -160,6 +160,9 @@ system calls appeared in
 These calls should be generalized to allow ranges
 of bytes in a file to be discarded.
 .Pp
-Use of
+Historically, the use of
 .Fn truncate
-to extend a file is not portable.
+or
+.Fn ftruncate
+to extend a file was not portable, but this behavior became required in
+.St -p1003.1-2008 .
--- a/lib/libfetch/common.c
+++ b/lib/libfetch/common.c
@ -677,6 +677,7 @@ fetch_connect(const char *host, int port, int af, int verbose)
 	if (sockshost)
 		if (!fetch_socks5_init(conn, host, port, verbose))
 			goto fail;
+	free(sockshost);
 	if (cais != NULL)
 		freeaddrinfo(cais);
 	if (sais != NULL)
@ -686,7 +687,10 @@ fetch_connect(const char *host, int port, int af, int verbose)
 	fetch_syserr();
 fail:
 	free(sockshost);
-	if (sd >= 0)
+	/* Fully close if it was opened; otherwise just don't leak the fd. */
+	if (conn != NULL)
+		fetch_close(conn);
+	else if (sd >= 0)
 		close(sd);
 	if (cais != NULL)
 		freeaddrinfo(cais);
--- a/sys/cam/scsi/scsi_da.c
+++ b/sys/cam/scsi/scsi_da.c
@ -342,7 +342,7 @@ struct da_softc {
 	LIST_HEAD(, ccb_hdr) pending_ccbs;
 	int	 refcount;		/* Active xpt_action() calls */
 	da_state state;
-	da_flags flags;
+	u_int	 flags;
 	da_quirks quirks;
 	int	 minimum_cmd_size;
 	int	 error_inject;
@ -2335,11 +2335,11 @@ dasysctlinit(void *context, int pending)
 	    "Flags for drive");
 	SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
 	    OID_AUTO, "rotating", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
-	    &softc->flags, DA_FLAG_ROTATING, dabitsysctl, "I",
+	    &softc->flags, (u_int)DA_FLAG_ROTATING, dabitsysctl, "I",
 	    "Rotating media *DEPRECATED* gone in FreeBSD 14");
 	SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
 	    OID_AUTO, "unmapped_io", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
-	    &softc->flags, DA_FLAG_UNMAPPEDIO, dabitsysctl, "I",
+	    &softc->flags, (u_int)DA_FLAG_UNMAPPEDIO, dabitsysctl, "I",
 	    "Unmapped I/O support *DEPRECATED* gone in FreeBSD 14");

 #ifdef CAM_TEST_FAILURE
@ -2619,11 +2619,11 @@ dadeletemethodchoose(struct da_softc *softc, da_delete_methods default_method)
 static int
 dabitsysctl(SYSCTL_HANDLER_ARGS)
 {
-	int flags = (intptr_t)arg1;
-	int test = arg2;
+	u_int *flags = arg1;
+	u_int test = arg2;
 	int tmpout, error;

-	tmpout = !!(flags & test);
+	tmpout = !!(*flags & test);
 	error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
 	if (error || !req->newptr)
 		return (error);
--- a/sys/compat/linuxkpi/common/include/linux/fs.h
+++ b/sys/compat/linuxkpi/common/include/linux/fs.h
@ -302,25 +302,4 @@ call_mmap(struct linux_file *file, struct vm_area_struct *vma)
 	return (file->f_op->mmap(file, vma));
 }

-/* Shared memory support */
-unsigned long linux_invalidate_mapping_pages(vm_object_t, pgoff_t, pgoff_t);
-struct page *linux_shmem_read_mapping_page_gfp(vm_object_t, int, gfp_t);
-struct linux_file *linux_shmem_file_setup(const char *, loff_t, unsigned long);
-void linux_shmem_truncate_range(vm_object_t, loff_t, loff_t);
-
-#define	invalidate_mapping_pages(...) \
-  linux_invalidate_mapping_pages(__VA_ARGS__)
-
-#define	shmem_read_mapping_page(...) \
-  linux_shmem_read_mapping_page_gfp(__VA_ARGS__, 0)
-
-#define	shmem_read_mapping_page_gfp(...) \
-  linux_shmem_read_mapping_page_gfp(__VA_ARGS__)
-
-#define	shmem_file_setup(...) \
-  linux_shmem_file_setup(__VA_ARGS__)
-
-#define	shmem_truncate_range(...) \
-  linux_shmem_truncate_range(__VA_ARGS__)
-
 #endif /* _LINUX_FS_H_ */
--- a/sys/compat/linuxkpi/common/include/linux/shmem_fs.h
+++ b/sys/compat/linuxkpi/common/include/linux/shmem_fs.h
@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iX Systems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013-2018 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef	_LINUX_SHMEM_FS_H_
+#define	_LINUX_SHMEM_FS_H_
+
+/* Shared memory support */
+unsigned long linux_invalidate_mapping_pages(vm_object_t, pgoff_t, pgoff_t);
+struct page *linux_shmem_read_mapping_page_gfp(vm_object_t, int, gfp_t);
+struct linux_file *linux_shmem_file_setup(const char *, loff_t, unsigned long);
+void linux_shmem_truncate_range(vm_object_t, loff_t, loff_t);
+
+#define	invalidate_mapping_pages(...) \
+  linux_invalidate_mapping_pages(__VA_ARGS__)
+
+#define	shmem_read_mapping_page(...) \
+  linux_shmem_read_mapping_page_gfp(__VA_ARGS__, 0)
+
+#define	shmem_read_mapping_page_gfp(...) \
+  linux_shmem_read_mapping_page_gfp(__VA_ARGS__)
+
+#define	shmem_file_setup(...) \
+  linux_shmem_file_setup(__VA_ARGS__)
+
+#define	shmem_truncate_range(...) \
+  linux_shmem_truncate_range(__VA_ARGS__)
+
+#endif /* _LINUX_SHMEM_FS_H_ */
--- a/sys/compat/linuxkpi/common/src/linux_page.c
+++ b/sys/compat/linuxkpi/common/src/linux_page.c
@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
 #include <linux/mm.h>
 #include <linux/preempt.h>
 #include <linux/fs.h>
+#include <linux/shmem_fs.h>

 void
 si_meminfo(struct sysinfo *si)
@ -275,86 +276,3 @@ is_vmalloc_addr(const void *addr)
 {
 	return (vtoslab((vm_offset_t)addr & ~UMA_SLAB_MASK) != NULL);
 }
-
-struct page *
-linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp)
-{
-	vm_page_t page;
-	int rv;
-
-	if ((gfp & GFP_NOWAIT) != 0)
-		panic("GFP_NOWAIT is unimplemented");
-
-	VM_OBJECT_WLOCK(obj);
-	rv = vm_page_grab_valid(&page, obj, pindex, VM_ALLOC_NORMAL |
-	    VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
-	VM_OBJECT_WUNLOCK(obj);
-	if (rv != VM_PAGER_OK)
-		return (ERR_PTR(-EINVAL));
-	return (page);
-}
-
-struct linux_file *
-linux_shmem_file_setup(const char *name, loff_t size, unsigned long flags)
-{
-	struct fileobj {
-		struct linux_file file __aligned(sizeof(void *));
-		struct vnode vnode __aligned(sizeof(void *));
-	};
-	struct fileobj *fileobj;
-	struct linux_file *filp;
-	struct vnode *vp;
-	int error;
-
-	fileobj = kzalloc(sizeof(*fileobj), GFP_KERNEL);
-	if (fileobj == NULL) {
-		error = -ENOMEM;
-		goto err_0;
-	}
-	filp = &fileobj->file;
-	vp = &fileobj->vnode;
-
-	filp->f_count = 1;
-	filp->f_vnode = vp;
-	filp->f_shmem = vm_pager_allocate(OBJT_DEFAULT, NULL, size,
-	    VM_PROT_READ | VM_PROT_WRITE, 0, curthread->td_ucred);
-	if (filp->f_shmem == NULL) {
-		error = -ENOMEM;
-		goto err_1;
-	}
-	return (filp);
-err_1:
-	kfree(filp);
-err_0:
-	return (ERR_PTR(error));
-}
-
-static vm_ooffset_t
-linux_invalidate_mapping_pages_sub(vm_object_t obj, vm_pindex_t start,
-    vm_pindex_t end, int flags)
-{
-	int start_count, end_count;
-
-	VM_OBJECT_WLOCK(obj);
-	start_count = obj->resident_page_count;
-	vm_object_page_remove(obj, start, end, flags);
-	end_count = obj->resident_page_count;
-	VM_OBJECT_WUNLOCK(obj);
-	return (start_count - end_count);
-}
-
-unsigned long
-linux_invalidate_mapping_pages(vm_object_t obj, pgoff_t start, pgoff_t end)
-{
-
-	return (linux_invalidate_mapping_pages_sub(obj, start, end, OBJPR_CLEANONLY));
-}
-
-void
-linux_shmem_truncate_range(vm_object_t obj, loff_t lstart, loff_t lend)
-{
-	vm_pindex_t start = OFF_TO_IDX(lstart + PAGE_SIZE - 1);
-	vm_pindex_t end = OFF_TO_IDX(lend + 1);
-
-	(void) linux_invalidate_mapping_pages_sub(obj, start, end, 0);
-}
--- a/sys/compat/linuxkpi/common/src/linux_shmemfs.c
+++ b/sys/compat/linuxkpi/common/src/linux_shmemfs.c
@ -0,0 +1,128 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io)
+ * Copyright (c) 2017 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/rwlock.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_object.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/shmem_fs.h>
+
+struct page *
+linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp)
+{
+	vm_page_t page;
+	int rv;
+
+	if ((gfp & GFP_NOWAIT) != 0)
+		panic("GFP_NOWAIT is unimplemented");
+
+	VM_OBJECT_WLOCK(obj);
+	rv = vm_page_grab_valid(&page, obj, pindex, VM_ALLOC_NORMAL |
+	    VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
+	VM_OBJECT_WUNLOCK(obj);
+	if (rv != VM_PAGER_OK)
+		return (ERR_PTR(-EINVAL));
+	return (page);
+}
+
+struct linux_file *
+linux_shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+{
+	struct fileobj {
+		struct linux_file file __aligned(sizeof(void *));
+		struct vnode vnode __aligned(sizeof(void *));
+	};
+	struct fileobj *fileobj;
+	struct linux_file *filp;
+	struct vnode *vp;
+	int error;
+
+	fileobj = kzalloc(sizeof(*fileobj), GFP_KERNEL);
+	if (fileobj == NULL) {
+		error = -ENOMEM;
+		goto err_0;
+	}
+	filp = &fileobj->file;
+	vp = &fileobj->vnode;
+
+	filp->f_count = 1;
+	filp->f_vnode = vp;
+	filp->f_shmem = vm_pager_allocate(OBJT_DEFAULT, NULL, size,
+	    VM_PROT_READ | VM_PROT_WRITE, 0, curthread->td_ucred);
+	if (filp->f_shmem == NULL) {
+		error = -ENOMEM;
+		goto err_1;
+	}
+	return (filp);
+err_1:
+	kfree(filp);
+err_0:
+	return (ERR_PTR(error));
+}
+
+static vm_ooffset_t
+linux_invalidate_mapping_pages_sub(vm_object_t obj, vm_pindex_t start,
+    vm_pindex_t end, int flags)
+{
+	int start_count, end_count;
+
+	VM_OBJECT_WLOCK(obj);
+	start_count = obj->resident_page_count;
+	vm_object_page_remove(obj, start, end, flags);
+	end_count = obj->resident_page_count;
+	VM_OBJECT_WUNLOCK(obj);
+	return (start_count - end_count);
+}
+
+unsigned long
+linux_invalidate_mapping_pages(vm_object_t obj, pgoff_t start, pgoff_t end)
+{
+
+	return (linux_invalidate_mapping_pages_sub(obj, start, end, OBJPR_CLEANONLY));
+}
+
+void
+linux_shmem_truncate_range(vm_object_t obj, loff_t lstart, loff_t lend)
+{
+	vm_pindex_t start = OFF_TO_IDX(lstart + PAGE_SIZE - 1);
+	vm_pindex_t end = OFF_TO_IDX(lend + 1);
+
+	(void) linux_invalidate_mapping_pages_sub(obj, start, end, 0);
+}
--- a/sys/conf/files
+++ b/sys/conf/files
@ -4476,6 +4476,8 @@ compat/linuxkpi/common/src/linux_rcu.c		optional compat_linuxkpi \
 	compile-with "${LINUXKPI_C} -I$S/contrib/ck/include"
 compat/linuxkpi/common/src/linux_schedule.c	optional compat_linuxkpi \
 	compile-with "${LINUXKPI_C}"
+compat/linuxkpi/common/src/linux_shmemfs.c	optional compat_linuxkpi \
+	compile-with "${LINUXKPI_C}"
 compat/linuxkpi/common/src/linux_slab.c		optional compat_linuxkpi \
 	compile-with "${LINUXKPI_C}"
 compat/linuxkpi/common/src/linux_usb.c		optional compat_linuxkpi usb \
--- a/sys/dev/acpica/acpi_lid.c
+++ b/sys/dev/acpica/acpi_lid.c
@ -124,13 +124,16 @@ acpi_lid_attach(device_t dev)
    if (acpi_parse_prw(sc->lid_handle, &prw) == 0)
 	AcpiEnableGpe(prw.gpe_handle, prw.gpe_bit);

+    /* Get the initial lid status, ignore failures */
+    (void) acpi_GetInteger(sc->lid_handle, "_LID", &sc->lid_status);
+
    /*
     * Export the lid status
     */
    SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
 	SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
 	"state", CTLFLAG_RD, &sc->lid_status, 0,
-	"Device set to wake the system");
+	"Device state (0 = closed, 1 = open)");

    return (0);
 }
@ -144,6 +147,13 @@ acpi_lid_suspend(device_t dev)
 static int
 acpi_lid_resume(device_t dev)
 {
+    struct acpi_lid_softc	*sc;
+
+    sc = device_get_softc(dev);
+
+    /* Get lid status after resume, ignore failures */
+    (void) acpi_GetInteger(sc->lid_handle, "_LID", &sc->lid_status);
+
    return (0);
 }

--- a/sys/dev/ath/ah_osdep.c
+++ b/sys/dev/ath/ah_osdep.c
@ -93,8 +93,9 @@ extern	void DO_HALDEBUG(struct ath_hal *ah, u_int mask, const char* fmt, ...);
 #endif /* AH_DEBUG */

 /* NB: put this here instead of the driver to avoid circular references */
-SYSCTL_NODE(_hw, OID_AUTO, ath, CTLFLAG_RD, 0, "Atheros driver parameters");
-static SYSCTL_NODE(_hw_ath, OID_AUTO, hal, CTLFLAG_RD, 0,
+SYSCTL_NODE(_hw, OID_AUTO, ath, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+    "Atheros driver parameters");
+static SYSCTL_NODE(_hw_ath, OID_AUTO, hal, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Atheros HAL parameters");

 #ifdef AH_DEBUG
@ -236,8 +237,10 @@ sysctl_hw_ath_hal_log(SYSCTL_HANDLER_ARGS)
 	else
 		return (ath_hal_setlogging(enable));
 }
-SYSCTL_PROC(_hw_ath_hal, OID_AUTO, alq, CTLTYPE_INT|CTLFLAG_RW,
-	0, 0, sysctl_hw_ath_hal_log, "I", "Enable HAL register logging");
+SYSCTL_PROC(_hw_ath_hal, OID_AUTO, alq,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    0, 0, sysctl_hw_ath_hal_log, "I",
+    "Enable HAL register logging");
 SYSCTL_INT(_hw_ath_hal, OID_AUTO, alq_size, CTLFLAG_RW,
 	&ath_hal_alq_qsize, 0, "In-memory log size (#records)");
 SYSCTL_INT(_hw_ath_hal, OID_AUTO, alq_lost, CTLFLAG_RW,
--- a/sys/dev/ath/ath_rate/sample/sample.c
+++ b/sys/dev/ath/ath_rate/sample/sample.c
@ -1364,17 +1364,17 @@ ath_rate_sysctlattach(struct ath_softc *sc, struct sample_softc *ssc)
 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev);

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-	    "smoothing_rate", CTLTYPE_INT | CTLFLAG_RW, ssc, 0,
-	    ath_rate_sysctl_smoothing_rate, "I",
+	    "smoothing_rate", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+	    ssc, 0, ath_rate_sysctl_smoothing_rate, "I",
 	    "sample: smoothing rate for avg tx time (%%)");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-	    "sample_rate", CTLTYPE_INT | CTLFLAG_RW, ssc, 0,
-	    ath_rate_sysctl_sample_rate, "I",
+	    "sample_rate", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+	    ssc, 0, ath_rate_sysctl_sample_rate, "I",
 	    "sample: percent air time devoted to sampling new rates (%%)");
 	/* XXX max_successive_failures, stale_failure_timeout, min_switch */
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-	    "sample_stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-	    ath_rate_sysctl_stats, "I", "sample: print statistics");
+	    "sample_stats", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+	    sc, 0, ath_rate_sysctl_stats, "I", "sample: print statistics");
 }

 struct ath_ratectrl *
--- a/sys/dev/ath/if_ath_sysctl.c
+++ b/sys/dev/ath/if_ath_sysctl.c
@ -786,16 +786,17 @@ ath_sysctl_alq_attach(struct ath_softc *sc)
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev);
 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);

-	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "alq", CTLFLAG_RD,
-	    NULL, "Atheros ALQ logging parameters");
+	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "alq",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
+	    "Atheros ALQ logging parameters");
 	child = SYSCTL_CHILDREN(tree);

 	SYSCTL_ADD_STRING(ctx, child, OID_AUTO, "filename",
 	    CTLFLAG_RW, sc->sc_alq.sc_alq_filename, 0, "ALQ filename");

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"enable", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_alq_log, "I", "");
+	    "enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_alq_log, "I", "");

 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
 		"debugmask", CTLFLAG_RW, &sc->sc_alq.sc_alq_debug, 0,
@ -831,21 +832,21 @@ ath_sysctlattach(struct ath_softc *sc)
 		"control debugging KTR");
 #endif /* ATH_DEBUG_ALQ */
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"slottime", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_slottime, "I", "802.11 slot time (us)");
+	    "slottime", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_slottime, "I", "802.11 slot time (us)");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"acktimeout", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_acktimeout, "I", "802.11 ACK timeout (us)");
+	    "acktimeout", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_acktimeout, "I", "802.11 ACK timeout (us)");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"ctstimeout", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_ctstimeout, "I", "802.11 CTS timeout (us)");
+	    "ctstimeout", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_ctstimeout, "I", "802.11 CTS timeout (us)");

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"softled", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_softled, "I", "enable/disable software LED support");
+	    "softled", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_softled, "I", "enable/disable software LED support");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"ledpin", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_ledpin, "I", "GPIO pin connected to LED");
+	    "ledpin", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_ledpin, "I", "GPIO pin connected to LED");
 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
 		"ledon", CTLFLAG_RW, &sc->sc_ledon, 0,
 		"setting to turn LED on");
@ -854,8 +855,8 @@ ath_sysctlattach(struct ath_softc *sc)
 		"idle time for inactivity LED (ticks)");

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"hardled", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_hardled, "I", "enable/disable hardware LED support");
+	    "hardled", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_hardled, "I", "enable/disable hardware LED support");
 	/* XXX Laziness - configure pins, then flip hardled off/on */
 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
 		"led_net_pin", CTLFLAG_RW, &sc->sc_led_net_pin, 0,
@ -865,61 +866,61 @@ ath_sysctlattach(struct ath_softc *sc)
 		"MAC Power LED pin, or -1 to disable");

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"txantenna", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_txantenna, "I", "antenna switch");
+	    "txantenna", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_txantenna, "I", "antenna switch");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"rxantenna", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_rxantenna, "I", "default/rx antenna");
+	    "rxantenna", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_rxantenna, "I", "default/rx antenna");
 	if (ath_hal_hasdiversity(ah))
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-			"diversity", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-			ath_sysctl_diversity, "I", "antenna diversity");
+		    "diversity", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+		    sc, 0, ath_sysctl_diversity, "I", "antenna diversity");
 	sc->sc_txintrperiod = ATH_TXINTR_PERIOD;
 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
 		"txintrperiod", CTLFLAG_RW, &sc->sc_txintrperiod, 0,
 		"tx descriptor batching");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"diag", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_diag, "I", "h/w diagnostic control");
+	    "diag", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_diag, "I", "h/w diagnostic control");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"tpscale", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_tpscale, "I", "tx power scaling");
+	    "tpscale", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_tpscale, "I", "tx power scaling");
 	if (ath_hal_hastpc(ah)) {
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-			"tpc", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-			ath_sysctl_tpc, "I", "enable/disable per-packet TPC");
+		    "tpc", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+		    ath_sysctl_tpc, "I", "enable/disable per-packet TPC");
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-			"tpack", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-			ath_sysctl_tpack, "I", "tx power for ack frames");
+		    "tpack", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+		    0, ath_sysctl_tpack, "I", "tx power for ack frames");
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-			"tpcts", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-			ath_sysctl_tpcts, "I", "tx power for cts frames");
+		    "tpcts", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+		    0, ath_sysctl_tpcts, "I", "tx power for cts frames");
 	}
 	if (ath_hal_hasrfsilent(ah)) {
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-			"rfsilent", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-			ath_sysctl_rfsilent, "I", "h/w RF silent config");
+		    "rfsilent", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+		    sc, 0, ath_sysctl_rfsilent, "I", "h/w RF silent config");
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-			"rfkill", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-			ath_sysctl_rfkill, "I", "enable/disable RF kill switch");
+		    "rfkill", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+		    0, ath_sysctl_rfkill, "I", "enable/disable RF kill switch");
 	}

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"txagg", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_txagg, "I", "");
+	    "txagg", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_txagg, "I", "");

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"forcebstuck", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_forcebstuck, "I", "");
+	    "forcebstuck", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+	    0, ath_sysctl_forcebstuck, "I", "");

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-		"hangcheck", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-		ath_sysctl_hangcheck, "I", "");
+	    "hangcheck", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+	    ath_sysctl_hangcheck, "I", "");

 	if (ath_hal_hasintmit(ah)) {
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-			"intmit", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-			ath_sysctl_intmit, "I", "interference mitigation");
+		    "intmit", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+		    0, ath_sysctl_intmit, "I", "interference mitigation");
 	}
 	sc->sc_monpass = HAL_RXERR_DECRYPT | HAL_RXERR_MIC;
 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
@ -989,8 +990,8 @@ ath_sysctlattach(struct ath_softc *sc)
 			"superframe", CTLFLAG_RD, &sc->sc_tdmabintval, 0,
 			"TDMA calculated super frame");
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-			"setcca", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-			ath_sysctl_setcca, "I", "enable CCA control");
+		    "setcca", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+		    sc, 0, ath_sysctl_setcca, "I", "enable CCA control");
 	}
 #endif

@ -1028,7 +1029,8 @@ ath_sysctl_stats_attach_rxphyerr(struct ath_softc *sc, struct sysctl_oid_list *p
 	int i;
 	char sn[8];

-	tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "rx_phy_err", CTLFLAG_RD, NULL, "Per-code RX PHY Errors");
+	tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "rx_phy_err",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Per-code RX PHY Errors");
 	child = SYSCTL_CHILDREN(tree);
 	for (i = 0; i < 64; i++) {
 		snprintf(sn, sizeof(sn), "%d", i);
@ -1047,7 +1049,7 @@ ath_sysctl_stats_attach_intr(struct ath_softc *sc,
 	char sn[8];

 	tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "sync_intr",
-	    CTLFLAG_RD, NULL, "Sync interrupt statistics");
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Sync interrupt statistics");
 	child = SYSCTL_CHILDREN(tree);
 	for (i = 0; i < 32; i++) {
 		snprintf(sn, sizeof(sn), "%d", i);
@ -1065,12 +1067,12 @@ ath_sysctl_stats_attach(struct ath_softc *sc)
 
 	/* Create "clear" node */
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-	    "clear_stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
-	    ath_sysctl_clearstats, "I", "clear stats");
+	    "clear_stats", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+	    0, ath_sysctl_clearstats, "I", "clear stats");

 	/* Create stats node */
-	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD,
-	    NULL, "Statistics");
+	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics");
 	child = SYSCTL_CHILDREN(tree);

 	/* This was generated from if_athioctl.h */
@ -1315,8 +1317,8 @@ ath_sysctl_hal_attach(struct ath_softc *sc)
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev);
 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);

-	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hal", CTLFLAG_RD,
-	    NULL, "Atheros HAL parameters");
+	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hal",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Atheros HAL parameters");
 	child = SYSCTL_CHILDREN(tree);

 	sc->sc_ah->ah_config.ah_debug = 0;
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
@ -650,9 +650,13 @@ struct mlx5_ib_congestion {
 	struct sysctl_ctx_list ctx;
 	struct sx lock;
 	struct delayed_work dwork;
-	u64	arg [0];
-	MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_VAR)
-	MLX5_IB_CONG_STATS(MLX5_IB_STATS_VAR)
+	union {
+		u64	arg[1];
+		struct {
+			MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_VAR)
+			MLX5_IB_CONG_STATS(MLX5_IB_STATS_VAR)
+		};
+	};
 };

 struct mlx5_ib_dev {
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c
@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@ -37,7 +37,9 @@ static const char *mlx5_ib_cong_stats_desc[] = {
 	MLX5_IB_CONG_STATS(MLX5_IB_STATS_DESC)
 };

-#define	MLX5_IB_INDEX(field) (__offsetof(struct mlx5_ib_congestion, field) / sizeof(u64))
+#define	MLX5_IB_INDEX(field) ( \
+    (__offsetof(struct mlx5_ib_congestion, field) - \
+     __offsetof(struct mlx5_ib_congestion, arg[0])) / sizeof(u64))
 #define	MLX5_IB_FLD_MAX(type, field) ((1ULL << __mlx5_bit_sz(type, field)) - 1ULL)
 #define	MLX5_IB_SET_CLIPPED(type, ptr, field, var) do { \
  /* rangecheck */					\
--- a/sys/dev/otus/if_otus.c
+++ b/sys/dev/otus/if_otus.c
@ -75,7 +75,8 @@ __FBSDID("$FreeBSD$");
 #include "if_otusreg.h"

 static int otus_debug = 0;
-static SYSCTL_NODE(_hw_usb, OID_AUTO, otus, CTLFLAG_RW, 0, "USB otus");
+static SYSCTL_NODE(_hw_usb, OID_AUTO, otus, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+    "USB otus");
 SYSCTL_INT(_hw_usb_otus, OID_AUTO, debug, CTLFLAG_RWTUN, &otus_debug, 0,
    "Debug level");
 #define	OTUS_DEBUG_XMIT		0x00000001
--- a/sys/kern/subr_compressor.c
+++ b/sys/kern/subr_compressor.c
@ -117,6 +117,13 @@ gz_init(size_t maxiosize, int level)
 	s->gz_stream.next_in = Z_NULL;
 	s->gz_stream.avail_in = 0;

+	if (level != Z_DEFAULT_COMPRESSION) {
+		if (level < Z_BEST_SPEED)
+			level = Z_BEST_SPEED;
+		else if (level > Z_BEST_COMPRESSION)
+			level = Z_BEST_COMPRESSION;
+	}
+
 	error = deflateInit2(&s->gz_stream, level, Z_DEFLATED, -MAX_WBITS,
 	    DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
 	if (error != 0)
--- a/sys/kern/subr_smr.c
+++ b/sys/kern/subr_smr.c
@ -41,6 +41,8 @@ __FBSDID("$FreeBSD$");
 #include <vm/uma.h>

 /*
+ * Global Unbounded Sequences (GUS)
+ *
 * This is a novel safe memory reclamation technique inspired by
 * epoch based reclamation from Samy Al Bahra's concurrency kit which
 * in turn was based on work described in:
@ -53,7 +55,8 @@ __FBSDID("$FreeBSD$");
 * This is not an implementation of hazard pointers or related
 * techniques.  The term safe memory reclamation is used as a
 * generic descriptor for algorithms that defer frees to avoid
- * use-after-free errors with lockless datastructures.
+ * use-after-free errors with lockless datastructures or as
+ * a mechanism to detect quiescence for writer synchronization.
 *
 * The basic approach is to maintain a monotonic write sequence
 * number that is updated on some application defined granularity.
@ -67,7 +70,7 @@ __FBSDID("$FreeBSD$");
 * a global write clock that is used to mark memory on free.
 *
 * The write and read sequence numbers can be thought of as a two
- * handed clock with readers always advancing towards writers.  SMR
+ * handed clock with readers always advancing towards writers.  GUS 
 * maintains the invariant that all readers can safely access memory
 * that was visible at the time they loaded their copy of the sequence
 * number.  Periodically the read sequence or hand is polled and
@ -80,9 +83,12 @@ __FBSDID("$FreeBSD$");
 * A stored sequence number that falls outside of this range has expired
 * and needs no scan to reclaim.
 *
- * A notable distinction between this SMR and Epoch, qsbr, rcu, etc. is
+ * A notable distinction between GUS and Epoch, qsbr, rcu, etc. is
 * that advancing the sequence number is decoupled from detecting its
- * observation.  This results in a more granular assignment of sequence
+ * observation.  That is to say, the delta between read and write
+ * sequence numbers is not bounded.  This can be thought of as a more
+ * generalized form of epoch, which requires them to be at most one step
+ * apart.  This results in a more granular assignment of sequence
 * numbers even as read latencies prohibit all or some expiration.
 * It also allows writers to advance the sequence number and save the
 * poll for expiration until a later time when it is likely to
@ -164,31 +170,192 @@ static uma_zone_t smr_zone;
 #define	SMR_SEQ_MAX_ADVANCE	SMR_SEQ_MAX_DELTA / 2
 #endif

-static SYSCTL_NODE(_debug, OID_AUTO, smr, CTLFLAG_RW, NULL, "SMR Stats");
-static counter_u64_t advance = EARLY_COUNTER;
-SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance, CTLFLAG_RD, &advance, "");
-static counter_u64_t advance_wait = EARLY_COUNTER;
-SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance_wait, CTLFLAG_RD, &advance_wait, "");
-static counter_u64_t poll = EARLY_COUNTER;
-SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll, CTLFLAG_RD, &poll, "");
-static counter_u64_t poll_scan = EARLY_COUNTER;
-SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_scan, CTLFLAG_RD, &poll_scan, "");
-
+/*
+ * The grace period for lazy (tick based) SMR.
+ *
+ * Hardclock is responsible for advancing ticks on a single CPU while every
+ * CPU receives a regular clock interrupt.  The clock interrupts are flushing
+ * the store buffers and any speculative loads that may violate our invariants.
+ * Because these interrupts are not synchronized we must wait one additional
+ * tick in the future to be certain that all processors have had their state
+ * synchronized by an interrupt.
+ *
+ * This assumes that the clock interrupt will only be delayed by other causes
+ * that will flush the store buffer or prevent access to the section protected
+ * data.  For example, an idle processor, or a system management interrupt,
+ * or a vm exit.
+ *
+ * We must wait one additional tick if we are around the wrap condition
+ * because the write seq will move forward by two with one interrupt.
+ */
+#define	SMR_LAZY_GRACE		2
+#define	SMR_LAZY_GRACE_MAX	(SMR_LAZY_GRACE + 1)

 /*
- * Advance the write sequence and return the new value for use as the
+ * The maximum sequence number ahead of wr_seq that may still be valid.  The
+ * sequence may not be advanced on write for lazy or deferred SMRs.  In this
+ * case poll needs to attempt to forward the sequence number if the goal is
+ * within wr_seq + SMR_SEQ_ADVANCE.
+ */
+#define	SMR_SEQ_ADVANCE		MAX(SMR_SEQ_INCR, SMR_LAZY_GRACE_MAX)
+
+static SYSCTL_NODE(_debug, OID_AUTO, smr, CTLFLAG_RW, NULL, "SMR Stats");
+static counter_u64_t advance = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance, CTLFLAG_RW, &advance, "");
+static counter_u64_t advance_wait = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance_wait, CTLFLAG_RW, &advance_wait, "");
+static counter_u64_t poll = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll, CTLFLAG_RW, &poll, "");
+static counter_u64_t poll_scan = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_scan, CTLFLAG_RW, &poll_scan, "");
+static counter_u64_t poll_fail = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_fail, CTLFLAG_RW, &poll_fail, "");
+
+/*
+ * Advance a lazy write sequence number.  These move forward at the rate of
+ * ticks.  Grace is two ticks in the future.  lazy write sequence numbers can
+ * be even but not SMR_SEQ_INVALID so we pause time for a tick when we wrap.
+ *
+ * This returns the lazy goal sequence number, which is SMR_LAZY_GRACE ticks
+ * ahead of the _current_ write sequence number.
+ */
+static smr_seq_t
+smr_lazy_advance(smr_t smr, smr_shared_t s)
+{
+	smr_seq_t s_rd_seq, s_wr_seq, goal;
+	int t;
+
+	CRITICAL_ASSERT(curthread);
+
+	/*
+	 * Load s_wr_seq prior to ticks to ensure that the thread that
+	 * observes the largest value wins.
+	 */
+	s_wr_seq = atomic_load_acq_int(&s->s_wr_seq);
+
+	/*
+	 * We must not allow a zero tick value.  We go back in time one tick
+	 * and advance the grace period forward one tick around zero.
+	 */
+	t = ticks;
+	if (t == SMR_SEQ_INVALID)
+		t--;
+
+	/*
+	 * The most probable condition is that the update already took place.
+	 */
+	if (__predict_true(t == s_wr_seq))
+		goto out;
+
+	/*
+	 * After long idle periods the read sequence may fall too far
+	 * behind write.  Prevent poll from ever seeing this condition
+	 * by updating the stale rd_seq.  This assumes that there can
+	 * be no valid section 2bn ticks old.  The rd_seq update must
+	 * be visible before wr_seq to avoid races with other advance
+	 * callers.
+	 */
+	s_rd_seq = atomic_load_int(&s->s_rd_seq);
+	if (SMR_SEQ_GT(s_rd_seq, t))
+		atomic_cmpset_rel_int(&s->s_rd_seq, s_rd_seq, t);
+
+	/*
+	 * Release to synchronize with the wr_seq load above.  Ignore
+	 * cmpset failures from simultaneous updates.
+	 */
+	atomic_cmpset_rel_int(&s->s_wr_seq, s_wr_seq, t);
+	counter_u64_add(advance, 1);
+	/* If we lost either update race another thread did it. */
+	s_wr_seq = t;
+out:
+	goal = s_wr_seq + SMR_LAZY_GRACE;
+	/* Skip over the SMR_SEQ_INVALID tick. */
+	if (goal < SMR_LAZY_GRACE)
+		goal++;
+	return (goal);
+}
+
+/*
+ * Increment the shared write sequence by 2.  Since it is initialized
+ * to 1 this means the only valid values are odd and an observed value
+ * of 0 in a particular CPU means it is not currently in a read section.
+ */
+static smr_seq_t
+smr_shared_advance(smr_shared_t s)
+{
+
+	return (atomic_fetchadd_int(&s->s_wr_seq, SMR_SEQ_INCR) + SMR_SEQ_INCR);
+}
+
+/*
+ * Advance the write sequence number for a normal smr section.  If the
+ * write sequence is too far behind the read sequence we have to poll
+ * to advance rd_seq and prevent undetectable wraps.
+ */
+static smr_seq_t
+smr_default_advance(smr_t smr, smr_shared_t s)
+{
+	smr_seq_t goal, s_rd_seq;
+
+	CRITICAL_ASSERT(curthread);
+	KASSERT((zpcpu_get(smr)->c_flags & SMR_LAZY) == 0,
+	    ("smr_default_advance: called with lazy smr."));
+
+	/*
+	 * Load the current read seq before incrementing the goal so
+	 * we are guaranteed it is always < goal.
+	 */
+	s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
+	goal = smr_shared_advance(s);
+
+	/*
+	 * Force a synchronization here if the goal is getting too
+	 * far ahead of the read sequence number.  This keeps the
+	 * wrap detecting arithmetic working in pathological cases.
+	 */
+	if (SMR_SEQ_DELTA(goal, s_rd_seq) >= SMR_SEQ_MAX_DELTA) {
+		counter_u64_add(advance_wait, 1);
+		smr_wait(smr, goal - SMR_SEQ_MAX_ADVANCE);
+	}
+	counter_u64_add(advance, 1);
+
+	return (goal);
+}
+
+/*
+ * Deferred SMRs conditionally update s_wr_seq based on a
+ * cpu local interval count.
+ */
+static smr_seq_t
+smr_deferred_advance(smr_t smr, smr_shared_t s, smr_t self)
+{
+
+	if (++self->c_deferred < self->c_limit)
+		return (smr_shared_current(s) + SMR_SEQ_INCR);
+	self->c_deferred = 0;
+	return (smr_default_advance(smr, s));
+}
+
+/*
+ * Advance the write sequence and return the value for use as the
 * wait goal.  This guarantees that any changes made by the calling
 * thread prior to this call will be visible to all threads after
 * rd_seq meets or exceeds the return value.
 *
 * This function may busy loop if the readers are roughly 1 billion
 * sequence numbers behind the writers.
+ *
+ * Lazy SMRs will not busy loop and the wrap happens every 49.6 days
+ * at 1khz and 119 hours at 10khz.  Readers can block for no longer
+ * than half of this for SMR_SEQ_ macros to continue working.
 */
 smr_seq_t
 smr_advance(smr_t smr)
 {
+	smr_t self;
 	smr_shared_t s;
-	smr_seq_t goal, s_rd_seq;
+	smr_seq_t goal;
+	int flags;

 	/*
 	 * It is illegal to enter while in an smr section.
@ -201,55 +368,121 @@ smr_advance(smr_t smr)
 	 */
 	atomic_thread_fence_rel();

-	/*
-	 * Load the current read seq before incrementing the goal so
-	 * we are guaranteed it is always < goal.
-	 */
-	s = zpcpu_get(smr)->c_shared;
-	s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
-
-	/*
-	 * Increment the shared write sequence by 2.  Since it is
-	 * initialized to 1 this means the only valid values are
-	 * odd and an observed value of 0 in a particular CPU means
-	 * it is not currently in a read section.
-	 */
-	goal = atomic_fetchadd_int(&s->s_wr_seq, SMR_SEQ_INCR) + SMR_SEQ_INCR;
-	counter_u64_add(advance, 1);
-
-	/*
-	 * Force a synchronization here if the goal is getting too
-	 * far ahead of the read sequence number.  This keeps the
-	 * wrap detecting arithmetic working in pathological cases.
-	 */
-	if (SMR_SEQ_DELTA(goal, s_rd_seq) >= SMR_SEQ_MAX_DELTA) {
-		counter_u64_add(advance_wait, 1);
-		smr_wait(smr, goal - SMR_SEQ_MAX_ADVANCE);
-	}
+	critical_enter();
+	/* Try to touch the line once. */
+	self = zpcpu_get(smr);
+	s = self->c_shared;
+	flags = self->c_flags;
+	goal = SMR_SEQ_INVALID;
+	if ((flags & (SMR_LAZY | SMR_DEFERRED)) == 0)
+		goal = smr_default_advance(smr, s);
+	else if ((flags & SMR_LAZY) != 0)
+		goal = smr_lazy_advance(smr, s);
+	else if ((flags & SMR_DEFERRED) != 0)
+		goal = smr_deferred_advance(smr, s, self);
+	critical_exit();

 	return (goal);
 }

-smr_seq_t
-smr_advance_deferred(smr_t smr, int limit)
+/*
+ * Poll to determine the currently observed sequence number on a cpu
+ * and spinwait if the 'wait' argument is true.
+ */
+static smr_seq_t
+smr_poll_cpu(smr_t c, smr_seq_t s_rd_seq, smr_seq_t goal, bool wait)
 {
-	smr_seq_t goal;
-	smr_t csmr;
+	smr_seq_t c_seq;

-	SMR_ASSERT_NOT_ENTERED(smr);
+	c_seq = SMR_SEQ_INVALID;
+	for (;;) {
+		c_seq = atomic_load_int(&c->c_seq);
+		if (c_seq == SMR_SEQ_INVALID)
+			break;

-	critical_enter();
-	csmr = zpcpu_get(smr);
-	if (++csmr->c_deferred >= limit) {
-		goal = SMR_SEQ_INVALID;
-		csmr->c_deferred = 0;
-	} else
-		goal = smr_shared_current(csmr->c_shared) + SMR_SEQ_INCR;
-	critical_exit();
-	if (goal != SMR_SEQ_INVALID)
-		return (goal);
+		/*
+		 * There is a race described in smr.h:smr_enter that
+		 * can lead to a stale seq value but not stale data
+		 * access.  If we find a value out of range here we
+		 * pin it to the current min to prevent it from
+		 * advancing until that stale section has expired.
+		 *
+		 * The race is created when a cpu loads the s_wr_seq
+		 * value in a local register and then another thread
+		 * advances s_wr_seq and calls smr_poll() which will
+		 * observe no value yet in c_seq and advance s_rd_seq
+		 * up to s_wr_seq which is beyond the register
+		 * cached value.  This is only likely to happen on
+		 * hypervisor or with a system management interrupt.
+		 */
+		if (SMR_SEQ_LT(c_seq, s_rd_seq))
+			c_seq = s_rd_seq;

-	return (smr_advance(smr));
+		/*
+		 * If the sequence number meets the goal we are done
+		 * with this cpu.
+		 */
+		if (SMR_SEQ_LEQ(goal, c_seq))
+			break;
+
+		if (!wait)
+			break;
+		cpu_spinwait();
+	}
+
+	return (c_seq);
+}
+
+/*
+ * Loop until all cores have observed the goal sequence or have
+ * gone inactive.  Returns the oldest sequence currently active.
+ *
+ * This function assumes a snapshot of sequence values has
+ * been obtained and validated by smr_poll().
+ */
+static smr_seq_t
+smr_poll_scan(smr_t smr, smr_shared_t s, smr_seq_t s_rd_seq,
+    smr_seq_t s_wr_seq, smr_seq_t goal, bool wait)
+{
+	smr_seq_t rd_seq, c_seq;
+	int i;
+
+	CRITICAL_ASSERT(curthread);
+	counter_u64_add_protected(poll_scan, 1);
+
+	/*
+	 * The read sequence can be no larger than the write sequence at
+	 * the start of the poll.
+	 */
+	rd_seq = s_wr_seq;
+	CPU_FOREACH(i) {
+		/*
+		 * Query the active sequence on this cpu.  If we're not
+		 * waiting and we don't meet the goal we will still scan
+		 * the rest of the cpus to update s_rd_seq before returning
+		 * failure.
+		 */
+		c_seq = smr_poll_cpu(zpcpu_get_cpu(smr, i), s_rd_seq, goal,
+		    wait);
+
+		/*
+		 * Limit the minimum observed rd_seq whether we met the goal
+		 * or not.
+		 */
+		if (c_seq != SMR_SEQ_INVALID)
+			rd_seq = SMR_SEQ_MIN(rd_seq, c_seq);
+	}
+
+	/*
+	 * Advance the rd_seq as long as we observed a more recent value.
+	 */
+	s_rd_seq = atomic_load_int(&s->s_rd_seq);
+	if (SMR_SEQ_GT(rd_seq, s_rd_seq)) {
+		atomic_cmpset_int(&s->s_rd_seq, s_rd_seq, rd_seq);
+		s_rd_seq = rd_seq;
+	}
+
+	return (s_rd_seq);
 }

 /*
@ -268,9 +501,10 @@ bool
 smr_poll(smr_t smr, smr_seq_t goal, bool wait)
 {
 	smr_shared_t s;
-	smr_t c;
-	smr_seq_t s_wr_seq, s_rd_seq, rd_seq, c_seq;
-	int i;
+	smr_t self;
+	smr_seq_t s_wr_seq, s_rd_seq;
+	smr_delta_t delta;
+	int flags;
 	bool success;

 	/*
@ -278,6 +512,8 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
 	 */
 	KASSERT(!wait || !SMR_ENTERED(smr),
 	    ("smr_poll: Blocking not allowed in a SMR section."));
+	KASSERT(!wait || (zpcpu_get(smr)->c_flags & SMR_LAZY) == 0,
+	    ("smr_poll: Blocking not allowed on lazy smrs."));

 	/*
 	 * Use a critical section so that we can avoid ABA races
@ -285,9 +521,19 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
 	 */
 	success = true;
 	critical_enter();
-	s = zpcpu_get(smr)->c_shared;
+	/* Attempt to load from self only once. */
+	self = zpcpu_get(smr);
+	s = self->c_shared;
+	flags = self->c_flags;
 	counter_u64_add_protected(poll, 1);

+	/*
+	 * Conditionally advance the lazy write clock on any writer
+	 * activity.  This may reset s_rd_seq.
+	 */
+	if ((flags & SMR_LAZY) != 0)
+		smr_lazy_advance(smr, s);
+
 	/*
 	 * Acquire barrier loads s_wr_seq after s_rd_seq so that we can not
 	 * observe an updated read sequence that is larger than write.
@ -295,106 +541,59 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
 	s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);

 	/*
-	 * wr_seq must be loaded prior to any c_seq value so that a stale
-	 * c_seq can only reference time after this wr_seq.
+	 * If we have already observed the sequence number we can immediately
+	 * return success.  Most polls should meet this criterion.
+	 */
+	if (SMR_SEQ_LEQ(goal, s_rd_seq))
+		goto out;
+
+	/*
+	 * wr_seq must be loaded prior to any c_seq value so that a
+	 * stale c_seq can only reference time after this wr_seq.
 	 */
 	s_wr_seq = atomic_load_acq_int(&s->s_wr_seq);

 	/*
-	 * This may have come from a deferred advance.  Consider one
-	 * increment past the current wr_seq valid and make sure we
-	 * have advanced far enough to succeed.  We simply add to avoid
-	 * an additional fence.
+	 * This is the distance from s_wr_seq to goal.  Positive values
+	 * are in the future.
 	 */
-	if (goal == s_wr_seq + SMR_SEQ_INCR) {
-		atomic_add_int(&s->s_wr_seq, SMR_SEQ_INCR);
-		s_wr_seq = goal;
+	delta = SMR_SEQ_DELTA(goal, s_wr_seq);
+
+	/*
+	 * Detect a stale wr_seq.
+	 *
+	 * A deferred or lazy goal may legitimately be at most SMR_SEQ_ADVANCE
+	 * ahead of wr_seq.  If we are not blocking we can not succeed but the
+	 * sequence number is valid.  Anything further is a wrapped goal.
+	 */
+	if (delta > 0 && delta <= SMR_SEQ_ADVANCE &&
+	    (flags & (SMR_LAZY | SMR_DEFERRED)) != 0) {
+		if (!wait) {
+			success = false;
+			goto out;
+		}
+		/* LAZY is always !wait. */
+		s_wr_seq = smr_shared_advance(s);
+		delta = 0;
+	}

 	/*
-	 * Detect whether the goal is valid and has already been observed.
+	 * Detect an invalid goal.
 	 *
 	 * The goal must be in the range of s_wr_seq >= goal >= s_rd_seq for
 	 * it to be valid.  If it is not then the caller held on to it and
 	 * the integer wrapped.  If we wrapped back within range the caller
 	 * will harmlessly scan.
-	 *
-	 * A valid goal must be greater than s_rd_seq or we have not verified
-	 * that it has been observed and must fall through to polling.
 	 */
-	if (SMR_SEQ_GEQ(s_rd_seq, goal) || SMR_SEQ_LT(s_wr_seq, goal))
+	if (delta > 0)
 		goto out;

-	/*
-	 * Loop until all cores have observed the goal sequence or have
-	 * gone inactive.  Keep track of the oldest sequence currently
-	 * active as rd_seq.
-	 */
-	counter_u64_add_protected(poll_scan, 1);
-	rd_seq = s_wr_seq;
-	CPU_FOREACH(i) {
-		c = zpcpu_get_cpu(smr, i);
-		c_seq = SMR_SEQ_INVALID;
-		for (;;) {
-			c_seq = atomic_load_int(&c->c_seq);
-			if (c_seq == SMR_SEQ_INVALID)
-				break;
-
-			/*
-			 * There is a race described in smr.h:smr_enter that
-			 * can lead to a stale seq value but not stale data
-			 * access.  If we find a value out of range here we
-			 * pin it to the current min to prevent it from
-			 * advancing until that stale section has expired.
-			 *
-			 * The race is created when a cpu loads the s_wr_seq
-			 * value in a local register and then another thread
-			 * advances s_wr_seq and calls smr_poll() which will 
-			 * oberve no value yet in c_seq and advance s_rd_seq
-			 * up to s_wr_seq which is beyond the register
-			 * cached value.  This is only likely to happen on
-			 * hypervisor or with a system management interrupt.
-			 */
-			if (SMR_SEQ_LT(c_seq, s_rd_seq))
-				c_seq = s_rd_seq;
-
-			/*
-			 * If the sequence number meets the goal we are
-			 * done with this cpu.
-			 */
-			if (SMR_SEQ_GEQ(c_seq, goal))
-				break;
-
-			/*
-			 * If we're not waiting we will still scan the rest
-			 * of the cpus and update s_rd_seq before returning
-			 * an error.
-			 */
-			if (!wait) {
-				success = false;
-				break;
-			}
-			cpu_spinwait();
-		}
-
-		/*
-		 * Limit the minimum observed rd_seq whether we met the goal
-		 * or not.
-		 */
-		if (c_seq != SMR_SEQ_INVALID && SMR_SEQ_GT(rd_seq, c_seq))
-			rd_seq = c_seq;
-	}
-
-	/*
-	 * Advance the rd_seq as long as we observed the most recent one.
-	 */
-	s_rd_seq = atomic_load_int(&s->s_rd_seq);
-	do {
-		if (SMR_SEQ_LEQ(rd_seq, s_rd_seq))
-			goto out;
-	} while (atomic_fcmpset_int(&s->s_rd_seq, &s_rd_seq, rd_seq) == 0);
-
+	/* Determine the lowest visible sequence number. */
+	s_rd_seq = smr_poll_scan(smr, s, s_rd_seq, s_wr_seq, goal, wait);
+	success = SMR_SEQ_LEQ(goal, s_rd_seq);
 out:
+	if (!success)
+		counter_u64_add_protected(poll_fail, 1);
 	critical_exit();

 	/*
@ -407,7 +606,7 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
 }

 smr_t
-smr_create(const char *name)
+smr_create(const char *name, int limit, int flags)
 {
 	smr_t smr, c;
 	smr_shared_t s;
@ -417,13 +616,19 @@ smr_create(const char *name)
 	smr = uma_zalloc_pcpu(smr_zone, M_WAITOK);

 	s->s_name = name;
-	s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT;
+	if ((flags & SMR_LAZY) == 0)
+		s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT;
+	else
+		s->s_rd_seq = s->s_wr_seq = ticks;

 	/* Initialize all CPUS, not just those running. */
 	for (i = 0; i <= mp_maxid; i++) {
 		c = zpcpu_get_cpu(smr, i);
 		c->c_seq = SMR_SEQ_INVALID;
 		c->c_shared = s;
+		c->c_deferred = 0;
+		c->c_limit = limit;
+		c->c_flags = flags;
 	}
 	atomic_thread_fence_seq_cst();

@ -460,5 +665,6 @@ smr_init_counters(void *unused)
 	advance_wait = counter_u64_alloc(M_WAITOK);
 	poll = counter_u64_alloc(M_WAITOK);
 	poll_scan = counter_u64_alloc(M_WAITOK);
+	poll_fail = counter_u64_alloc(M_WAITOK);
 }
 SYSINIT(smr_counters, SI_SUB_CPU, SI_ORDER_ANY, smr_init_counters, NULL);
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@ -326,21 +326,23 @@ ast(struct trapframe *framep)
 	if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
 	    !SIGISEMPTY(p->p_siglist)) {
 		sigfastblock_fetch(td);
-		PROC_LOCK(p);
-		mtx_lock(&p->p_sigacts->ps_mtx);
 		if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0 &&
 		    td->td_sigblock_val != 0) {
 			sigfastblock_setpend(td);
+			PROC_LOCK(p);
 			reschedule_signals(p, fastblock_mask,
-			    SIGPROCMASK_PS_LOCKED | SIGPROCMASK_FASTBLK);
+			    SIGPROCMASK_FASTBLK);
+			PROC_UNLOCK(p);
 		} else {
+			PROC_LOCK(p);
+			mtx_lock(&p->p_sigacts->ps_mtx);
 			while ((sig = cursig(td)) != 0) {
 				KASSERT(sig >= 0, ("sig %d", sig));
 				postsig(sig);
 			}
+			mtx_unlock(&p->p_sigacts->ps_mtx);
+			PROC_UNLOCK(p);
 		}
-		mtx_unlock(&p->p_sigacts->ps_mtx);
-		PROC_UNLOCK(p);
 	}

 	/*
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@ -403,14 +403,6 @@ namei(struct nameidata *ndp)
 	ndp->ni_rootdir = fdp->fd_rdir;
 	ndp->ni_topdir = fdp->fd_jdir;

-	/*
-	 * If we are auditing the kernel pathname, save the user pathname.
-	 */
-	if (cnp->cn_flags & AUDITVNODE1)
-		AUDIT_ARG_UPATH1(td, ndp->ni_dirfd, cnp->cn_pnbuf);
-	if (cnp->cn_flags & AUDITVNODE2)
-		AUDIT_ARG_UPATH2(td, ndp->ni_dirfd, cnp->cn_pnbuf);
-
 	startdir_used = 0;
 	dp = NULL;
 	cnp->cn_nameptr = cnp->cn_pnbuf;
@ -505,6 +497,13 @@ namei(struct nameidata *ndp)
 			ndp->ni_lcf |= NI_LCF_LATCH;
 	}
 	FILEDESC_SUNLOCK(fdp);
+	/*
+	 * If we are auditing the kernel pathname, save the user pathname.
+	 */
+	if (cnp->cn_flags & AUDITVNODE1)
+		AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf);
+	if (cnp->cn_flags & AUDITVNODE2)
+		AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf);
 	if (ndp->ni_startdir != NULL && !startdir_used)
 		vrele(ndp->ni_startdir);
 	if (error != 0) {
--- a/sys/modules/linuxkpi/Makefile
+++ b/sys/modules/linuxkpi/Makefile
@ -15,6 +15,7 @@ SRCS=	linux_compat.c \
 	linux_rcu.c \
 	linux_seq_file.c \
 	linux_schedule.c \
+	linux_shmemfs.c \
 	linux_slab.c \
 	linux_tasklet.c \
 	linux_usb.c \
--- a/sys/net80211/ieee80211_alq.c
+++ b/sys/net80211/ieee80211_alq.c
@ -111,8 +111,10 @@ sysctl_ieee80211_alq_log(SYSCTL_HANDLER_ARGS)
 		return (ieee80211_alq_setlogging(enable));
 }

-SYSCTL_PROC(_net_wlan, OID_AUTO, alq, CTLTYPE_INT|CTLFLAG_RW,
-	0, 0, sysctl_ieee80211_alq_log, "I", "Enable net80211 alq logging");
+SYSCTL_PROC(_net_wlan, OID_AUTO, alq,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 0, 0,
+    sysctl_ieee80211_alq_log, "I",
+    "Enable net80211 alq logging");
 SYSCTL_INT(_net_wlan, OID_AUTO, alq_size, CTLFLAG_RW,
 	&ieee80211_alq_qsize, 0, "In-memory log size (bytes)");
 SYSCTL_INT(_net_wlan, OID_AUTO, alq_lost, CTLFLAG_RW,
--- a/sys/net80211/ieee80211_amrr.c
+++ b/sys/net80211/ieee80211_amrr.c
@ -465,8 +465,8 @@ amrr_sysctlattach(struct ieee80211vap *vap,
 		return;

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-	    "amrr_rate_interval", CTLTYPE_INT | CTLFLAG_RW, vap,
-	    0, amrr_sysctl_interval, "I", "amrr operation interval (ms)");
+	    "amrr_rate_interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+	    vap, 0, amrr_sysctl_interval, "I", "amrr operation interval (ms)");
 	/* XXX bounds check values */
 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
 	    "amrr_max_sucess_threshold", CTLFLAG_RW,
--- a/sys/net80211/ieee80211_freebsd.c
+++ b/sys/net80211/ieee80211_freebsd.c
@ -60,7 +60,8 @@ __FBSDID("$FreeBSD$");
 #include <net80211/ieee80211_var.h>
 #include <net80211/ieee80211_input.h>

-SYSCTL_NODE(_net, OID_AUTO, wlan, CTLFLAG_RD, 0, "IEEE 80211 parameters");
+SYSCTL_NODE(_net, OID_AUTO, wlan, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+    "IEEE 80211 parameters");

 #ifdef IEEE80211_DEBUG
 static int	ieee80211_debug = 0;
@ -227,10 +228,10 @@ ieee80211_sysctl_vattach(struct ieee80211vap *vap)
 	sysctl_ctx_init(ctx);
 	snprintf(num, sizeof(num), "%u", ifp->if_dunit);
 	oid = SYSCTL_ADD_NODE(ctx, &SYSCTL_NODE_CHILDREN(_net, wlan),
-		OID_AUTO, num, CTLFLAG_RD, NULL, "");
+	    OID_AUTO, num, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
-		"%parent", CTLTYPE_STRING | CTLFLAG_RD, vap->iv_ic, 0,
-		ieee80211_sysctl_parent, "A", "parent device");
+	    "%parent", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
+	    vap->iv_ic, 0, ieee80211_sysctl_parent, "A", "parent device");
 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 		"driver_caps", CTLFLAG_RW, &vap->iv_caps, 0,
 		"driver capabilities");
@ -245,21 +246,21 @@ ieee80211_sysctl_vattach(struct ieee80211vap *vap)
 		"consecutive beacon misses before scanning");
 	/* XXX inherit from tunables */
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
-		"inact_run", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_run, 0,
-		ieee80211_sysctl_inact, "I",
-		"station inactivity timeout (sec)");
+	    "inact_run", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+	    &vap->iv_inact_run, 0, ieee80211_sysctl_inact, "I",
+	    "station inactivity timeout (sec)");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
-		"inact_probe", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_probe, 0,
-		ieee80211_sysctl_inact, "I",
-		"station inactivity probe timeout (sec)");
+	    "inact_probe", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+	    &vap->iv_inact_probe, 0, ieee80211_sysctl_inact, "I",
+	    "station inactivity probe timeout (sec)");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
-		"inact_auth", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_auth, 0,
-		ieee80211_sysctl_inact, "I",
-		"station authentication timeout (sec)");
+	    "inact_auth", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+	    &vap->iv_inact_auth, 0, ieee80211_sysctl_inact, "I",
+	    "station authentication timeout (sec)");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
-		"inact_init", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_init, 0,
-		ieee80211_sysctl_inact, "I",
-		"station initial state timeout (sec)");
+	    "inact_init", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+	    &vap->iv_inact_init, 0, ieee80211_sysctl_inact, "I",
+	    "station initial state timeout (sec)");
 	if (vap->iv_htcaps & IEEE80211_HTC_HT) {
 		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 			"ampdu_mintraffic_bk", CTLFLAG_RW,
@ -280,14 +281,14 @@ ieee80211_sysctl_vattach(struct ieee80211vap *vap)
 	}

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
-		"force_restart", CTLTYPE_INT | CTLFLAG_RW, vap, 0,
-		ieee80211_sysctl_vap_restart, "I",
-		"force a VAP restart");
+	    "force_restart", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+	    vap, 0, ieee80211_sysctl_vap_restart, "I", "force a VAP restart");

 	if (vap->iv_caps & IEEE80211_C_DFS) {
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
-			"radar", CTLTYPE_INT | CTLFLAG_RW, vap->iv_ic, 0,
-			ieee80211_sysctl_radar, "I", "simulate radar event");
+		    "radar", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+		    vap->iv_ic, 0, ieee80211_sysctl_radar, "I",
+		    "simulate radar event");
 	}
 	vap->iv_sysctl = ctx;
 	vap->iv_oid = oid;
--- a/sys/net80211/ieee80211_ht.c
+++ b/sys/net80211/ieee80211_ht.c
@ -139,22 +139,25 @@ const struct ieee80211_mcs_rates ieee80211_htrates[IEEE80211_HTRATE_MAXSIZE] = {
 };

 static	int ieee80211_ampdu_age = -1;	/* threshold for ampdu reorder q (ms) */
-SYSCTL_PROC(_net_wlan, OID_AUTO, ampdu_age, CTLTYPE_INT | CTLFLAG_RW,
-	&ieee80211_ampdu_age, 0, ieee80211_sysctl_msecs_ticks, "I",
-	"AMPDU max reorder age (ms)");
+SYSCTL_PROC(_net_wlan, OID_AUTO, ampdu_age,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+    &ieee80211_ampdu_age, 0, ieee80211_sysctl_msecs_ticks, "I",
+    "AMPDU max reorder age (ms)");

 static	int ieee80211_recv_bar_ena = 1;
 SYSCTL_INT(_net_wlan, OID_AUTO, recv_bar, CTLFLAG_RW, &ieee80211_recv_bar_ena,
 	    0, "BAR frame processing (ena/dis)");

 static	int ieee80211_addba_timeout = -1;/* timeout for ADDBA response */
-SYSCTL_PROC(_net_wlan, OID_AUTO, addba_timeout, CTLTYPE_INT | CTLFLAG_RW,
-	&ieee80211_addba_timeout, 0, ieee80211_sysctl_msecs_ticks, "I",
-	"ADDBA request timeout (ms)");
+SYSCTL_PROC(_net_wlan, OID_AUTO, addba_timeout,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+    &ieee80211_addba_timeout, 0, ieee80211_sysctl_msecs_ticks, "I",
+    "ADDBA request timeout (ms)");
 static	int ieee80211_addba_backoff = -1;/* backoff after max ADDBA requests */
-SYSCTL_PROC(_net_wlan, OID_AUTO, addba_backoff, CTLTYPE_INT | CTLFLAG_RW,
-	&ieee80211_addba_backoff, 0, ieee80211_sysctl_msecs_ticks, "I",
-	"ADDBA request backoff (ms)");
+SYSCTL_PROC(_net_wlan, OID_AUTO, addba_backoff,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+    &ieee80211_addba_backoff, 0, ieee80211_sysctl_msecs_ticks, "I",
+    "ADDBA request backoff (ms)");
 static	int ieee80211_addba_maxtries = 3;/* max ADDBA requests before backoff */
 SYSCTL_INT(_net_wlan, OID_AUTO, addba_maxtries, CTLFLAG_RW,
 	&ieee80211_addba_maxtries, 0, "max ADDBA requests sent before backoff");
--- a/sys/net80211/ieee80211_hwmp.c
+++ b/sys/net80211/ieee80211_hwmp.c
@ -154,39 +154,46 @@ struct ieee80211_hwmp_state {
 	uint8_t			hs_maxhops;	/* max hop count */
 };

-static SYSCTL_NODE(_net_wlan, OID_AUTO, hwmp, CTLFLAG_RD, 0,
+static SYSCTL_NODE(_net_wlan, OID_AUTO, hwmp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "IEEE 802.11s HWMP parameters");
 static int	ieee80211_hwmp_targetonly = 0;
 SYSCTL_INT(_net_wlan_hwmp, OID_AUTO, targetonly, CTLFLAG_RW,
    &ieee80211_hwmp_targetonly, 0, "Set TO bit on generated PREQs");
 static int	ieee80211_hwmp_pathtimeout = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, pathlifetime, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, pathlifetime,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &ieee80211_hwmp_pathtimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
    "path entry lifetime (ms)");
 static int	ieee80211_hwmp_maxpreq_retries = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, maxpreq_retries, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, maxpreq_retries,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &ieee80211_hwmp_maxpreq_retries, 0, ieee80211_sysctl_msecs_ticks, "I",
    "maximum number of preq retries");
 static int	ieee80211_hwmp_net_diameter_traversaltime = -1;
 SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, net_diameter_traversal_time,
-    CTLTYPE_INT | CTLFLAG_RW, &ieee80211_hwmp_net_diameter_traversaltime, 0,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    &ieee80211_hwmp_net_diameter_traversaltime, 0,
    ieee80211_sysctl_msecs_ticks, "I",
    "estimate travelse time across the MBSS (ms)");
 static int	ieee80211_hwmp_roottimeout = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, roottimeout, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, roottimeout,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &ieee80211_hwmp_roottimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
    "root PREQ timeout (ms)");
 static int	ieee80211_hwmp_rootint = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootint, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootint,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &ieee80211_hwmp_rootint, 0, ieee80211_sysctl_msecs_ticks, "I",
    "root interval (ms)");
 static int	ieee80211_hwmp_rannint = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rannint, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rannint,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &ieee80211_hwmp_rannint, 0, ieee80211_sysctl_msecs_ticks, "I",
    "root announcement interval (ms)");
 static struct timeval ieee80211_hwmp_rootconfint = { 0, 0 };
 static int	ieee80211_hwmp_rootconfint_internal = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootconfint, CTLTYPE_INT | CTLFLAG_RD,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootconfint,
+    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
    &ieee80211_hwmp_rootconfint_internal, 0, ieee80211_sysctl_msecs_ticks, "I",
    "root confirmation interval (ms) (read-only)");

@ -205,9 +212,10 @@ static struct ieee80211_mesh_proto_path mesh_proto_hwmp = {
 	.mpp_newstate	= hwmp_newstate,
 	.mpp_privlen	= sizeof(struct ieee80211_hwmp_route),
 };
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, inact, CTLTYPE_INT | CTLFLAG_RW,
-	&mesh_proto_hwmp.mpp_inact, 0, ieee80211_sysctl_msecs_ticks, "I",
-	"mesh route inactivity timeout (ms)");
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, inact,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+    &mesh_proto_hwmp.mpp_inact, 0, ieee80211_sysctl_msecs_ticks, "I",
+    "mesh route inactivity timeout (ms)");


 static void
--- a/sys/net80211/ieee80211_mesh.c
+++ b/sys/net80211/ieee80211_mesh.c
@ -106,27 +106,32 @@ uint32_t	mesh_airtime_calc(struct ieee80211_node *);
 /*
 * Timeout values come from the specification and are in milliseconds.
 */
-static SYSCTL_NODE(_net_wlan, OID_AUTO, mesh, CTLFLAG_RD, 0,
+static SYSCTL_NODE(_net_wlan, OID_AUTO, mesh, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "IEEE 802.11s parameters");
 static int	ieee80211_mesh_gateint = -1;
-SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, gateint, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, gateint,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &ieee80211_mesh_gateint, 0, ieee80211_sysctl_msecs_ticks, "I",
    "mesh gate interval (ms)");
 static int ieee80211_mesh_retrytimeout = -1;
-SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, retrytimeout, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, retrytimeout,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &ieee80211_mesh_retrytimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
    "Retry timeout (msec)");
 static int ieee80211_mesh_holdingtimeout = -1;

-SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, holdingtimeout, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, holdingtimeout,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &ieee80211_mesh_holdingtimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
    "Holding state timeout (msec)");
 static int ieee80211_mesh_confirmtimeout = -1;
-SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, confirmtimeout, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, confirmtimeout,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &ieee80211_mesh_confirmtimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
    "Confirm state timeout (msec)");
 static int ieee80211_mesh_backofftimeout = -1;
-SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, backofftimeout, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, backofftimeout,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &ieee80211_mesh_backofftimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
    "Backoff timeout (msec). This is to throutles peering forever when "
    "not receiving answer or is rejected by a neighbor");
--- a/sys/net80211/ieee80211_rssadapt.c
+++ b/sys/net80211/ieee80211_rssadapt.c
@ -381,6 +381,7 @@ rssadapt_sysctlattach(struct ieee80211vap *vap,
 {

 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
-	    "rssadapt_rate_interval", CTLTYPE_INT | CTLFLAG_RW, vap,
-	    0, rssadapt_sysctl_interval, "I", "rssadapt operation interval (ms)");
+	    "rssadapt_rate_interval",
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, vap, 0,
+	    rssadapt_sysctl_interval, "I", "rssadapt operation interval (ms)");
 }
--- a/sys/net80211/ieee80211_superg.c
+++ b/sys/net80211/ieee80211_superg.c
@ -92,9 +92,10 @@ static	int ieee80211_ffppsmin = 2;	/* pps threshold for ff aggregation */
 SYSCTL_INT(_net_wlan, OID_AUTO, ffppsmin, CTLFLAG_RW,
 	&ieee80211_ffppsmin, 0, "min packet rate before fast-frame staging");
 static	int ieee80211_ffagemax = -1;	/* max time frames held on stage q */
-SYSCTL_PROC(_net_wlan, OID_AUTO, ffagemax, CTLTYPE_INT | CTLFLAG_RW,
-	&ieee80211_ffagemax, 0, ieee80211_sysctl_msecs_ticks, "I",
-	"max hold time for fast-frame staging (ms)");
+SYSCTL_PROC(_net_wlan, OID_AUTO, ffagemax,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+    &ieee80211_ffagemax, 0, ieee80211_sysctl_msecs_ticks, "I",
+    "max hold time for fast-frame staging (ms)");

 static void
 ff_age_all(void *arg, int npending)
--- a/sys/netgraph/ng_socket.c
+++ b/sys/netgraph/ng_socket.c
@ -219,7 +219,6 @@ static int
 ngc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
 	 struct mbuf *control, struct thread *td)
 {
-	struct epoch_tracker et;
 	struct ngpcb *const pcbp = sotongpcb(so);
 	struct ngsock *const priv = NG_NODE_PRIVATE(pcbp->sockdata->node);
 	struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr;
@ -338,9 +337,7 @@ ngc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
 	item->apply = &apply;
 	priv->error = -1;

-	NET_EPOCH_ENTER(et);
 	error = ng_snd_item(item, 0);
-	NET_EPOCH_EXIT(et);

 	mtx_lock(&priv->mtx);
 	if (priv->error == -1)
@ -413,6 +410,7 @@ ngd_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
 	struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr;
 	int	len, error;
 	hook_p  hook = NULL;
+	item_p	item;
 	char	hookname[NG_HOOKSIZ];

 	if ((pcbp == NULL) || (control != NULL)) {
@ -465,8 +463,10 @@ ngd_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
 	}

 	/* Send data. */
+	item = ng_package_data(m, NG_WAITOK);
+	m = NULL;
 	NET_EPOCH_ENTER(et);
-	NG_SEND_DATA_FLAGS(error, hook, m, NG_WAITOK);
+	NG_FWD_ITEM_HOOK(error, item, hook);
 	NET_EPOCH_EXIT(et);

 release:
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@ -218,19 +218,22 @@ static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);
 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);
 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);

-SYSCTL_NODE(_net_inet, IPPROTO_CARP,	carp,	CTLFLAG_RW, 0,	"CARP");
+SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+    "CARP");
 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
-    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_allow_sysctl, "I",
+    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    0, 0, carp_allow_sysctl, "I",
    "Accept incoming CARP packets");
 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
-    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_dscp_sysctl, "I",
+    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    0, 0, carp_dscp_sysctl, "I",
    "DSCP value for carp packets");
 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(carp_log), 0, "CARP log level");
 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
-    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, carp_demote_adj_sysctl, "I",
    "Adjust demotion factor (skew of advskew)");
 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
--- a/sys/netpfil/pf/if_pfsync.c
+++ b/sys/netpfil/pf/if_pfsync.c
@ -273,7 +273,8 @@ static void	pfsync_uninit(void);

 static unsigned long pfsync_buckets;

-SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
+SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+    "PFSYNC");
 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pfsyncstats), pfsyncstats,
    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@ -369,7 +369,8 @@ VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
 VNET_DEFINE(struct pf_idhash *, pf_idhash);
 VNET_DEFINE(struct pf_srchash *, pf_srchash);

-SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)");
+SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+    "pf(4)");

 u_long	pf_hashmask;
 u_long	pf_srchashmask;
--- a/sys/powerpc/booke/pmap.c
+++ b/sys/powerpc/booke/pmap.c
@ -705,11 +705,10 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx,

 	req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
 	while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) {
+		if (nosleep)
+			return (NULL);
 		PMAP_UNLOCK(pmap);
 		rw_wunlock(&pvh_global_lock);
-		if (nosleep) {
-			return (NULL);
-		}
 		vm_wait(NULL);
 		rw_wlock(&pvh_global_lock);
 		PMAP_LOCK(pmap);
@ -905,8 +904,6 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep)
 		pidx = (PTBL_PAGES * pdir_idx) + i;
 		while ((m = vm_page_alloc(NULL, pidx,
 		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
-			PMAP_UNLOCK(pmap);
-			rw_wunlock(&pvh_global_lock);
 			if (nosleep) {
 				ptbl_free_pmap_ptbl(pmap, ptbl);
 				for (j = 0; j < i; j++)
@ -914,6 +911,8 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep)
 				vm_wire_sub(i);
 				return (NULL);
 			}
+			PMAP_UNLOCK(pmap);
+			rw_wunlock(&pvh_global_lock);
 			vm_wait(NULL);
 			rw_wlock(&pvh_global_lock);
 			PMAP_LOCK(pmap);
@ -2481,8 +2480,8 @@ mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start,
 		    PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0);
 		m = TAILQ_NEXT(m, listq);
 	}
-	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
+	rw_wunlock(&pvh_global_lock);
 }

 static void
@ -2495,8 +2494,8 @@ mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
 	mmu_booke_enter_locked(mmu, pmap, va, m,
 	    prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP |
 	    PMAP_ENTER_QUICK_LOCKED, 0);
-	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
+	rw_wunlock(&pvh_global_lock);
 }

 /*
--- a/sys/security/audit/audit.h
+++ b/sys/security/audit/audit.h
@ -120,6 +120,10 @@ void	 audit_arg_upath1(struct thread *td, int dirfd, char *upath);
 void	 audit_arg_upath1_canon(char *upath);
 void	 audit_arg_upath2(struct thread *td, int dirfd, char *upath);
 void	 audit_arg_upath2_canon(char *upath);
+void	 audit_arg_upath1_vp(struct thread *td, struct vnode *rdir,
+	    struct vnode *cdir, char *upath);
+void	 audit_arg_upath2_vp(struct thread *td, struct vnode *rdir,
+	    struct vnode *cdir, char *upath);
 void	 audit_arg_vnode1(struct vnode *vp);
 void	 audit_arg_vnode2(struct vnode *vp);
 void	 audit_arg_text(const char *text);
@ -362,6 +366,16 @@ void	 audit_thread_free(struct thread *td);
 		audit_arg_upath2_canon((upath));			\
 } while (0)

+#define	AUDIT_ARG_UPATH1_VP(td, rdir, cdir, upath) do {			\
+	if (AUDITING_TD(curthread))					\
+		audit_arg_upath1_vp((td), (rdir), (cdir), (upath));	\
+} while (0)
+
+#define	AUDIT_ARG_UPATH2_VP(td, rdir, cdir, upath) do {			\
+	if (AUDITING_TD(curthread))					\
+		audit_arg_upath2_vp((td), (rdir), (cdir), (upath));	\
+} while (0)
+
 #define	AUDIT_ARG_VALUE(value) do {					\
 	if (AUDITING_TD(curthread))					\
 		audit_arg_value((value));				\
@ -448,6 +462,8 @@ void	 audit_thread_free(struct thread *td);
 #define	AUDIT_ARG_UPATH1_CANON(upath)
 #define	AUDIT_ARG_UPATH2(td, dirfd, upath)
 #define	AUDIT_ARG_UPATH2_CANON(upath)
+#define	AUDIT_ARG_UPATH1_VP(td, rdir, cdir, upath)
+#define	AUDIT_ARG_UPATH2_VP(td, rdir, cdir, upath)
 #define	AUDIT_ARG_VALUE(value)
 #define	AUDIT_ARG_VNODE1(vp)
 #define	AUDIT_ARG_VNODE2(vp)
--- a/sys/security/audit/audit_arg.c
+++ b/sys/security/audit/audit_arg.c
@ -767,6 +767,44 @@ audit_arg_upath2(struct thread *td, int dirfd, char *upath)
 	ARG_SET_VALID(ar, ARG_UPATH2);
 }

+static void
+audit_arg_upath_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir,
+    char *upath, char **pathp)
+{
+
+	if (*pathp == NULL)
+		*pathp = malloc(MAXPATHLEN, M_AUDITPATH, M_WAITOK);
+	audit_canon_path_vp(td, rdir, cdir, upath, *pathp);
+}
+
+void
+audit_arg_upath1_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir,
+    char *upath)
+{
+	struct kaudit_record *ar;
+
+	ar = currecord();
+	if (ar == NULL)
+		return;
+
+	audit_arg_upath_vp(td, rdir, cdir, upath, &ar->k_ar.ar_arg_upath1);
+	ARG_SET_VALID(ar, ARG_UPATH1);
+}
+
+void
+audit_arg_upath2_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir,
+    char *upath)
+{
+	struct kaudit_record *ar;
+
+	ar = currecord();
+	if (ar == NULL)
+		return;
+
+	audit_arg_upath_vp(td, rdir, cdir, upath, &ar->k_ar.ar_arg_upath2);
+	ARG_SET_VALID(ar, ARG_UPATH2);
+}
+
 /*
 * Variants on path auditing that do not canonicalise the path passed in;
 * these are for use with filesystem-like subsystems that employ string names,
--- a/sys/security/audit/audit_bsm_klib.c
+++ b/sys/security/audit/audit_bsm_klib.c
@ -421,57 +421,23 @@ auditon_command_event(int cmd)
 * leave the filename starting with '/' in the audit log in this case.
 */
 void
-audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
+audit_canon_path_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir,
+    char *path, char *cpath)
 {
-	struct vnode *cvnp, *rvnp;
+	struct vnode *vp;
 	char *rbuf, *fbuf, *copy;
-	struct filedesc *fdp;
 	struct sbuf sbf;
-	cap_rights_t rights;
-	int error, needslash;
+	int error;

 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d",
 	    __func__,  __FILE__, __LINE__);

 	copy = path;
-	rvnp = cvnp = NULL;
-	fdp = td->td_proc->p_fd;
-	FILEDESC_SLOCK(fdp);
-	/*
-	 * Make sure that we handle the chroot(2) case.  If there is an
-	 * alternate root directory, prepend it to the audited pathname.
-	 */
-	if (fdp->fd_rdir != NULL && fdp->fd_rdir != rootvnode) {
-		rvnp = fdp->fd_rdir;
-		vhold(rvnp);
-	}
-	/*
-	 * If the supplied path is relative, make sure we capture the current
-	 * working directory so we can prepend it to the supplied relative
-	 * path.
-	 */
-	if (*path != '/') {
-		if (dirfd == AT_FDCWD) {
-			cvnp = fdp->fd_cdir;
-			vhold(cvnp);
-		} else {
-			/* XXX: fgetvp() that vhold()s vnode instead of vref()ing it would be better */
-			error = fgetvp(td, dirfd, cap_rights_init(&rights), &cvnp);
-			if (error) {
-				FILEDESC_SUNLOCK(fdp);
-				cpath[0] = '\0';
-				if (rvnp != NULL)
-					vdrop(rvnp);
-				return;
-			}
-			vhold(cvnp);
-			vrele(cvnp);
-		}
-		needslash = (fdp->fd_rdir != cvnp);
-	} else {
-		needslash = 1;
-	}
-	FILEDESC_SUNLOCK(fdp);
+	if (*path == '/')
+		vp = rdir;
+	else
+		vp = cdir;
+	MPASS(vp != NULL);
 	/*
 	 * NB: We require that the supplied array be at least MAXPATHLEN bytes
 	 * long.  If this is not the case, then we can run into serious trouble.
@ -479,6 +445,8 @@ audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
 	(void) sbuf_new(&sbf, cpath, MAXPATHLEN, SBUF_FIXEDLEN);
 	/*
 	 * Strip leading forward slashes.
+	 *
+	 * Note this does nothing to fully canonicalize the path.
 	 */
 	while (*copy == '/')
 		copy++;
@ -490,35 +458,25 @@ audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
 	 * on Darwin.  As a result, this may need some additional attention
 	 * in the future.
 	 */
-	if (rvnp != NULL) {
-		error = vn_fullpath_global(td, rvnp, &rbuf, &fbuf);
-		vdrop(rvnp);
-		if (error) {
-			cpath[0] = '\0';
-			if (cvnp != NULL)
-				vdrop(cvnp);
-			return;
-		}
-		(void) sbuf_cat(&sbf, rbuf);
-		free(fbuf, M_TEMP);
+	error = vn_fullpath_global(td, vp, &rbuf, &fbuf);
+	if (error) {
+		cpath[0] = '\0';
+		return;
 	}
-	if (cvnp != NULL) {
-		error = vn_fullpath(td, cvnp, &rbuf, &fbuf);
-		vdrop(cvnp);
-		if (error) {
-			cpath[0] = '\0';
-			return;
-		}
-		(void) sbuf_cat(&sbf, rbuf);
-		free(fbuf, M_TEMP);
-	}
-	if (needslash)
+	(void) sbuf_cat(&sbf, rbuf);
+	/*
+	 * We are going to concatenate the resolved path with the passed path
+	 * with all slashes removed and we want them glued with a single slash.
+	 * However, if the directory is /, the slash is already there.
+	 */
+	if (rbuf[1] != '\0')
 		(void) sbuf_putc(&sbf, '/');
+	free(fbuf, M_TEMP);
 	/*
 	 * Now that we have processed any alternate root and relative path
 	 * names, add the supplied pathname.
 	 */
-        (void) sbuf_cat(&sbf, copy);
+	(void) sbuf_cat(&sbf, copy);
 	/*
 	 * One or more of the previous sbuf operations could have resulted in
 	 * the supplied buffer being overflowed.  Check to see if this is the
@ -530,3 +488,43 @@ audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
 	}
 	sbuf_finish(&sbf);
 }
+
+void
+audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
+{
+	struct vnode *cdir, *rdir;
+	struct filedesc *fdp;
+	cap_rights_t rights;
+	int error;
+
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d",
+	    __func__,  __FILE__, __LINE__);
+
+	rdir = cdir = NULL;
+	fdp = td->td_proc->p_fd;
+	FILEDESC_SLOCK(fdp);
+	if (*path == '/') {
+		rdir = fdp->fd_rdir;
+		vrefact(rdir);
+	} else {
+		if (dirfd == AT_FDCWD) {
+			cdir = fdp->fd_cdir;
+			vrefact(cdir);
+		} else {
+			error = fgetvp(td, dirfd, cap_rights_init(&rights), &cdir);
+			if (error != 0) {
+				FILEDESC_SUNLOCK(fdp);
+				cpath[0] = '\0';
+				return;
+			}
+		}
+	}
+	FILEDESC_SUNLOCK(fdp);
+
+	audit_canon_path_vp(td, rdir, cdir, path, cpath);
+
+	if (rdir != NULL)
+		vrele(rdir);
+	if (cdir != NULL)
+		vrele(cdir);
+}
--- a/sys/security/audit/audit_private.h
+++ b/sys/security/audit/audit_private.h
@ -472,6 +472,8 @@ au_event_t	 audit_semsys_to_event(int which);
 au_event_t	 audit_shmsys_to_event(int which);
 void		 audit_canon_path(struct thread *td, int dirfd, char *path,
 		    char *cpath);
+void		 audit_canon_path_vp(struct thread *td, struct vnode *rdir,
+		    struct vnode *cdir, char *path, char *cpath);
 au_event_t	 auditon_command_event(int cmd);

 /*
--- a/sys/sys/_smr.h
+++ b/sys/sys/_smr.h
@ -32,6 +32,7 @@
 #define	_SYS__SMR_H_

 typedef uint32_t	smr_seq_t;
+typedef int32_t		smr_delta_t;
 typedef struct smr 	*smr_t;

 #endif	/* __SYS_SMR_H_ */
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@ -60,7 +60,7 @@
 *		in the range 5 to 9.
 */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1300080	/* Master, propagated to newvers */
+#define __FreeBSD_version 1300081	/* Master, propagated to newvers */

 /*
 * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
--- a/sys/sys/smr.h
+++ b/sys/sys/smr.h
@ -45,11 +45,13 @@
 * Modular arithmetic for comparing sequence numbers that have
 * potentially wrapped.  Copied from tcp_seq.h.
 */
-#define	SMR_SEQ_LT(a, b)	((int32_t)((a)-(b)) < 0)
-#define	SMR_SEQ_LEQ(a, b)	((int32_t)((a)-(b)) <= 0)
-#define	SMR_SEQ_GT(a, b)	((int32_t)((a)-(b)) > 0)
-#define	SMR_SEQ_GEQ(a, b)	((int32_t)((a)-(b)) >= 0)
-#define	SMR_SEQ_DELTA(a, b)	((int32_t)((a)-(b)))
+#define	SMR_SEQ_LT(a, b)	((smr_delta_t)((a)-(b)) < 0)
+#define	SMR_SEQ_LEQ(a, b)	((smr_delta_t)((a)-(b)) <= 0)
+#define	SMR_SEQ_GT(a, b)	((smr_delta_t)((a)-(b)) > 0)
+#define	SMR_SEQ_GEQ(a, b)	((smr_delta_t)((a)-(b)) >= 0)
+#define	SMR_SEQ_DELTA(a, b)	((smr_delta_t)((a)-(b)))
+#define	SMR_SEQ_MIN(a, b)	(SMR_SEQ_LT((a), (b)) ? (a) : (b))
+#define	SMR_SEQ_MAX(a, b)	(SMR_SEQ_GT((a), (b)) ? (a) : (b))

 #define	SMR_SEQ_INVALID		0

@ -66,8 +68,13 @@ struct smr {
 	smr_seq_t	c_seq;		/* Current observed sequence. */
 	smr_shared_t	c_shared;	/* Shared SMR state. */
 	int		c_deferred;	/* Deferred advance counter. */
+	int		c_limit;	/* Deferred advance limit. */
+	int		c_flags;	/* SMR Configuration */
 };

+#define	SMR_LAZY	0x0001		/* Higher latency write, fast read. */
+#define	SMR_DEFERRED	0x0002		/* Aggregate updates to wr_seq. */
+
 #define	SMR_ENTERED(smr)						\
    (curthread->td_critnest != 0 && zpcpu_get((smr))->c_seq != SMR_SEQ_INVALID)

@ -94,7 +101,7 @@ struct smr {
 * All acceses include a parameter for an assert to verify the required
 * synchronization.  For example, a writer might use:
 *
- * smr_serilized_store(pointer, value, mtx_owned(&writelock));
+ * smr_serialized_store(pointer, value, mtx_owned(&writelock));
 *
 * These are only enabled in INVARIANTS kernels.
 */
@ -127,6 +134,9 @@ typedef struct {							\
 * Store 'v' to an SMR protected pointer while serialized by an
 * external mechanism.  'ex' should contain an assert that the
 * external mechanism is held.  i.e. mtx_owned()
+ *
+ * Writers that are serialized with mutual exclusion or on a single
+ * thread should use smr_serialized_store() rather than swap.
 */
 #define	smr_serialized_store(p, v, ex) do {				\
 	SMR_ASSERT(ex, "smr_serialized_store");				\
@ -138,6 +148,8 @@ typedef struct {							\
 * swap 'v' with an SMR protected pointer and return the old value
 * while serialized by an external mechanism.  'ex' should contain
 * an assert that the external mechanism is provided.  i.e. mtx_owned()
+ *
+ * Swap permits multiple writers to update a pointer concurrently.
 */
 #define	smr_serialized_swap(p, v, ex) ({				\
 	SMR_ASSERT(ex, "smr_serialized_swap");				\
@ -170,7 +182,8 @@ typedef struct {							\
 } while (0)

 /*
- * Return the current write sequence number.
+ * Return the current write sequence number.  This is not the same as the
+ * current goal which may be in the future.
 */
 static inline smr_seq_t
 smr_shared_current(smr_shared_t s)
@ -195,6 +208,8 @@ smr_enter(smr_t smr)

 	critical_enter();
 	smr = zpcpu_get(smr);
+	KASSERT((smr->c_flags & SMR_LAZY) == 0,
+	    ("smr_enter(%s) lazy smr.", smr->c_shared->s_name));
 	KASSERT(smr->c_seq == 0,
 	    ("smr_enter(%s) does not support recursion.",
 	    smr->c_shared->s_name));
@ -228,6 +243,8 @@ smr_exit(smr_t smr)

 	smr = zpcpu_get(smr);
 	CRITICAL_ASSERT(curthread);
+	KASSERT((smr->c_flags & SMR_LAZY) == 0,
+	    ("smr_exit(%s) lazy smr.", smr->c_shared->s_name));
 	KASSERT(smr->c_seq != SMR_SEQ_INVALID,
 	    ("smr_exit(%s) not in a smr section.", smr->c_shared->s_name));

@ -242,19 +259,63 @@ smr_exit(smr_t smr)
 	critical_exit();
 }

+/*
+ * Enter a lazy smr section.  This is used for read-mostly state that
+ * can tolerate a high free latency.
+ */
+static inline void
+smr_lazy_enter(smr_t smr)
+{
+
+	critical_enter();
+	smr = zpcpu_get(smr);
+	KASSERT((smr->c_flags & SMR_LAZY) != 0,
+	    ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
+	KASSERT(smr->c_seq == 0,
+	    ("smr_lazy_enter(%s) does not support recursion.",
+	    smr->c_shared->s_name));
+
+	/*
+	 * This needs no serialization.  If an interrupt occurs before we
+	 * assign wr_seq to c_seq any speculative loads will be discarded.
+	 * If we assign a stale wr_seq value due to interrupt we use the
+	 * same algorithm that renders smr_enter() safe.
+	 */
+	smr->c_seq = smr_shared_current(smr->c_shared);
+}
+
+/*
+ * Exit a lazy smr section.  This is used for read-mostly state that
+ * can tolerate a high free latency.
+ */
+static inline void
+smr_lazy_exit(smr_t smr)
+{
+
+	smr = zpcpu_get(smr);
+	CRITICAL_ASSERT(curthread);
+	KASSERT((smr->c_flags & SMR_LAZY) != 0,
+	    ("smr_lazy_exit(%s) non-lazy smr.", smr->c_shared->s_name));
+	KASSERT(smr->c_seq != SMR_SEQ_INVALID,
+	    ("smr_lazy_exit(%s) not in a smr section.", smr->c_shared->s_name));
+
+	/*
+	 * All loads/stores must be retired before the sequence becomes
+	 * visible.  The fence compiles away on amd64.  Another
+	 * alternative would be to omit the fence but store the exit
+	 * time and wait 1 tick longer.
+	 */
+	atomic_thread_fence_rel();
+	smr->c_seq = SMR_SEQ_INVALID;
+	critical_exit();
+}
+
 /*
 * Advances the write sequence number.  Returns the sequence number
 * required to ensure that all modifications are visible to readers.
 */
 smr_seq_t smr_advance(smr_t smr);

-/*
- * Advances the write sequence number only after N calls.  Returns
- * the correct goal for a wr_seq that has not yet occurred.  Used to
- * minimize shared cacheline invalidations for frequent writers.
- */
-smr_seq_t smr_advance_deferred(smr_t smr, int limit);
-
 /*
 * Returns true if a goal sequence has been reached.  If
 * wait is true this will busy loop until success.
@ -262,7 +323,9 @@ smr_seq_t smr_advance_deferred(smr_t smr, int limit);
 bool smr_poll(smr_t smr, smr_seq_t goal, bool wait);

 /* Create a new SMR context. */
-smr_t smr_create(const char *name);
+smr_t smr_create(const char *name, int limit, int flags);
+
+/* Destroy the context. */
 void smr_destroy(smr_t smr);

 /*
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@ -1140,7 +1140,6 @@ hash_free(struct uma_hash *hash)
 * Returns:
 *	Nothing
 */
-
 static void
 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
 {
@ -1200,7 +1199,7 @@ cache_drain(uma_zone_t zone)
 	 */
 	seq = SMR_SEQ_INVALID;
 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
-		seq = smr_current(zone->uz_smr);
+		seq = smr_advance(zone->uz_smr);
 	CPU_FOREACH(cpu) {
 		cache = &zone->uz_cpu[cpu];
 		bucket = cache_bucket_unload_alloc(cache);
@ -1329,7 +1328,7 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
 		 * the item count.  Reclaim it individually here.
 		 */
 		zdom = ZDOM_GET(zone, i);
-		if ((zone->uz_flags & UMA_ZONE_SMR) == 0) {
+		if ((zone->uz_flags & UMA_ZONE_SMR) == 0 || drain) {
 			ZONE_CROSS_LOCK(zone);
 			bucket = zdom->uzd_cross;
 			zdom->uzd_cross = NULL;
@ -2679,7 +2678,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)

 	/* Caller requests a private SMR context. */
 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
-		zone->uz_smr = smr_create(zone->uz_name);
+		zone->uz_smr = smr_create(zone->uz_name, 0, 0);

 	KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
 	    (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
@ -4137,22 +4136,21 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata)
 	    "uma_zfree: zone %s(%p) draining cross bucket %p",
 	    zone->uz_name, zone, bucket);

-	STAILQ_INIT(&fullbuckets);
-
-	/*
-	 * To avoid having ndomain * ndomain buckets for sorting we have a
-	 * lock on the current crossfree bucket.  A full matrix with
-	 * per-domain locking could be used if necessary.
-	 */
-	ZONE_CROSS_LOCK(zone);
-
 	/*
 	 * It is possible for buckets to arrive here out of order so we fetch
 	 * the current smr seq rather than accepting the bucket's.
 	 */
 	seq = SMR_SEQ_INVALID;
 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
-		seq = smr_current(zone->uz_smr);
+		seq = smr_advance(zone->uz_smr);
+
+	/*
+	 * To avoid having ndomain * ndomain buckets for sorting we have a
+	 * lock on the current crossfree bucket.  A full matrix with
+	 * per-domain locking could be used if necessary.
+	 */
+	STAILQ_INIT(&fullbuckets);
+	ZONE_CROSS_LOCK(zone);
 	while (bucket->ub_cnt > 0) {
 		item = bucket->ub_bucket[bucket->ub_cnt - 1];
 		domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
--- a/sys/x86/x86/identcpu.c
+++ b/sys/x86/x86/identcpu.c
@ -284,7 +284,7 @@ printcpuinfo(void)
 			switch (cpu_id & 0xf00) {
 			case 0x400:
 				strcat(cpu_model, "i486 ");
-			        /* Check the particular flavor of 486 */
+				/* Check the particular flavor of 486 */
 				switch (cpu_id & 0xf0) {
 				case 0x00:
 				case 0x10:
@ -312,32 +312,32 @@ printcpuinfo(void)
 				}
 				break;
 			case 0x500:
-			        /* Check the particular flavor of 586 */
-			        strcat(cpu_model, "Pentium");
-			        switch (cpu_id & 0xf0) {
+				/* Check the particular flavor of 586 */
+				strcat(cpu_model, "Pentium");
+				switch (cpu_id & 0xf0) {
 				case 0x00:
-				        strcat(cpu_model, " A-step");
+					strcat(cpu_model, " A-step");
 					break;
 				case 0x10:
-				        strcat(cpu_model, "/P5");
+					strcat(cpu_model, "/P5");
 					break;
 				case 0x20:
-				        strcat(cpu_model, "/P54C");
+					strcat(cpu_model, "/P54C");
 					break;
 				case 0x30:
-				        strcat(cpu_model, "/P24T");
+					strcat(cpu_model, "/P24T");
 					break;
 				case 0x40:
-				        strcat(cpu_model, "/P55C");
+					strcat(cpu_model, "/P55C");
 					break;
 				case 0x70:
-				        strcat(cpu_model, "/P54C");
+					strcat(cpu_model, "/P54C");
 					break;
 				case 0x80:
-				        strcat(cpu_model, "/P55C (quarter-micron)");
+					strcat(cpu_model, "/P55C (quarter-micron)");
 					break;
 				default:
-				        /* nothing */
+					/* nothing */
 					break;
 				}
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
@ -350,18 +350,18 @@ printcpuinfo(void)
 #endif
 				break;
 			case 0x600:
-			        /* Check the particular flavor of 686 */
-  			        switch (cpu_id & 0xf0) {
+				/* Check the particular flavor of 686 */
+				switch (cpu_id & 0xf0) {
 				case 0x00:
-				        strcat(cpu_model, "Pentium Pro A-step");
+					strcat(cpu_model, "Pentium Pro A-step");
 					break;
 				case 0x10:
-				        strcat(cpu_model, "Pentium Pro");
+					strcat(cpu_model, "Pentium Pro");
 					break;
 				case 0x30:
 				case 0x50:
 				case 0x60:
-				        strcat(cpu_model,
+					strcat(cpu_model,
 				"Pentium II/Pentium II Xeon/Celeron");
 					cpu = CPU_PII;
 					break;
@ -369,12 +369,12 @@ printcpuinfo(void)
 				case 0x80:
 				case 0xa0:
 				case 0xb0:
-				        strcat(cpu_model,
+					strcat(cpu_model,
 					"Pentium III/Pentium III Xeon/Celeron");
 					cpu = CPU_PIII;
 					break;
 				default:
-				        strcat(cpu_model, "Unknown 80686");
+					strcat(cpu_model, "Unknown 80686");
 					break;
 				}
 				break;
@ -1411,7 +1411,7 @@ identify_hypervisor_cpuid_base(void)
 		if (regs[0] == 0 && regs[1] == 0x4b4d564b &&
 		    regs[2] == 0x564b4d56 && regs[3] == 0x0000004d)
 			regs[0] = leaf + 1;
-			
+
 		if (regs[0] >= leaf) {
 			for (i = 0; i < nitems(vm_cpuids); i++)
 				if (strncmp((const char *)&regs[1],
@ -1471,7 +1471,7 @@ identify_hypervisor(void)
 		if (strncmp(p, "VMware-", 7) == 0 || strncmp(p, "VMW", 3) == 0) {
 			vmware_hvcall(VMW_HVCMD_GETVERSION, regs);
 			if (regs[1] == VMW_HVMAGIC) {
-				vm_guest = VM_GUEST_VMWARE;			
+				vm_guest = VM_GUEST_VMWARE;
 				freeenv(p);
 				return;
 			}
@ -2341,23 +2341,23 @@ print_svm_info(void)
 		comma = 0;
 		if (features & (1 << 0)) {
 			printf("%sNP", comma ? "," : "");
-                        comma = 1; 
+			comma = 1;
 		}
 		if (features & (1 << 3)) {
 			printf("%sNRIP", comma ? "," : "");
-                        comma = 1; 
+			comma = 1;
 		}
 		if (features & (1 << 5)) {
 			printf("%sVClean", comma ? "," : "");
-                        comma = 1; 
+			comma = 1;
 		}
 		if (features & (1 << 6)) {
 			printf("%sAFlush", comma ? "," : "");
-                        comma = 1; 
+			comma = 1;
 		}
 		if (features & (1 << 7)) {
 			printf("%sDAssist", comma ? "," : "");
-                        comma = 1; 
+			comma = 1;
 		}
 		printf("%sNAsids=%d", comma ? "," : "", regs[1]);
 		return;
@ -2375,7 +2375,7 @@ print_svm_info(void)
 	       "\010DecodeAssist"	/* Decode assist */
 	       "\011<b8>"
 	       "\012<b9>"
-	       "\013PauseFilter"	/* PAUSE intercept filter */    
+	       "\013PauseFilter"	/* PAUSE intercept filter */
 	       "\014EncryptedMcodePatch"
 	       "\015PauseFilterThreshold" /* PAUSE filter threshold */
 	       "\016AVIC"		/* virtual interrupt controller */
@ -2385,7 +2385,7 @@ print_svm_info(void)
 	       "\022GMET"		/* Guest Mode Execute Trap */
 	       "\023<b18>"
 	       "\024<b19>"
-	       "\025<b20>"
+	       "\025GuesSpecCtl"	/* Guest Spec_ctl */
 	       "\026<b21>"
 	       "\027<b22>"
 	       "\030<b23>"
@ -2397,7 +2397,7 @@ print_svm_info(void)
 	       "\036<b29>"
 	       "\037<b30>"
 	       "\040<b31>"
-                );
+	       );
 	printf("\nRevision=%d, ASIDs=%d", regs[0] & 0xff, regs[1]);
 }

--- a/tools/bsdbox/Makefile.base
+++ b/tools/bsdbox/Makefile.base
@ -20,6 +20,7 @@ CRUNCH_LIBS+=		-ldevstat -lncursesw -lncurses -lmemstat -lkvm -lelf
 CRUNCH_PROGS_usr.bin+=	cpio
 # XXX SSL ?
 CRUNCH_LIBS+=		-larchive -lbz2 -lz -llzma -lbsdxml -lssl -lcrypto
+CRUNCH_LIBS+=		-lprivatezstd -lthr

 # Clear requires tput, and it's a shell script so it won't be crunched
 CRUNCH_PROGS_usr.bin+=	tput
--- a/usr.bin/dtc/dtc.cc
+++ b/usr.bin/dtc/dtc.cc
@ -304,7 +304,10 @@ main(int argc, char **argv)
 			}
 			break;
 		default:
-			fprintf(stderr, "Unknown option %c\n", ch);
+			/* 
+			 * Since opterr is non-zero, getopt will have
+			 * already printed an error message.
+			 */
 			return EXIT_FAILURE;
 		}
 	}
--- a/usr.sbin/bhyve/iov.c
+++ b/usr.sbin/bhyve/iov.c
@ -119,24 +119,25 @@ iov_to_buf(const struct iovec *iov, int niov, void **buf)
 }

 ssize_t
-buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov,
+buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov,
    size_t seek)
 {
 	struct iovec *diov;
-	int ndiov, i;
 	size_t off = 0, len;
+	int  i;

 	if (seek > 0) {
+		int ndiov;
+
 		diov = malloc(sizeof(struct iovec) * niov);
 		seek_iov(iov, niov, diov, &ndiov, seek);
-	} else {
-		diov = iov;
-		ndiov = niov;
+		iov = diov;
+		niov = ndiov;
 	}

-	for (i = 0; i < ndiov && off < buflen; i++) {
-		len = MIN(diov[i].iov_len, buflen - off);
-		memcpy(diov[i].iov_base, buf + off, len);
+	for (i = 0; i < niov && off < buflen; i++) {
+		len = MIN(iov[i].iov_len, buflen - off);
+		memcpy(iov[i].iov_base, buf + off, len);
 		off += len;
 	}

--- a/usr.sbin/bhyve/iov.h
+++ b/usr.sbin/bhyve/iov.h
@ -38,7 +38,7 @@ void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2,
 void truncate_iov(struct iovec *iov, int *niov, size_t length);
 size_t count_iov(const struct iovec *iov, int niov);
 ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf);
-ssize_t buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov,
-    size_t seek);
+ssize_t buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov,
+    int niov, size_t seek);

 #endif	/* _IOV_H_ */
--- a/usr.sbin/bhyve/net_backends.c
+++ b/usr.sbin/bhyve/net_backends.c
@ -102,6 +102,13 @@ struct net_backend {
 	ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
 	    int iovcnt);

+	/*
+	 * Get the length of the next packet that can be received from
+	 * the backend. If no packets are currently available, this
+	 * function returns 0.
+	 */
+	ssize_t (*peek_recvlen)(struct net_backend *be);
+
 	/*
 	 * Called to receive a packet from the backend. When the function
 	 * returns a positive value 'len', the scatter-gather vector
@ -167,6 +174,13 @@ SET_DECLARE(net_backend_set, struct net_backend);

 struct tap_priv {
 	struct mevent *mevp;
+	/*
+	 * A bounce buffer that allows us to implement the peek_recvlen
+	 * callback. In the future we may get the same information from
+	 * the kevent data.
+	 */
+	char bbuf[1 << 16];
+	ssize_t bbuflen;	/* packet bytes held in bbuf; 0 if empty */
 };

 static void
@ -223,6 +237,9 @@ tap_init(struct net_backend *be, const char *devname,
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 #endif

+	memset(priv->bbuf, 0, sizeof(priv->bbuf));
+	priv->bbuflen = 0;
+
 	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
 	if (priv->mevp == NULL) {
 		WPRINTF(("Could not register event"));
@ -246,15 +263,56 @@ tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
 }

 static ssize_t
-tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
+tap_peek_recvlen(struct net_backend *be)
 {
+	struct tap_priv *priv = (struct tap_priv *)be->opaque;
 	ssize_t ret;

-	/* Should never be called without a valid tap fd */
-	assert(be->fd != -1);
+	/*
+	 * Returns the length of the next pending packet, 0 if read(2)
+	 * would block, or read(2)'s own return value otherwise. A packet
+	 * read here stays in the bounce buffer until tap_recv() takes it.
+	 */
+	if (priv->bbuflen > 0) {
+		/*
+		 * We already have a packet in the bounce buffer.
+		 * Just return its length.
+		 */
+		return (priv->bbuflen);
+	}
+
+	/*
+	 * Read the next packet (if any) into the bounce buffer, so
+	 * that we get to know its length and we can return that
+	 * to the caller.
+	 */
+	ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
+	if (ret < 0 && errno == EWOULDBLOCK) {
+		return (0);
+	}
+
+	/* Only a successful, non-empty read marks the buffer as full. */
+	if (ret > 0)
+		priv->bbuflen = ret;
+
+	return (ret);
+}
+
+static ssize_t
+tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
+{
+	struct tap_priv *priv = (struct tap_priv *)be->opaque;
+	ssize_t ret;
+
+	if (priv->bbuflen > 0) {
+		/*
+		 * A packet is available in the bounce buffer, so
+		 * we read it from there.
+		 */
+		ret = buf_to_iov(priv->bbuf, priv->bbuflen,
+		    iov, iovcnt, 0);
+
+		/* Mark the bounce buffer as empty. */
+		priv->bbuflen = 0;
+
+		return (ret);
+	}

 	ret = readv(be->fd, iov, iovcnt);
-
 	if (ret < 0 && errno == EWOULDBLOCK) {
 		return (0);
 	}
@ -299,6 +357,7 @@ static struct net_backend tap_backend = {
 	.init = tap_init,
 	.cleanup = tap_cleanup,
 	.send = tap_send,
+	.peek_recvlen = tap_peek_recvlen,
 	.recv = tap_recv,
 	.recv_enable = tap_recv_enable,
 	.recv_disable = tap_recv_disable,
@ -313,6 +372,7 @@ static struct net_backend vmnet_backend = {
 	.init = tap_init,
 	.cleanup = tap_cleanup,
 	.send = tap_send,
+	.peek_recvlen = tap_peek_recvlen,
 	.recv = tap_recv,
 	.recv_enable = tap_recv_enable,
 	.recv_disable = tap_recv_disable,
@ -331,8 +391,7 @@ DATA_SET(net_backend_set, vmnet_backend);
 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
 		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
 		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
-		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO | \
-		VIRTIO_NET_F_MRG_RXBUF)
+		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)

 struct netmap_priv {
 	char ifname[IFNAMSIZ];
@ -539,6 +598,26 @@ netmap_send(struct net_backend *be, const struct iovec *iov,
 	return (totlen);
 }

+/*
+ * Report the total length of the next packet on the netmap RX ring
+ * without advancing the ring. Returns 0 when there are no slots
+ * between the ring's head and tail.
+ */
+static ssize_t
+netmap_peek_recvlen(struct net_backend *be)
+{
+	struct netmap_priv *np = (struct netmap_priv *)be->opaque;
+	struct netmap_ring *rxring = np->rx;
+	ssize_t pktlen = 0;
+	uint32_t cur;
+
+	/* Add up slot lengths up to the first slot without NS_MOREFRAG. */
+	for (cur = rxring->head; cur != rxring->tail;
+	    cur = nm_ring_next(rxring, cur)) {
+		const struct netmap_slot *frag = &rxring->slot[cur];
+
+		pktlen += frag->len;
+		if (!(frag->flags & NS_MOREFRAG))
+			break;
+	}
+
+	return (pktlen);
+}
+
 static ssize_t
 netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
@ -628,6 +707,7 @@ static struct net_backend netmap_backend = {
 	.init = netmap_init,
 	.cleanup = netmap_cleanup,
 	.send = netmap_send,
+	.peek_recvlen = netmap_peek_recvlen,
 	.recv = netmap_recv,
 	.recv_enable = netmap_recv_enable,
 	.recv_disable = netmap_recv_disable,
@ -642,6 +722,7 @@ static struct net_backend vale_backend = {
 	.init = netmap_init,
 	.cleanup = netmap_cleanup,
 	.send = netmap_send,
+	.peek_recvlen = netmap_peek_recvlen,
 	.recv = netmap_recv,
 	.recv_enable = netmap_recv_enable,
 	.recv_disable = netmap_recv_disable,
@ -758,6 +839,13 @@ netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
 	return (be->send(be, iov, iovcnt));
 }

+/*
+ * Ask the backend for the length of the next packet that can be
+ * received, by dispatching to its peek_recvlen callback.
+ */
+ssize_t
+netbe_peek_recvlen(struct net_backend *be)
+{
+	ssize_t pending_len;
+
+	pending_len = be->peek_recvlen(be);
+
+	return (pending_len);
+}
+
 /*
 * Try to read a packet from the backend, without blocking.
 * If no packets are available, return 0. In case of success, return
--- a/usr.sbin/bhyve/net_backends.h
+++ b/usr.sbin/bhyve/net_backends.h
@ -45,6 +45,7 @@ int	 netbe_set_cap(net_backend_t *be, uint64_t cap,
             unsigned vnet_hdr_len);
 size_t	netbe_get_vnet_hdr_len(net_backend_t *be);
 ssize_t	netbe_send(net_backend_t *be, const struct iovec *iov, int iovcnt);
+ssize_t	netbe_peek_recvlen(net_backend_t *be);
 ssize_t	netbe_recv(net_backend_t *be, const struct iovec *iov, int iovcnt);
 ssize_t	netbe_rx_discard(net_backend_t *be);
 void	netbe_rx_disable(net_backend_t *be);
--- a/usr.sbin/bhyve/pci_virtio_net.c
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@ -228,22 +228,34 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 	struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
 	struct iovec iov[VTNET_MAXSEGS + 1];
 	struct vqueue_info *vq;
-	uint32_t riov_bytes;
-	struct iovec *riov;
-	int riov_len;
-	uint32_t ulen;
-	int n_chains;
-	int len;

 	vq = &sc->vsc_queues[VTNET_RXQ];
 	for (;;) {
 		struct virtio_net_rxhdr *hdr;
+		uint32_t riov_bytes;
+		struct iovec *riov;
+		uint32_t ulen;
+		int riov_len;
+		int n_chains;
+		ssize_t rlen;
+		ssize_t plen;
+
+		plen = netbe_peek_recvlen(sc->vsc_be);
+		if (plen <= 0) {
+			/*
+			 * No more packets (plen == 0), or backend errored
+			 * (plen < 0). Interrupt if needed and stop.
+			 */
+			vq_endchains(vq, /*used_all_avail=*/0);
+			return;
+		}
+		plen += prepend_hdr_len;

 		/*
 		 * Get a descriptor chain to store the next ingress
 		 * packet. In case of mergeable rx buffers, get as
 		 * many chains as necessary in order to make room
-		 * for a maximum sized LRO packet.
+		 * for plen bytes.
 		 */
 		riov_bytes = 0;
 		riov_len = 0;
@ -287,8 +299,7 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 			riov_bytes += info[n_chains].len;
 			riov += n;
 			n_chains++;
-		} while (riov_bytes < VTNET_MAX_PKT_LEN &&
-			    riov_len < VTNET_MAXSEGS);
+		} while (riov_bytes < plen && riov_len < VTNET_MAXSEGS);

 		riov = iov;
 		hdr = riov[0].iov_base;
@ -312,21 +323,20 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 			memset(hdr, 0, prepend_hdr_len);
 		}

-		len = netbe_recv(sc->vsc_be, riov, riov_len);
-
-		if (len <= 0) {
+		rlen = netbe_recv(sc->vsc_be, riov, riov_len);
+		if (rlen != plen - prepend_hdr_len) {
 			/*
-			 * No more packets (len == 0), or backend errored
-			 * (err < 0). Return unused available buffers
-			 * and stop.
+			 * If this happens it means there is something
+			 * wrong with the backend (e.g., some other
+			 * process is stealing our packets).
 			 */
+			WPRINTF(("netbe_recv: expected %zd bytes, "
+				"got %zd", plen - prepend_hdr_len, rlen));
 			vq_retchains(vq, n_chains);
-			/* Interrupt if needed/appropriate and stop. */
-			vq_endchains(vq, /*used_all_avail=*/0);
-			return;
+			continue;
 		}

-		ulen = (uint32_t)(len + prepend_hdr_len);
+		ulen = (uint32_t)plen;

 		/*
 		 * Publish the used buffers to the guest, reporting the
@ -346,12 +356,11 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 				vq_relchain_prepare(vq, info[i].idx, iolen);
 				ulen -= iolen;
 				i++;
-				assert(i <= n_chains);
 			} while (ulen > 0);

 			hdr->vrh_bufs = i;
 			vq_relchain_publish(vq);
-			vq_retchains(vq, n_chains - i);
+			assert(i == n_chains);
 		}
 	}

@ -592,7 +601,8 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
 			free(sc);
 			return (err);
 		}
-		sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be);
+		sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MRG_RXBUF |
+		    netbe_get_cap(sc->vsc_be);
 	}

 	if (!mac_provided) {
--- a/usr.sbin/iostat/iostat.c
+++ b/usr.sbin/iostat/iostat.c
@ -929,7 +929,7 @@ devstats(int perf_select, long double etime, int havelast)
 			}
 			free(devicename);
 		} else if (oflag > 0) {
-			int msdig = (ms_per_transaction < 100.0) ? 1 : 0;
+			int msdig = (ms_per_transaction < 99.94) ? 1 : 0;

 			if (Iflag == 0)
 				printf("%4.0Lf%4.0Lf%5.*Lf ",
--- a/usr.sbin/pstat/pstat.c
+++ b/usr.sbin/pstat/pstat.c
@ -95,6 +95,8 @@ static struct {
 #define NNAMES	(sizeof(namelist) / sizeof(*namelist))
 static struct nlist nl[NNAMES];

+#define	SIZEHDR	"Size"
+
 static int	humanflag;
 static int	usenumflag;
 static int	totalflag;
@ -471,7 +473,12 @@ print_swap_header(void)
 	long blocksize;
 	const char *header;

-	header = getbsize(&hlen, &blocksize);
+	if (humanflag) {
+		header = SIZEHDR;
+		hlen = sizeof(SIZEHDR);
+	} else {
+		header = getbsize(&hlen, &blocksize);
+	}
 	if (totalflag == 0)
 		(void)printf("%-15s %*s %8s %8s %8s\n",
 		    "Device", hlen, header,
@ -484,23 +491,30 @@ print_swap_line(const char *swdevname, intmax_t nblks, intmax_t bused,
 {
 	char usedbuf[5];
 	char availbuf[5];
+	char sizebuf[5];
 	int hlen, pagesize;
 	long blocksize;

 	pagesize = getpagesize();
 	getbsize(&hlen, &blocksize);

-	printf("%-15s %*jd ", swdevname, hlen, CONVERT(nblks));
+	printf("%-15s ", swdevname);
 	if (humanflag) {
+		humanize_number(sizebuf, sizeof(sizebuf),
+		    CONVERT_BLOCKS(nblks), "",
+		    HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
 		humanize_number(usedbuf, sizeof(usedbuf),
 		    CONVERT_BLOCKS(bused), "",
 		    HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
 		humanize_number(availbuf, sizeof(availbuf),
 		    CONVERT_BLOCKS(bavail), "",
 		    HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
-		printf("%8s %8s %5.0f%%\n", usedbuf, availbuf, bpercent);
+		printf("%8s %8s %8s %5.0f%%\n", sizebuf,
+		    usedbuf, availbuf, bpercent);
 	} else {
-		printf("%8jd %8jd %5.0f%%\n", (intmax_t)CONVERT(bused),
+		printf("%*jd %8jd %8jd %5.0f%%\n", hlen,
+		    (intmax_t)CONVERT(nblks),
+		    (intmax_t)CONVERT(bused),
 		    (intmax_t)CONVERT(bavail), bpercent);
 	}
 }