Integrate capsicum-test into the FreeBSD test suite

This change takes capsicum-test from upstream and applies some local changes to make the tests work on FreeBSD when executed via Kyua. The local modifications are as follows: 1. Make `OpenatTest.WithFlag` pass with the new dot-dot lookup behavior in FreeBSD 12.x+. 2. capsicum-test references a set of helper binaries: `mini-me`, `mini-me.noexec`, and `mini-me.setuid`, as part of the execve/fexecve tests, via execve, fexecve, and open. It achieves this upstream by assuming `mini-me*` is in the current directory, however, in order for Kyua to execute `capsicum-test`, it needs to provide a full path to `mini-me*`. In order to achieve this, I made `capsicum-test` cache the executable's path from argv[0] in main(..) and use the cached value to compute the path to `mini-me*` as part of the execve/fexecve testcases. 3. The capsicum-test test suite assumes that it's always being run on CAPABILITIES enabled kernels. However, there's a chance that the test will be run on a host without a CAPABILITIES enabled kernel, so we must check for the support before running the tests. The way to achieve this is to add the relevant `feature_present("security_capabilities")` check to SetupEnvironment::SetUp() and skip the tests when the support is not available. While here, add a check for `kern.trap_enotcap` being enabled. As noted by markj@ in https://github.com/google/capsicum-test/issues/23, this sysctl being enabled can trigger non-deterministic failures. Therefore, the tests should be skipped if this sysctl is enabled. All local changes have been submitted to the capsicum-test project (https://github.com/google/capsicum-test) and are in various stages of review. Please see the following pull requests for more details: 1. https://github.com/google/capsicum-test/pull/35 2. https://github.com/google/capsicum-test/pull/41 3. https://github.com/google/capsicum-test/pull/42 Reviewed by: asomers Discussed with: emaste, markj Approved by: emaste (mentor) MFC after: 2 months Differential Revision: https://reviews.freebsd.org/D19758
svn path=/head/; revision=345783
2024-07-21 18:27:22 +00:00 · 2019-04-01 21:24:50 +00:00 · 2019-04-01 21:24:50 +00:00 · 8ac5aef8f3 · 2020-12-20 02:59:44 +00:00
parent 14bffecf74 33d7e3ee78
commit 8ac5aef8f3
35 changed files with 8531 additions and 0 deletions
--- a/contrib/capsicum-test/.gitignore
+++ b/contrib/capsicum-test/.gitignore
@ -0,0 +1,19 @@
+capsicum-test
+mini-me
+mini-me.noexec
+mini-me.setuid
+mini-me.32
+mini-me.x32
+mini-me.64
+libgtest.a
+smoketest
+*.o
+libcap*.deb
+libcap*.dsc
+libcap*.tar.gz
+libcap*.changes
+casper*.deb
+casper*.dsc
+casper*.tar.gz
+casper*.changes
+libcaprights.a
--- a/contrib/capsicum-test/CONTRIBUTING.md
+++ b/contrib/capsicum-test/CONTRIBUTING.md
@ -0,0 +1,20 @@
+## Contributor License Agreement ##
+
+Contributions to any Google project must be accompanied by a Contributor
+License Agreement.  This is not a copyright **assignment**, it simply gives
+Google permission to use and redistribute your contributions as part of the
+project.
+
+  * If you are an individual writing original source code and you're sure you
+    own the intellectual property, then you'll need to sign an [individual
+    CLA][].
+
+  * If you work for a company that wants to allow you to contribute your work,
+    then you'll need to sign a [corporate CLA][].
+
+You generally only need to submit a CLA once, so if you've already submitted
+one (even if it was for a different project), you probably don't need to do it
+again.
+
+[individual CLA]: https://developers.google.com/open-source/cla/individual
+[corporate CLA]: https://developers.google.com/open-source/cla/corporate
--- a/contrib/capsicum-test/GNUmakefile
+++ b/contrib/capsicum-test/GNUmakefile
@ -0,0 +1,78 @@
+OS:=$(shell uname)
+
+# Set ARCH to 32 or x32 for i386/x32 ABIs
+ARCH?=64
+ARCHFLAG=-m$(ARCH)
+
+ifeq ($(OS),Linux)
+PROCESSOR:=$(shell uname -p)
+
+ifneq ($(wildcard /usr/lib/$(PROCESSOR)-linux-gnu),)
+# Can use standard Debian location for static libraries.
+PLATFORM_LIBDIR=/usr/lib/$(PROCESSOR)-linux-gnu
+else
+# Attempt to determine library location from gcc configuration.
+PLATFORM_LIBDIR=$(shell gcc -v 2>&1 | grep "Configured with:" | sed 's/.*--libdir=\(\/usr\/[^ ]*\).*/\1/g')
+endif
+
+# Override for explicitly specified ARCHFLAG.
+# Use locally compiled libcaprights in this case, on the
+# assumption that any installed version is 64-bit.
+ifeq ($(ARCHFLAG),-m32)
+PROCESSOR=i386
+PLATFORM_LIBDIR=/usr/lib32
+LIBCAPRIGHTS=./libcaprights.a
+endif
+ifeq ($(ARCHFLAG),-mx32)
+PROCESSOR=x32
+PLATFORM_LIBDIR=/usr/libx32
+LIBCAPRIGHTS=./libcaprights.a
+endif
+
+# Detect presence of libsctp in normal Debian location
+ifneq ($(wildcard $(PLATFORM_LIBDIR)/libsctp.a),)
+LIBSCTP=-lsctp
+CXXFLAGS=-DHAVE_SCTP
+endif
+
+ifneq ($(LIBCAPRIGHTS),)
+# Build local libcaprights.a (assuming ./configure
+# has already been done in libcaprights/)
+LOCAL_LIBS=$(LIBCAPRIGHTS)
+LIBCAPRIGHTS_OBJS=libcaprights/capsicum.o libcaprights/linux-bpf-capmode.o libcaprights/procdesc.o libcaprights/signal.o
+LOCAL_CLEAN=$(LOCAL_LIBS) $(LIBCAPRIGHTS_OBJS)
+else
+# Detect installed libcaprights static library.
+ifneq ($(wildcard $(PLATFORM_LIBDIR)/libcaprights.a),)
+LIBCAPRIGHTS=$(PLATFORM_LIBDIR)/libcaprights.a
+else
+ifneq ($(wildcard /usr/lib/libcaprights.a),)
+LIBCAPRIGHTS=/usr/lib/libcaprights.a
+endif
+endif
+endif
+
+endif
+
+# Extra test programs for arch-transition tests
+EXTRA_PROGS = mini-me.32 mini-me.64
+ifneq ($(wildcard /usr/include/gnu/stubs-x32.h),)
+EXTRA_PROGS += mini-me.x32
+endif
+
+# Chain on to the master makefile
+include makefile
+
+./libcaprights.a: $(LIBCAPRIGHTS_OBJS)
+	ar cr $@ $^
+
+# Small static programs of known architectures
+# These may require additional packages to be installed; for example, for Debian:
+#  - libc6-dev-i386 provides 32-bit headers for a 64-bit system
+#  - libc6-dev-x32 provides headers for the x32 ABI.
+mini-me.32: mini-me.c
+	$(CC) $(CFLAGS) -m32 -static -o $@ $<
+mini-me.x32: mini-me.c
+	$(CC) $(CFLAGS) -mx32 -static -o $@ $<
+mini-me.64: mini-me.c
+	$(CC) $(CFLAGS) -m64 -static -o $@ $<
--- a/contrib/capsicum-test/LICENSE
+++ b/contrib/capsicum-test/LICENSE
@ -0,0 +1,26 @@
+Copyright (c) 2009-2011 Robert N. M. Watson
+Copyright (c) 2011 Jonathan Anderson
+Copyright (C) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+Copyright (c) 2013-2014 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
--- a/contrib/capsicum-test/README.md
+++ b/contrib/capsicum-test/README.md
@ -0,0 +1,62 @@
+# Capsicum User Space Tests
+
+This directory holds unit tests for [Capsicum](http://www.cl.cam.ac.uk/research/security/capsicum/)
+object-capabilities. The tests exercise the syscall interface to a Capsicum-enabled operating system,
+currently either [FreeBSD >=10.x](http://www.freebsd.org) or a modified Linux kernel (the
+[capsicum-linux](http://github.com/google/capsicum-linux) project).
+
+The tests are written in C++98, and use the [Google Test](https://code.google.com/p/googletest/)
+framework, with some additions to fork off particular tests (because a process that enters capability
+mode cannot leave it again).
+
+## Provenance
+
+The original basis for these tests was:
+
+ - [unit tests](https://github.com/freebsd/freebsd/tree/master/tools/regression/security/cap_test)
+   written by Robert Watson and Jonathan Anderson for the original FreeBSD 9.x Capsicum implementation
+ - [unit tests](http://git.chromium.org/gitweb/?p=chromiumos/third_party/kernel-capsicum.git;a=tree;f=tools/testing/capsicum_tests;hb=refs/heads/capsicum) written by Meredydd Luff for the original Capsicum-Linux port.
+
+These tests were coalesced and moved into an independent repository to enable
+comparative testing across multiple OSes, and then substantially extended.
+
+## OS Configuration
+
+### Linux
+
+The following kernel configuration options are needed to run the tests:
+
+ - `CONFIG_SECURITY_CAPSICUM`: enable the Capsicum framework
+ - `CONFIG_PROCDESC`: enable Capsicum process-descriptor functionality
+ - `CONFIG_DEBUG_FS`: enable debug filesystem
+ - `CONFIG_IP_SCTP`: enable SCTP support
+
+### FreeBSD (>= 10.x)
+
+The following kernel configuration options are needed so that all tests can run:
+
+  - `options P1003_1B_MQUEUE`: Enable POSIX message queues (or `kldload mqueuefs`)
+
+## Other Dependencies
+
+### Linux
+
+The following additional development packages are needed to build the full test suite on Linux.
+
+ - `libcaprights`: See below
+ - `libcap-dev`: Provides headers for POSIX.1e capabilities.
+ - `libsctp1`: Provides SCTP library functions.
+ - `libsctp-dev`: Provides headers for SCTP library functions.
+
+
+## Linux libcaprights
+
+The Capsicum userspace library is held in the `libcaprights/` subdirectory.  Ideally, this
+library should be built (with `./configure; make` or `dpkg-buildpackage -uc -us`) and
+installed (with `make install` or `dpkg -i libcaprights*.deb`) so that the tests will
+use behave like a normal Capsicum-aware application.
+
+However, if no installed copy of the library is found, the `GNUmakefile` will attempt
+to use the local `libcaprights/*.c` source; this requires `./configure` to have been
+performed in the `libcaprights` subdirectory. The local code is also used for
+cross-compiled builds of the test suite (e.g. `make ARCH=32` or `make ARCH=x32`).
--- a/contrib/capsicum-test/capability-fd-pair.cc
+++ b/contrib/capsicum-test/capability-fd-pair.cc
@ -0,0 +1,188 @@
+// Tests involving 2 capability file descriptors.
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <fcntl.h>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+TEST(CapabilityPair, sendfile) {
+  int in_fd = open(TmpFile("cap_sendfile_in"), O_CREAT|O_RDWR, 0644);
+  EXPECT_OK(write(in_fd, "1234", 4));
+  // Output fd for sendfile must be a stream socket in FreeBSD.
+  int sock_fds[2];
+  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
+
+  cap_rights_t r_rs;
+  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+  cap_rights_t r_ws;
+  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+
+  int cap_in_ro = dup(in_fd);
+  EXPECT_OK(cap_in_ro);
+  EXPECT_OK(cap_rights_limit(cap_in_ro, &r_rs));
+  int cap_in_wo = dup(in_fd);
+  EXPECT_OK(cap_in_wo);
+  EXPECT_OK(cap_rights_limit(cap_in_wo, &r_ws));
+  int cap_out_ro = dup(sock_fds[0]);
+  EXPECT_OK(cap_out_ro);
+  EXPECT_OK(cap_rights_limit(cap_out_ro, &r_rs));
+  int cap_out_wo = dup(sock_fds[0]);
+  EXPECT_OK(cap_out_wo);
+  EXPECT_OK(cap_rights_limit(cap_out_wo, &r_ws));
+
+  off_t offset = 0;
+  EXPECT_NOTCAPABLE(sendfile_(cap_out_ro, cap_in_ro, &offset, 4));
+  EXPECT_NOTCAPABLE(sendfile_(cap_out_wo, cap_in_wo, &offset, 4));
+  EXPECT_OK(sendfile_(cap_out_wo, cap_in_ro, &offset, 4));
+
+  close(cap_in_ro);
+  close(cap_in_wo);
+  close(cap_out_ro);
+  close(cap_out_wo);
+  close(in_fd);
+  close(sock_fds[0]);
+  close(sock_fds[1]);
+  unlink(TmpFile("cap_sendfile_in"));
+}
+
+#ifdef HAVE_TEE
+TEST(CapabilityPair, tee) {
+  int pipe1_fds[2];
+  EXPECT_OK(pipe2(pipe1_fds, O_NONBLOCK));
+  int pipe2_fds[2];
+  EXPECT_OK(pipe2(pipe2_fds, O_NONBLOCK));
+
+  // Put some data into pipe1.
+  unsigned char buffer[4] = {1, 2, 3, 4};
+  EXPECT_OK(write(pipe1_fds[1], buffer, 4));
+
+  cap_rights_t r_ro;
+  cap_rights_init(&r_ro, CAP_READ);
+  cap_rights_t r_wo;
+  cap_rights_init(&r_wo, CAP_WRITE);
+  cap_rights_t r_rw;
+  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+
+  // Various attempts to tee into pipe2.
+  int cap_in_wo = dup(pipe1_fds[0]);
+  EXPECT_OK(cap_in_wo);
+  EXPECT_OK(cap_rights_limit(cap_in_wo, &r_wo));
+  int cap_in_rw = dup(pipe1_fds[0]);
+  EXPECT_OK(cap_in_rw);
+  EXPECT_OK(cap_rights_limit(cap_in_rw, &r_rw));
+  int cap_out_ro = dup(pipe2_fds[1]);
+  EXPECT_OK(cap_out_ro);
+  EXPECT_OK(cap_rights_limit(cap_out_ro, &r_ro));
+  int cap_out_rw = dup(pipe2_fds[1]);
+  EXPECT_OK(cap_out_rw);
+  EXPECT_OK(cap_rights_limit(cap_out_rw, &r_rw));
+
+  EXPECT_NOTCAPABLE(tee(cap_in_wo, cap_out_rw, 4, SPLICE_F_NONBLOCK));
+  EXPECT_NOTCAPABLE(tee(cap_in_rw, cap_out_ro, 4, SPLICE_F_NONBLOCK));
+  EXPECT_OK(tee(cap_in_rw, cap_out_rw, 4, SPLICE_F_NONBLOCK));
+
+  close(cap_in_wo);
+  close(cap_in_rw);
+  close(cap_out_ro);
+  close(cap_out_rw);
+  close(pipe1_fds[0]);
+  close(pipe1_fds[1]);
+  close(pipe2_fds[0]);
+  close(pipe2_fds[1]);
+}
+#endif
+
+#ifdef HAVE_SPLICE
+TEST(CapabilityPair, splice) {
+  int pipe1_fds[2];
+  EXPECT_OK(pipe2(pipe1_fds, O_NONBLOCK));
+  int pipe2_fds[2];
+  EXPECT_OK(pipe2(pipe2_fds, O_NONBLOCK));
+
+  // Put some data into pipe1.
+  unsigned char buffer[4] = {1, 2, 3, 4};
+  EXPECT_OK(write(pipe1_fds[1], buffer, 4));
+
+  cap_rights_t r_ro;
+  cap_rights_init(&r_ro, CAP_READ);
+  cap_rights_t r_wo;
+  cap_rights_init(&r_wo, CAP_WRITE);
+  cap_rights_t r_rs;
+  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+  cap_rights_t r_ws;
+  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+
+  // Various attempts to splice.
+  int cap_in_wo = dup(pipe1_fds[0]);
+  EXPECT_OK(cap_in_wo);
+  EXPECT_OK(cap_rights_limit(cap_in_wo, &r_wo));
+  int cap_in_ro = dup(pipe1_fds[0]);
+  EXPECT_OK(cap_in_ro);
+  EXPECT_OK(cap_rights_limit(cap_in_ro, &r_ro));
+  int cap_in_ro_seek = dup(pipe1_fds[0]);
+  EXPECT_OK(cap_in_ro_seek);
+  EXPECT_OK(cap_rights_limit(cap_in_ro_seek, &r_rs));
+  int cap_out_wo = dup(pipe2_fds[1]);
+  EXPECT_OK(cap_out_wo);
+  EXPECT_OK(cap_rights_limit(cap_out_wo, &r_wo));
+  int cap_out_ro = dup(pipe2_fds[1]);
+  EXPECT_OK(cap_out_ro);
+  EXPECT_OK(cap_rights_limit(cap_out_ro, &r_ro));
+  int cap_out_wo_seek = dup(pipe2_fds[1]);
+  EXPECT_OK(cap_out_wo_seek);
+  EXPECT_OK(cap_rights_limit(cap_out_wo_seek, &r_ws));
+
+  EXPECT_NOTCAPABLE(splice(cap_in_ro, NULL, cap_out_wo_seek, NULL, 4, SPLICE_F_NONBLOCK));
+  EXPECT_NOTCAPABLE(splice(cap_in_wo, NULL, cap_out_wo_seek, NULL, 4, SPLICE_F_NONBLOCK));
+  EXPECT_NOTCAPABLE(splice(cap_in_ro_seek, NULL, cap_out_ro, NULL, 4, SPLICE_F_NONBLOCK));
+  EXPECT_NOTCAPABLE(splice(cap_in_ro_seek, NULL, cap_out_wo, NULL, 4, SPLICE_F_NONBLOCK));
+  EXPECT_OK(splice(cap_in_ro_seek, NULL, cap_out_wo_seek, NULL, 4, SPLICE_F_NONBLOCK));
+
+  close(cap_in_wo);
+  close(cap_in_ro);
+  close(cap_in_ro_seek);
+  close(cap_out_wo);
+  close(cap_out_ro);
+  close(cap_out_wo_seek);
+  close(pipe1_fds[0]);
+  close(pipe1_fds[1]);
+  close(pipe2_fds[0]);
+  close(pipe2_fds[1]);
+}
+#endif
+
+#ifdef HAVE_VMSPLICE
+// Although it only involves a single file descriptor, test vmsplice(2) here too.
+TEST(CapabilityPair, vmsplice) {
+  int pipe_fds[2];
+  EXPECT_OK(pipe2(pipe_fds, O_NONBLOCK));
+
+  cap_rights_t r_ro;
+  cap_rights_init(&r_ro, CAP_READ);
+  cap_rights_t r_rw;
+  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+
+  int cap_ro = dup(pipe_fds[1]);
+  EXPECT_OK(cap_ro);
+  EXPECT_OK(cap_rights_limit(cap_ro, &r_ro));
+  int cap_rw = dup(pipe_fds[1]);
+  EXPECT_OK(cap_rw);
+  EXPECT_OK(cap_rights_limit(cap_rw, &r_rw));
+
+  unsigned char buffer[4] = {1, 2, 3, 4};
+  struct iovec iov;
+  memset(&iov, 0, sizeof(iov));
+  iov.iov_base = buffer;
+  iov.iov_len = sizeof(buffer);
+
+  EXPECT_NOTCAPABLE(vmsplice(cap_ro, &iov, 1, SPLICE_F_NONBLOCK));
+  EXPECT_OK(vmsplice(cap_rw, &iov, 1, SPLICE_F_NONBLOCK));
+
+  close(cap_ro);
+  close(cap_rw);
+  close(pipe_fds[0]);
+  close(pipe_fds[1]);
+}
+#endif
--- a/contrib/capsicum-test/capability-fd.cc
+++ b/contrib/capsicum-test/capability-fd.cc
--- a/contrib/capsicum-test/capmode.cc
+++ b/contrib/capsicum-test/capmode.cc
@ -0,0 +1,654 @@
+// Test routines to make sure a variety of system calls are or are not
+// available in capability mode.  The goal is not to see if they work, just
+// whether or not they return the expected ECAPMODE.
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/ptrace.h>
+#include <dirent.h>
+#include <netinet/in.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <time.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+// Test fixture that opens (and closes) a bunch of files.
+class WithFiles : public ::testing::Test {
+ public:
+  WithFiles() :
+    fd_file_(open(TmpFile("cap_capmode"), O_RDWR|O_CREAT, 0644)),
+    fd_close_(open("/dev/null", O_RDWR)),
+    fd_dir_(open(tmpdir.c_str(), O_RDONLY)),
+    fd_socket_(socket(PF_INET, SOCK_DGRAM, 0)),
+    fd_tcp_socket_(socket(PF_INET, SOCK_STREAM, 0)) {
+    EXPECT_OK(fd_file_);
+    EXPECT_OK(fd_close_);
+    EXPECT_OK(fd_dir_);
+    EXPECT_OK(fd_socket_);
+    EXPECT_OK(fd_tcp_socket_);
+  }
+  ~WithFiles() {
+    if (fd_tcp_socket_ >= 0) close(fd_tcp_socket_);
+    if (fd_socket_ >= 0) close(fd_socket_);
+    if (fd_dir_ >= 0) close(fd_dir_);
+    if (fd_close_ >= 0) close(fd_close_);
+    if (fd_file_ >= 0) close(fd_file_);
+    unlink(TmpFile("cap_capmode"));
+  }
+ protected:
+  int fd_file_;
+  int fd_close_;
+  int fd_dir_;
+  int fd_socket_;
+  int fd_tcp_socket_;
+};
+
+FORK_TEST_F(WithFiles, DisallowedFileSyscalls) {
+  unsigned int mode = -1;
+  EXPECT_OK(cap_getmode(&mode));
+  EXPECT_EQ(0, (int)mode);
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+  EXPECT_OK(cap_getmode(&mode));
+  EXPECT_EQ(1, (int)mode);
+
+  // System calls that are not permitted in capability mode.
+  EXPECT_CAPMODE(access(TmpFile("cap_capmode_access"), F_OK));
+  EXPECT_CAPMODE(acct(TmpFile("cap_capmode_acct")));
+  EXPECT_CAPMODE(chdir(TmpFile("cap_capmode_chdir")));
+#ifdef HAVE_CHFLAGS
+  EXPECT_CAPMODE(chflags(TmpFile("cap_capmode_chflags"), UF_NODUMP));
+#endif
+  EXPECT_CAPMODE(chmod(TmpFile("cap_capmode_chmod"), 0644));
+  EXPECT_CAPMODE(chown(TmpFile("cap_capmode_chown"), -1, -1));
+  EXPECT_CAPMODE(chroot(TmpFile("cap_capmode_chroot")));
+  EXPECT_CAPMODE(creat(TmpFile("cap_capmode_creat"), 0644));
+  EXPECT_CAPMODE(fchdir(fd_dir_));
+#ifdef HAVE_GETFSSTAT
+  struct statfs statfs;
+  EXPECT_CAPMODE(getfsstat(&statfs, sizeof(statfs), MNT_NOWAIT));
+#endif
+  EXPECT_CAPMODE(link(TmpFile("foo"), TmpFile("bar")));
+  struct stat sb;
+  EXPECT_CAPMODE(lstat(TmpFile("cap_capmode_lstat"), &sb));
+  EXPECT_CAPMODE(mknod(TmpFile("capmode_mknod"), 0644 | S_IFIFO, 0));
+  EXPECT_CAPMODE(bogus_mount_());
+  EXPECT_CAPMODE(open("/dev/null", O_RDWR));
+  char buf[64];
+  EXPECT_CAPMODE(readlink(TmpFile("cap_capmode_readlink"), buf, sizeof(buf)));
+#ifdef HAVE_REVOKE
+  EXPECT_CAPMODE(revoke(TmpFile("cap_capmode_revoke")));
+#endif
+  EXPECT_CAPMODE(stat(TmpFile("cap_capmode_stat"), &sb));
+  EXPECT_CAPMODE(symlink(TmpFile("cap_capmode_symlink_from"), TmpFile("cap_capmode_symlink_to")));
+  EXPECT_CAPMODE(unlink(TmpFile("cap_capmode_unlink")));
+  EXPECT_CAPMODE(umount2("/not_mounted", 0));
+}
+
+FORK_TEST_F(WithFiles, DisallowedSocketSyscalls) {
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  // System calls that are not permitted in capability mode.
+  struct sockaddr_in addr;
+  addr.sin_family = AF_INET;
+  addr.sin_port = 0;
+  addr.sin_addr.s_addr = htonl(INADDR_ANY);
+  EXPECT_CAPMODE(bind_(fd_socket_, (sockaddr*)&addr, sizeof(addr)));
+  addr.sin_family = AF_INET;
+  addr.sin_port = 53;
+  addr.sin_addr.s_addr = htonl(0x08080808);
+  EXPECT_CAPMODE(connect_(fd_tcp_socket_, (sockaddr*)&addr, sizeof(addr)));
+}
+
+FORK_TEST_F(WithFiles, AllowedFileSyscalls) {
+  int rc;
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  EXPECT_OK(close(fd_close_));
+  fd_close_ = -1;
+  int fd_dup = dup(fd_file_);
+  EXPECT_OK(fd_dup);
+  EXPECT_OK(dup2(fd_file_, fd_dup));
+#ifdef HAVE_DUP3
+  EXPECT_OK(dup3(fd_file_, fd_dup, 0));
+#endif
+  if (fd_dup >= 0) close(fd_dup);
+
+  struct stat sb;
+  EXPECT_OK(fstat(fd_file_, &sb));
+  EXPECT_OK(lseek(fd_file_, 0, SEEK_SET));
+  char ch;
+  EXPECT_OK(read(fd_file_, &ch, sizeof(ch)));
+  EXPECT_OK(write(fd_file_, &ch, sizeof(ch)));
+
+#ifdef HAVE_CHFLAGS
+  rc = fchflags(fd_file_, UF_NODUMP);
+  if (rc < 0) {
+    EXPECT_NE(ECAPMODE, errno);
+  }
+#endif
+
+  char buf[1024];
+  rc = getdents_(fd_dir_, (void*)buf, sizeof(buf));
+  EXPECT_OK(rc);
+
+  char data[] = "123";
+  EXPECT_OK(pwrite(fd_file_, data, 1, 0));
+  EXPECT_OK(pread(fd_file_, data, 1, 0));
+
+  struct iovec io;
+  io.iov_base = data;
+  io.iov_len = 2;
+#if !defined(__i386__) && !defined(__linux__)
+  // TODO(drysdale): reinstate these tests for 32-bit runs when possible
+  // libc bug is fixed.
+  EXPECT_OK(pwritev(fd_file_, &io, 1, 0));
+  EXPECT_OK(preadv(fd_file_, &io, 1, 0));
+#endif
+  EXPECT_OK(writev(fd_file_, &io, 1));
+  EXPECT_OK(readv(fd_file_, &io, 1));
+
+#ifdef HAVE_SYNCFS
+  EXPECT_OK(syncfs(fd_file_));
+#endif
+#ifdef HAVE_SYNC_FILE_RANGE
+  EXPECT_OK(sync_file_range(fd_file_, 0, 1, 0));
+#endif
+#ifdef HAVE_READAHEAD
+  if (!tmpdir_on_tmpfs) {  // tmpfs doesn't support readahead(2)
+    EXPECT_OK(readahead(fd_file_, 0, 1));
+  }
+#endif
+}
+
+FORK_TEST_F(WithFiles, AllowedSocketSyscalls) {
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  // recvfrom() either returns -1 with EAGAIN, or 0.
+  int rc = recvfrom(fd_socket_, NULL, 0, MSG_DONTWAIT, NULL, NULL);
+  if (rc < 0) {
+    EXPECT_EQ(EAGAIN, errno);
+  }
+  char ch;
+  EXPECT_OK(write(fd_file_, &ch, sizeof(ch)));
+
+  // These calls will fail for lack of e.g. a proper name to send to,
+  // but they are allowed in capability mode, so errno != ECAPMODE.
+  EXPECT_FAIL_NOT_CAPMODE(accept(fd_socket_, NULL, NULL));
+  EXPECT_FAIL_NOT_CAPMODE(getpeername(fd_socket_, NULL, NULL));
+  EXPECT_FAIL_NOT_CAPMODE(getsockname(fd_socket_, NULL, NULL));
+  EXPECT_FAIL_NOT_CAPMODE(recvmsg(fd_socket_, NULL, 0));
+  EXPECT_FAIL_NOT_CAPMODE(sendmsg(fd_socket_, NULL, 0));
+  EXPECT_FAIL_NOT_CAPMODE(sendto(fd_socket_, NULL, 0, 0, NULL, 0));
+  off_t offset = 0;
+  EXPECT_FAIL_NOT_CAPMODE(sendfile_(fd_socket_, fd_file_, &offset, 1));
+
+  // The socket/socketpair syscalls are allowed, but they don't give
+  // anything externally useful (can't call bind/connect on them).
+  int fd_socket2 = socket(PF_INET, SOCK_DGRAM, 0);
+  EXPECT_OK(fd_socket2);
+  if (fd_socket2 >= 0) close(fd_socket2);
+  int fd_pair[2] = {-1, -1};
+  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, fd_pair));
+  if (fd_pair[0] >= 0) close(fd_pair[0]);
+  if (fd_pair[1] >= 0) close(fd_pair[1]);
+}
+
+#ifdef HAVE_SEND_RECV_MMSG
+FORK_TEST(Capmode, AllowedMmsgSendRecv) {
+  int fd_socket = socket(PF_INET, SOCK_DGRAM, 0);
+
+  struct sockaddr_in addr;
+  addr.sin_family = AF_INET;
+  addr.sin_port = htons(0);
+  addr.sin_addr.s_addr = htonl(INADDR_ANY);
+  EXPECT_OK(bind(fd_socket, (sockaddr*)&addr, sizeof(addr)));
+
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  char buffer[256] = {0};
+  struct iovec iov;
+  iov.iov_base = buffer;
+  iov.iov_len = sizeof(buffer);
+  struct mmsghdr mm;
+  memset(&mm, 0, sizeof(mm));
+  mm.msg_hdr.msg_iov = &iov;
+  mm.msg_hdr.msg_iovlen = 1;
+  struct timespec ts;
+  ts.tv_sec = 1;
+  ts.tv_nsec = 100;
+  EXPECT_FAIL_NOT_CAPMODE(recvmmsg(fd_socket, &mm, 1, MSG_DONTWAIT, &ts));
+  EXPECT_FAIL_NOT_CAPMODE(sendmmsg(fd_socket, &mm, 1, 0));
+  close(fd_socket);
+}
+#endif
+
+FORK_TEST(Capmode, AllowedIdentifierSyscalls) {
+  // Record some identifiers
+  gid_t my_gid = getgid();
+  pid_t my_pid = getpid();
+  pid_t my_ppid = getppid();
+  uid_t my_uid = getuid();
+  pid_t my_sid = getsid(my_pid);
+
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  EXPECT_EQ(my_gid, getegid_());
+  EXPECT_EQ(my_uid, geteuid_());
+  EXPECT_EQ(my_gid, getgid_());
+  EXPECT_EQ(my_pid, getpid());
+  EXPECT_EQ(my_ppid, getppid());
+  EXPECT_EQ(my_uid, getuid_());
+  EXPECT_EQ(my_sid, getsid(my_pid));
+  gid_t grps[128];
+  EXPECT_OK(getgroups_(128, grps));
+  uid_t ruid;
+  uid_t euid;
+  uid_t suid;
+  EXPECT_OK(getresuid(&ruid, &euid, &suid));
+  gid_t rgid;
+  gid_t egid;
+  gid_t sgid;
+  EXPECT_OK(getresgid(&rgid, &egid, &sgid));
+#ifdef HAVE_GETLOGIN
+  EXPECT_TRUE(getlogin() != NULL);
+#endif
+
+  // Set various identifiers (to their existing values).
+  EXPECT_OK(setgid(my_gid));
+#ifdef HAVE_SETFSGID
+  EXPECT_OK(setfsgid(my_gid));
+#endif
+  EXPECT_OK(setuid(my_uid));
+#ifdef HAVE_SETFSUID
+  EXPECT_OK(setfsuid(my_uid));
+#endif
+  EXPECT_OK(setregid(my_gid, my_gid));
+  EXPECT_OK(setresgid(my_gid, my_gid, my_gid));
+  EXPECT_OK(setreuid(my_uid, my_uid));
+  EXPECT_OK(setresuid(my_uid, my_uid, my_uid));
+  EXPECT_OK(setsid());
+}
+
+FORK_TEST(Capmode, AllowedSchedSyscalls) {
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+  int policy = sched_getscheduler(0);
+  EXPECT_OK(policy);
+  struct sched_param sp;
+  EXPECT_OK(sched_getparam(0, &sp));
+  if (policy >= 0 && (!SCHED_SETSCHEDULER_REQUIRES_ROOT || getuid() == 0)) {
+    EXPECT_OK(sched_setscheduler(0, policy, &sp));
+  }
+  EXPECT_OK(sched_setparam(0, &sp));
+  EXPECT_OK(sched_get_priority_max(policy));
+  EXPECT_OK(sched_get_priority_min(policy));
+  struct timespec ts;
+  EXPECT_OK(sched_rr_get_interval(0, &ts));
+  EXPECT_OK(sched_yield());
+}
+
+
+FORK_TEST(Capmode, AllowedTimerSyscalls) {
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+  struct timespec ts;
+  EXPECT_OK(clock_getres(CLOCK_REALTIME, &ts));
+  EXPECT_OK(clock_gettime(CLOCK_REALTIME, &ts));
+  struct itimerval itv;
+  EXPECT_OK(getitimer(ITIMER_REAL, &itv));
+  EXPECT_OK(setitimer(ITIMER_REAL, &itv, NULL));
+  struct timeval tv;
+  struct timezone tz;
+  EXPECT_OK(gettimeofday(&tv, &tz));
+  ts.tv_sec = 0;
+  ts.tv_nsec = 1;
+  EXPECT_OK(nanosleep(&ts, NULL));
+}
+
+
+FORK_TEST(Capmode, AllowedProfilSyscall) {
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+  char sbuf[32];
+  EXPECT_OK(profil((profil_arg1_t*)sbuf, sizeof(sbuf), 0, 1));
+}
+
+
+FORK_TEST(Capmode, AllowedResourceSyscalls) {
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+  errno = 0;
+  int rc = getpriority(PRIO_PROCESS, 0);
+  EXPECT_EQ(0, errno);
+  EXPECT_OK(setpriority(PRIO_PROCESS, 0, rc));
+  struct rlimit rlim;
+  EXPECT_OK(getrlimit_(RLIMIT_CORE, &rlim));
+  EXPECT_OK(setrlimit(RLIMIT_CORE, &rlim));
+  struct rusage ruse;
+  EXPECT_OK(getrusage(RUSAGE_SELF, &ruse));
+}
+
+FORK_TEST(CapMode, AllowedMmapSyscalls) {
+  // mmap() some memory.
+  size_t mem_size = getpagesize();
+  void *mem = mmap(NULL, mem_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+  EXPECT_TRUE(mem != NULL);
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  EXPECT_OK(msync(mem, mem_size, MS_ASYNC));
+  EXPECT_OK(madvise(mem, mem_size, MADV_NORMAL));
+  unsigned char vec[2];
+  EXPECT_OK(mincore_(mem, mem_size, vec));
+  EXPECT_OK(mprotect(mem, mem_size, PROT_READ|PROT_WRITE));
+
+  if (!MLOCK_REQUIRES_ROOT || getuid() == 0) {
+    EXPECT_OK(mlock(mem, mem_size));
+    EXPECT_OK(munlock(mem, mem_size));
+    int rc = mlockall(MCL_CURRENT);
+    if (rc != 0) {
+      // mlockall may well fail with ENOMEM for non-root users, as the
+      // default RLIMIT_MEMLOCK value isn't that big.
+      EXPECT_NE(ECAPMODE, errno);
+    }
+    EXPECT_OK(munlockall());
+  }
+  // Unmap the memory.
+  EXPECT_OK(munmap(mem, mem_size));
+}
+
+FORK_TEST(Capmode, AllowedPipeSyscalls) {
+  EXPECT_OK(cap_enter());  // Enter capability mode
+  int fd2[2];
+  int rc = pipe(fd2);
+  EXPECT_EQ(0, rc);
+
+#ifdef HAVE_VMSPLICE
+  char buf[11] = "0123456789";
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+  EXPECT_FAIL_NOT_CAPMODE(vmsplice(fd2[0], &iov, 1, SPLICE_F_NONBLOCK));
+#endif
+
+  if (rc == 0) {
+    close(fd2[0]);
+    close(fd2[1]);
+  };
+#ifdef HAVE_PIPE2
+  rc = pipe2(fd2, 0);
+  EXPECT_EQ(0, rc);
+  if (rc == 0) {
+    close(fd2[0]);
+    close(fd2[1]);
+  };
+#endif
+}
+
+TEST(Capmode, AllowedAtSyscalls) {
+  int rc = mkdir(TmpFile("cap_at_syscalls"), 0755);
+  EXPECT_OK(rc);
+  if (rc < 0 && errno != EEXIST) return;
+  int dfd = open(TmpFile("cap_at_syscalls"), O_RDONLY);
+  EXPECT_OK(dfd);
+
+  int file = openat(dfd, "testfile", O_RDONLY|O_CREAT, 0644);
+  EXPECT_OK(file);
+  EXPECT_OK(close(file));
+
+
+  pid_t child = fork();
+  if (child == 0) {
+    // Child: enter cap mode and run tests
+    EXPECT_OK(cap_enter());  // Enter capability mode
+
+    struct stat fs;
+    EXPECT_OK(fstatat(dfd, "testfile", &fs, 0));
+    EXPECT_OK(mkdirat(dfd, "subdir", 0600));
+    EXPECT_OK(fchmodat(dfd, "subdir", 0644, 0));
+    EXPECT_OK(faccessat(dfd, "subdir", F_OK, 0));
+    EXPECT_OK(renameat(dfd, "subdir", dfd, "subdir2"));
+    EXPECT_OK(renameat(dfd, "subdir2", dfd, "subdir"));
+    struct timeval tv[2];
+    struct timezone tz;
+    EXPECT_OK(gettimeofday(&tv[0], &tz));
+    EXPECT_OK(gettimeofday(&tv[1], &tz));
+    EXPECT_OK(futimesat(dfd, "testfile", tv));
+
+    EXPECT_OK(fchownat(dfd, "testfile",  fs.st_uid, fs.st_gid, 0));
+    EXPECT_OK(linkat(dfd, "testfile", dfd, "linky", 0));
+    EXPECT_OK(symlinkat("testfile", dfd, "symlink"));
+    char buffer[256];
+    EXPECT_OK(readlinkat(dfd, "symlink", buffer, sizeof(buffer)));
+    EXPECT_OK(unlinkat(dfd, "linky", 0));
+    EXPECT_OK(unlinkat(dfd, "subdir", AT_REMOVEDIR));
+
+    // Check that invalid requests get a non-Capsicum errno.
+    errno = 0;
+    rc = readlinkat(-1, "symlink", buffer, sizeof(buffer));
+    EXPECT_GE(0, rc);
+    EXPECT_NE(ECAPMODE, errno);
+
+    exit(HasFailure());
+  }
+
+  // Wait for the child.
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  // Tidy up.
+  close(dfd);
+  rmdir(TmpFile("cap_at_syscalls/subdir"));
+  unlink(TmpFile("cap_at_syscalls/symlink"));
+  unlink(TmpFile("cap_at_syscalls/linky"));
+  unlink(TmpFile("cap_at_syscalls/testfile"));
+  rmdir(TmpFile("cap_at_syscalls"));
+}
+
+TEST(Capmode, AllowedAtSyscallsCwd) {
+  int rc = mkdir(TmpFile("cap_at_syscalls_cwd"), 0755);
+  EXPECT_OK(rc);
+  if (rc < 0 && errno != EEXIST) return;
+  int dfd = open(TmpFile("cap_at_syscalls_cwd"), O_RDONLY);
+  EXPECT_OK(dfd);
+
+  int file = openat(dfd, "testfile", O_RDONLY|O_CREAT, 0644);
+  EXPECT_OK(file);
+  EXPECT_OK(close(file));
+
+  pid_t child = fork();
+  if (child == 0) {
+    // Child: move into temp dir, enter cap mode and run tests
+    EXPECT_OK(fchdir(dfd));
+    EXPECT_OK(cap_enter());  // Enter capability mode
+
+    // Test that *at(AT_FDCWD, path,...) is policed with ECAPMODE.
+    EXPECT_CAPMODE(openat(AT_FDCWD, "testfile", O_RDONLY));
+    struct stat fs;
+    EXPECT_CAPMODE(fstatat(AT_FDCWD, "testfile", &fs, 0));
+    EXPECT_CAPMODE(mkdirat(AT_FDCWD, "subdir", 0600));
+    EXPECT_CAPMODE(fchmodat(AT_FDCWD, "subdir", 0644, 0));
+    EXPECT_CAPMODE(faccessat(AT_FDCWD, "subdir", F_OK, 0));
+    EXPECT_CAPMODE(renameat(AT_FDCWD, "subdir", AT_FDCWD, "subdir2"));
+    EXPECT_CAPMODE(renameat(AT_FDCWD, "subdir2", AT_FDCWD, "subdir"));
+    struct timeval tv[2];
+    struct timezone tz;
+    EXPECT_OK(gettimeofday(&tv[0], &tz));
+    EXPECT_OK(gettimeofday(&tv[1], &tz));
+    EXPECT_CAPMODE(futimesat(AT_FDCWD, "testfile", tv));
+
+    EXPECT_CAPMODE(fchownat(AT_FDCWD, "testfile",  fs.st_uid, fs.st_gid, 0));
+    EXPECT_CAPMODE(linkat(AT_FDCWD, "testfile", AT_FDCWD, "linky", 0));
+    EXPECT_CAPMODE(symlinkat("testfile", AT_FDCWD, "symlink"));
+    char buffer[256];
+    EXPECT_CAPMODE(readlinkat(AT_FDCWD, "symlink", buffer, sizeof(buffer)));
+    EXPECT_CAPMODE(unlinkat(AT_FDCWD, "linky", 0));
+
+    exit(HasFailure());
+  }
+
+  // Wait for the child.
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  // Tidy up.
+  close(dfd);
+  rmdir(TmpFile("cap_at_syscalls_cwd/subdir"));
+  unlink(TmpFile("cap_at_syscalls_cwd/symlink"));
+  unlink(TmpFile("cap_at_syscalls_cwd/linky"));
+  unlink(TmpFile("cap_at_syscalls_cwd/testfile"));
+  rmdir(TmpFile("cap_at_syscalls_cwd"));
+}
+
+TEST(Capmode, Abort) {
+  // Check that abort(3) works even in capability mode.
+  pid_t child = fork();
+  if (child == 0) {
+    // Child: enter capability mode and call abort(3).
+    // Triggers something like kill(getpid(), SIGABRT).
+    cap_enter();  // Enter capability mode.
+    abort();
+    exit(99);
+  }
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  EXPECT_TRUE(WIFSIGNALED(status)) << " status = " << std::hex << status;
+  EXPECT_EQ(SIGABRT, WTERMSIG(status)) << " status = " << std::hex << status;
+}
+
+FORK_TEST_F(WithFiles, AllowedMiscSyscalls) {
+  umask(022);
+  mode_t um_before = umask(022);
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  mode_t um = umask(022);
+  EXPECT_NE(-ECAPMODE, (int)um);
+  EXPECT_EQ(um_before, um);
+  stack_t ss;
+  EXPECT_OK(sigaltstack(NULL, &ss));
+
+  // Finally, tests for system calls that don't fit the pattern very well.
+  pid_t pid = fork();
+  EXPECT_OK(pid);
+  if (pid == 0) {
+    // Child: almost immediately exit.
+    sleep(1);
+    exit(0);
+  } else if (pid > 0) {
+    errno = 0;
+    EXPECT_CAPMODE(ptrace_(PTRACE_PEEKDATA_, pid, &pid, NULL));
+    EXPECT_CAPMODE(waitpid(pid, NULL, 0));
+  }
+
+  // No error return from sync(2) to test, but check errno remains unset.
+  errno = 0;
+  sync();
+  EXPECT_EQ(0, errno);
+
+  // TODO(FreeBSD): ktrace
+
+#ifdef HAVE_SYSARCH
+  // sysarch() is, by definition, architecture-dependent
+#if defined (__amd64__) || defined (__i386__)
+  long sysarch_arg = 0;
+  EXPECT_CAPMODE(sysarch(I386_SET_IOPERM, &sysarch_arg));
+#else
+  // TOOD(jra): write a test for other architectures, like arm
+#endif
+#endif
+}
+
+void *thread_fn(void *p) {
+  int delay = *(int *)p;
+  sleep(delay);
+  EXPECT_OK(getpid_());
+  EXPECT_CAPMODE(open("/dev/null", O_RDWR));
+  return NULL;
+}
+
+// Check that restrictions are the same in subprocesses and threads
+FORK_TEST(Capmode, NewThread) {
+  // Fire off a new thread before entering capability mode
+  pthread_t early_thread;
+  int one = 1;  // second
+  EXPECT_OK(pthread_create(&early_thread, NULL, thread_fn, &one));
+
+  // Fire off a new process before entering capability mode.
+  int early_child = fork();
+  EXPECT_OK(early_child);
+  if (early_child == 0) {
+    // Child: wait and then confirm this process is unaffect by capability mode in the parent.
+    sleep(1);
+    int fd = open("/dev/null", O_RDWR);
+    EXPECT_OK(fd);
+    close(fd);
+    exit(0);
+  }
+
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+  // Do an allowed syscall.
+  EXPECT_OK(getpid_());
+  int child = fork();
+  EXPECT_OK(child);
+  if (child == 0) {
+    // Child: do an allowed and a disallowed syscall.
+    EXPECT_OK(getpid_());
+    EXPECT_CAPMODE(open("/dev/null", O_RDWR));
+    exit(0);
+  }
+  // Don't (can't) wait for either child.
+
+  // Wait for the early-started thread.
+  EXPECT_OK(pthread_join(early_thread, NULL));
+
+  // Fire off a new thread.
+  pthread_t child_thread;
+  int zero = 0; // seconds
+  EXPECT_OK(pthread_create(&child_thread, NULL, thread_fn, &zero));
+  EXPECT_OK(pthread_join(child_thread, NULL));
+
+  // Fork a subprocess which fires off a new thread.
+  child = fork();
+  EXPECT_OK(child);
+  if (child == 0) {
+    pthread_t child_thread2;
+    EXPECT_OK(pthread_create(&child_thread2, NULL, thread_fn, &zero));
+    EXPECT_OK(pthread_join(child_thread2, NULL));
+    exit(0);
+  }
+  // Sleep for a bit to allow the subprocess to finish.
+  sleep(2);
+}
+
+static int had_signal = 0;
+static void handle_signal(int) { had_signal = 1; }
+
+FORK_TEST(Capmode, SelfKill) {
+  pid_t me = getpid();
+  sighandler_t original = signal(SIGUSR1, handle_signal);
+
+  pid_t child = fork();
+  if (child == 0) {
+    // Child: sleep and exit
+    sleep(1);
+    exit(0);
+  }
+
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  // Can only kill(2) to own pid.
+  EXPECT_CAPMODE(kill(child, SIGUSR1));
+  EXPECT_OK(kill(me, SIGUSR1));
+  EXPECT_EQ(1, had_signal);
+
+  signal(SIGUSR1, original);
+}
--- a/contrib/capsicum-test/capsicum-freebsd.h
+++ b/contrib/capsicum-test/capsicum-freebsd.h
@ -0,0 +1,73 @@
+#ifndef __CAPSICUM_FREEBSD_H__
+#define __CAPSICUM_FREEBSD_H__
+#ifdef __FreeBSD__
+/************************************************************
+ * FreeBSD Capsicum Functionality.
+ ************************************************************/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* FreeBSD definitions. */
+#include <errno.h>
+#include <sys/param.h>
+#if __FreeBSD_version >= 1100014 || \
+    (__FreeBSD_version >= 1001511 && __FreeBSD_version < 1100000)
+#include <sys/capsicum.h>
+#else
+#include <sys/capability.h>
+#endif
+#include <sys/procdesc.h>
+
+#if __FreeBSD_version >= 1000000
+#define AT_SYSCALLS_IN_CAPMODE
+#define HAVE_CAP_RIGHTS_GET
+#define HAVE_CAP_RIGHTS_LIMIT
+#define HAVE_PROCDESC_FSTAT
+#define HAVE_CAP_FCNTLS_LIMIT
+// fcntl(2) takes int, cap_fcntls_limit(2) takes uint32_t.
+typedef uint32_t cap_fcntl_t;
+#define HAVE_CAP_IOCTLS_LIMIT
+// ioctl(2) and cap_ioctls_limit(2) take unsigned long.
+typedef unsigned long cap_ioctl_t;
+
+#if __FreeBSD_version >= 1101000
+#define HAVE_OPENAT_INTERMEDIATE_DOTDOT
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+// Use fexecve_() in tests to allow Linux variant to bypass glibc version.
+#define fexecve_(F, A, E) fexecve(F, A, E)
+
+#ifdef ENOTBENEATH
+#define E_NO_TRAVERSE_CAPABILITY ENOTBENEATH
+#define E_NO_TRAVERSE_O_BENEATH ENOTBENEATH
+#else
+#define E_NO_TRAVERSE_CAPABILITY ENOTCAPABLE
+#define E_NO_TRAVERSE_O_BENEATH ENOTCAPABLE
+#endif
+
+// FreeBSD limits the number of ioctls in cap_ioctls_limit to 256
+#define CAP_IOCTLS_LIMIT_MAX 256
+
+// Too many links
+#define E_TOO_MANY_LINKS EMLINK
+
+// TODO(FreeBSD): uncomment if/when FreeBSD propagates rights on accept.
+// FreeBSD does not generate a capability from accept(cap_fd,...).
+// https://bugs.freebsd.org/201052
+// #define CAP_FROM_ACCEPT
+// TODO(FreeBSD): uncomment if/when FreeBSD propagates rights on sctp_peeloff.
+// FreeBSD does not generate a capability from sctp_peeloff(cap_fd,...).
+// https://bugs.freebsd.org/201052
+// #define CAP_FROM_PEELOFF
+
+#endif  /* __FreeBSD__ */
+
+#endif /*__CAPSICUM_FREEBSD_H__*/
--- a/contrib/capsicum-test/capsicum-linux.h
+++ b/contrib/capsicum-test/capsicum-linux.h
@ -0,0 +1,40 @@
+#ifndef __CAPSICUM_LINUX_H__
+#define __CAPSICUM_LINUX_H__
+
+#ifdef __linux__
+/************************************************************
+ * Linux Capsicum Functionality.
+ ************************************************************/
+#include <errno.h>
+#include <sys/procdesc.h>
+#include <sys/capsicum.h>
+
+#define HAVE_CAP_RIGHTS_LIMIT
+#define HAVE_CAP_RIGHTS_GET
+#define HAVE_CAP_FCNTLS_LIMIT
+#define HAVE_CAP_IOCTLS_LIMIT
+#define HAVE_PROC_FDINFO
+#define HAVE_PDWAIT4
+#define CAP_FROM_ACCEPT
+// TODO(drysdale): uncomment if/when Linux propagates rights on sctp_peeloff.
+// Linux does not generate a capability from sctp_peeloff(cap_fd,...).
+// #define CAP_FROM_PEELOFF
+// TODO(drysdale): uncomment if/when Linux allows intermediate .. path segments
+// for openat()-like operations.
+// #define HAVE_OPENAT_INTERMEDIATE_DOTDOT
+
+// Failure to open file due to path traversal generates EPERM
+#ifdef ENOTBENEATH
+#define E_NO_TRAVERSE_CAPABILITY ENOTBENEATH
+#define E_NO_TRAVERSE_O_BENEATH ENOTBENEATH
+#else
+#define E_NO_TRAVERSE_CAPABILITY EPERM
+#define E_NO_TRAVERSE_O_BENEATH EPERM
+#endif
+
+// Too many links
+#define E_TOO_MANY_LINKS ELOOP
+
+#endif /* __linux__ */
+
+#endif /*__CAPSICUM_LINUX_H__*/
--- a/contrib/capsicum-test/capsicum-rights.h
+++ b/contrib/capsicum-test/capsicum-rights.h
@ -0,0 +1,118 @@
+#ifndef __CAPSICUM_RIGHTS_H__
+#define __CAPSICUM_RIGHTS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __FreeBSD__
+#include <sys/param.h>
+#if __FreeBSD_version >= 1100014 || \
+    (__FreeBSD_version >= 1001511 && __FreeBSD_version < 1100000)
+#include <sys/capsicum.h>
+#else
+#include <sys/capability.h>
+#endif
+#endif
+
+#ifdef __linux__
+#include <linux/capsicum.h>
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef CAP_RIGHTS_VERSION
+/************************************************************
+ * Capsicum compatibility layer: implement new (FreeBSD10.x)
+ * rights manipulation API in terms of original (FreeBSD9.x)
+ * functionality.
+ ************************************************************/
+#include <stdarg.h>
+#include <stdbool.h>
+
+/* Rights manipulation macros/functions.
+ * Note that these use variadic macros, available in C99 / C++11 (and
+ * also in earlier gcc versions).
+ */
+#define cap_rights_init(rights, ...)   _cap_rights_init((rights), __VA_ARGS__, 0ULL)
+#define cap_rights_set(rights, ...)    _cap_rights_set((rights), __VA_ARGS__, 0ULL)
+#define cap_rights_clear(rights, ...)  _cap_rights_clear((rights), __VA_ARGS__, 0ULL)
+#define cap_rights_is_set(rights, ...) _cap_rights_is_set((rights), __VA_ARGS__, 0ULL)
+
+inline cap_rights_t* _cap_rights_init(cap_rights_t *rights, ...) {
+  va_list ap;
+  cap_rights_t right;
+  *rights = 0;
+  va_start(ap, rights);
+  while (true) {
+    right = va_arg(ap, cap_rights_t);
+    *rights |= right;
+    if (right == 0) break;
+  }
+  va_end(ap);
+  return rights;
+}
+
+inline cap_rights_t* _cap_rights_set(cap_rights_t *rights, ...) {
+  va_list ap;
+  cap_rights_t right;
+  va_start(ap, rights);
+  while (true) {
+    right = va_arg(ap, cap_rights_t);
+    *rights |= right;
+    if (right == 0) break;
+  }
+  va_end(ap);
+  return rights;
+}
+
+inline cap_rights_t* _cap_rights_clear(cap_rights_t *rights, ...) {
+  va_list ap;
+  cap_rights_t right;
+  va_start(ap, rights);
+  while (true) {
+    right = va_arg(ap, cap_rights_t);
+    *rights &= ~right;
+    if (right == 0) break;
+  }
+  va_end(ap);
+  return rights;
+}
+
+inline bool _cap_rights_is_set(const cap_rights_t *rights, ...) {
+  va_list ap;
+  cap_rights_t right;
+  cap_rights_t accumulated = 0;
+  va_start(ap, rights);
+  while (true) {
+    right = va_arg(ap, cap_rights_t);
+    accumulated |= right;
+    if (right == 0) break;
+  }
+  va_end(ap);
+  return (accumulated & *rights) == accumulated;
+}
+
+inline bool _cap_rights_is_valid(const cap_rights_t *rights) {
+  return true;
+}
+
+inline cap_rights_t* cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src) {
+  *dst |= *src;
+  return dst;
+}
+
+inline cap_rights_t* cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src) {
+  *dst &= ~(*src);
+  return dst;
+}
+
+inline bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little) {
+  return ((*big) & (*little)) == (*little);
+}
+
+#endif  /* old/new style rights manipulation */
+
+#endif /*__CAPSICUM_RIGHTS_H__*/
--- a/contrib/capsicum-test/capsicum-test-main.cc
+++ b/contrib/capsicum-test/capsicum-test-main.cc
@ -0,0 +1,156 @@
+#include <sys/types.h>
+#ifdef __linux__
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#elif defined(__FreeBSD__)
+#include <sys/sysctl.h>
+#endif
+#include <ctype.h>
+#include <errno.h>
+#include <libgen.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <iostream>
+#include "gtest/gtest.h"
+#include "capsicum-test.h"
+
+// For versions of googletest that lack GTEST_SKIP.
+#ifndef GTEST_SKIP
+#define GTEST_SKIP GTEST_FAIL
+#endif
+
+std::string tmpdir;
+
+class SetupEnvironment : public ::testing::Environment
+{
+public:
+  SetupEnvironment() : teardown_tmpdir_(false) {}
+  void SetUp() override {
+    CheckCapsicumSupport();
+    if (tmpdir.empty()) {
+      std::cerr << "Generating temporary directory root: ";
+      CreateTemporaryRoot();
+    } else {
+      std::cerr << "User provided temporary directory root: ";
+    }
+    std::cerr << tmpdir << std::endl;
+  }
+  void CheckCapsicumSupport() {
+#ifdef __FreeBSD__
+    int rc;
+    bool trap_enotcap_enabled;
+    size_t trap_enotcap_enabled_len = sizeof(trap_enotcap_enabled);
+
+    if (feature_present("security_capabilities") == 0) {
+      GTEST_SKIP() << "Skipping tests because capsicum support is not "
+                   << "enabled in the kernel.";
+    }
+    // If this OID is enabled, it will send SIGTRAP to the process when
+    // `ENOTCAPABLE` is returned.
+    const char *oid = "kern.trap_enotcap";
+    rc = sysctlbyname(oid, &trap_enotcap_enabled, &trap_enotcap_enabled_len,
+      nullptr, 0);
+    if (rc != 0) {
+      GTEST_FAIL() << "sysctlbyname failed: " << strerror(errno);
+    }
+    if (trap_enotcap_enabled) {
+      GTEST_SKIP() << "Debug sysctl, " << oid << ", enabled. "
+                   << "Skipping tests because its enablement invalidates the "
+                   << "test results.";
+    }
+#endif /* FreeBSD */
+  }
+  void CreateTemporaryRoot() {
+    char *tmpdir_name = tempnam(nullptr, "cptst");
+
+    ASSERT_NE(tmpdir_name, nullptr);
+    ASSERT_EQ(mkdir(tmpdir_name, 0700), 0) <<
+        "Could not create temp directory, " << tmpdir_name << ": " <<
+        strerror(errno);
+    tmpdir = std::string(tmpdir_name);
+    free(tmpdir_name);
+    teardown_tmpdir_ = true;
+  }
+  void TearDown() override {
+    if (teardown_tmpdir_) {
+      rmdir(tmpdir.c_str());
+    }
+  }
+private:
+  bool teardown_tmpdir_;
+};
+
+std::string capsicum_test_bindir;
+
+int main(int argc, char* argv[]) {
+  // Set up the test program path, so capsicum-test can find programs, like
+  // mini-me* when executed from an absolute path.
+  {
+    char *new_path, *old_path, *program_name;
+
+    program_name = strdup(argv[0]);
+    assert(program_name);
+    capsicum_test_bindir = std::string(dirname(program_name));
+    free(program_name);
+
+    old_path = getenv("PATH");
+    assert(old_path);
+
+    assert(asprintf(&new_path, "%s:%s", capsicum_test_bindir.c_str(),
+      old_path) > 0);
+    assert(setenv("PATH", new_path, 1) == 0);
+  }
+
+  ::testing::InitGoogleTest(&argc, argv);
+  for (int ii = 1; ii < argc; ii++) {
+    if (strcmp(argv[ii], "-v") == 0) {
+      verbose = true;
+    } else if (strcmp(argv[ii], "-T") == 0) {
+      ii++;
+      assert(ii < argc);
+      tmpdir = argv[ii];
+      struct stat info;
+      stat(tmpdir.c_str(), &info);
+      assert(S_ISDIR(info.st_mode));
+    } else if (strcmp(argv[ii], "-t") == 0) {
+      force_mt = true;
+    } else if (strcmp(argv[ii], "-F") == 0) {
+      force_nofork = true;
+    } else if (strcmp(argv[ii], "-u") == 0) {
+      if (++ii >= argc) {
+        std::cerr << "-u needs argument" << std::endl;
+        exit(1);
+      }
+      if (isdigit(argv[ii][0])) {
+        other_uid = atoi(argv[ii]);
+      } else {
+        struct passwd *p = getpwnam(argv[ii]);
+        if (!p) {
+          std::cerr << "Failed to get entry for " << argv[ii] << ", errno=" << errno << std::endl;
+          exit(1);
+        }
+        other_uid = p->pw_uid;
+      }
+    }
+  }
+  if (other_uid == 0) {
+    struct stat info;
+    if (stat(argv[0], &info) == 0) {
+      other_uid = info.st_uid;
+    }
+  }
+
+#ifdef __linux__
+  // Check whether our temporary directory is on a tmpfs volume.
+  struct statfs fsinfo;
+  statfs(tmpdir.c_str(), &fsinfo);
+  tmpdir_on_tmpfs = (fsinfo.f_type == TMPFS_MAGIC);
+#endif
+
+  testing::AddGlobalTestEnvironment(new SetupEnvironment());
+  int rc = RUN_ALL_TESTS();
+  ShowSkippedTests(std::cerr);
+  return rc;
+}
--- a/contrib/capsicum-test/capsicum-test.cc
+++ b/contrib/capsicum-test/capsicum-test.cc
@ -0,0 +1,102 @@
+#include "capsicum-test.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+
+#include <map>
+#include <vector>
+#include <string>
+
+bool verbose = false;
+bool tmpdir_on_tmpfs = false;
+bool force_mt = false;
+bool force_nofork = false;
+uid_t other_uid = 0;
+
+namespace {
+std::map<std::string, std::string> tmp_paths;
+}
+
+const char *TmpFile(const char *p) {
+  std::string pathname(p);
+  if (tmp_paths.find(pathname) == tmp_paths.end()) {
+    std::string fullname = tmpdir + "/" + pathname;
+    tmp_paths[pathname] = fullname;
+  }
+  return tmp_paths[pathname].c_str();
+}
+
+char ProcessState(int pid) {
+#ifdef __linux__
+  // Open the process status file.
+  char s[1024];
+  snprintf(s, sizeof(s), "/proc/%d/status", pid);
+  FILE *f = fopen(s, "r");
+  if (f == NULL) return '\0';
+
+  // Read the file line by line looking for the state line.
+  const char *prompt = "State:\t";
+  while (!feof(f)) {
+    fgets(s, sizeof(s), f);
+    if (!strncmp(s, prompt, strlen(prompt))) {
+      fclose(f);
+      return s[strlen(prompt)];
+    }
+  }
+  fclose(f);
+  return '?';
+#endif
+#ifdef __FreeBSD__
+  char buffer[1024];
+  snprintf(buffer, sizeof(buffer), "ps -p %d -o state | grep -v STAT", pid);
+  sig_t original = signal(SIGCHLD, SIG_IGN);
+  FILE* cmd = popen(buffer, "r");
+  usleep(50000);  // allow any pending SIGCHLD signals to arrive
+  signal(SIGCHLD, original);
+  int result = fgetc(cmd);
+  fclose(cmd);
+  // Map FreeBSD codes to Linux codes.
+  switch (result) {
+    case EOF:
+      return '\0';
+    case 'D': // disk wait
+    case 'R': // runnable
+    case 'S': // sleeping
+    case 'T': // stopped
+    case 'Z': // zombie
+      return result;
+    case 'W': // idle interrupt thread
+      return 'S';
+    case 'I': // idle
+      return 'S';
+    case 'L': // waiting to acquire lock
+    default:
+      return '?';
+  }
+#endif
+}
+
+typedef std::vector<std::string> TestList;
+typedef std::map<std::string, TestList*> SkippedTestMap;
+static SkippedTestMap skipped_tests;
+void TestSkipped(const char *testcase, const char *test, const std::string& reason) {
+  if (skipped_tests.find(reason) == skipped_tests.end()) {
+    skipped_tests[reason] = new TestList;
+  }
+  std::string testname(testcase);
+  testname += ".";
+  testname += test;
+  skipped_tests[reason]->push_back(testname);
+}
+
+void ShowSkippedTests(std::ostream& os) {
+  for (SkippedTestMap::iterator skiplist = skipped_tests.begin();
+       skiplist != skipped_tests.end(); ++skiplist) {
+    os << "Following tests were skipped because: " << skiplist->first << std::endl;
+    for (size_t ii = 0; ii < skiplist->second->size(); ++ii) {
+      const std::string& testname((*skiplist->second)[ii]);
+      os << "  " << testname << std::endl;
+    }
+  }
+}
--- a/contrib/capsicum-test/capsicum-test.h
+++ b/contrib/capsicum-test/capsicum-test.h
@ -0,0 +1,260 @@
+/* -*- C++ -*- */
+#ifndef CAPSICUM_TEST_H
+#define CAPSICUM_TEST_H
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/resource.h>
+#include <signal.h>
+
+#include <ios>
+#include <ostream>
+
+#include "gtest/gtest.h"
+
+extern bool verbose;
+extern std::string tmpdir;
+extern bool tmpdir_on_tmpfs;
+extern bool force_mt;
+extern bool force_nofork;
+extern uid_t other_uid;
+
+static inline void *WaitingThreadFn(void *) {
+  // Loop until cancelled
+  while (true) {
+    usleep(10000);
+    pthread_testcancel();
+  }
+  return NULL;
+}
+
+// If force_mt is set, run another thread in parallel with the test.  This forces
+// the kernel into multi-threaded mode.
+template <typename T, typename Function>
+void MaybeRunWithThread(T *self, Function fn) {
+  pthread_t subthread;
+  if (force_mt) {
+    pthread_create(&subthread, NULL, WaitingThreadFn, NULL);
+  }
+  (self->*fn)();
+  if (force_mt) {
+    pthread_cancel(subthread);
+    pthread_join(subthread, NULL);
+  }
+}
+template <typename Function>
+void MaybeRunWithThread(Function fn) {
+  pthread_t subthread;
+  if (force_mt) {
+    pthread_create(&subthread, NULL, WaitingThreadFn, NULL);
+  }
+  (fn)();
+  if (force_mt) {
+    pthread_cancel(subthread);
+    pthread_join(subthread, NULL);
+  }
+}
+
+// Return the absolute path of a filename in the temp directory, `tmpdir`,
+// with the given pathname, e.g., "/tmp/<pathname>", if `tmpdir` was set to
+// "/tmp".
+const char *TmpFile(const char *pathname);
+
+// Run the given test function in a forked process, so that trapdoor
+// entry doesn't affect other tests, and watch out for hung processes.
+// Implemented as a macro to allow access to the test case instance's
+// HasFailure() method, which is reported as the forked process's
+// exit status.
+#define _RUN_FORKED(INNERCODE, TESTCASENAME, TESTNAME)         \
+    pid_t pid = force_nofork ? 0 : fork();                     \
+    if (pid == 0) {                                            \
+      INNERCODE;                                               \
+      if (!force_nofork) {                                     \
+        exit(HasFailure());                                    \
+      }                                                        \
+    } else if (pid > 0) {                                      \
+      int rc, status;                                          \
+      int remaining_us = 10000000;                             \
+      while (remaining_us > 0) {                               \
+        status = 0;                                            \
+        rc = waitpid(pid, &status, WNOHANG);                   \
+        if (rc != 0) break;                                    \
+        remaining_us -= 10000;                                 \
+        usleep(10000);                                         \
+      }                                                        \
+      if (remaining_us <= 0) {                                 \
+        fprintf(stderr, "Warning: killing unresponsive test "  \
+                        "%s.%s (pid %d)\n",                    \
+                        TESTCASENAME, TESTNAME, pid);          \
+        kill(pid, SIGKILL);                                    \
+        ADD_FAILURE() << "Test hung";                          \
+      } else if (rc < 0) {                                     \
+        fprintf(stderr, "Warning: waitpid error %s (%d)\n",    \
+                        strerror(errno), errno);               \
+        ADD_FAILURE() << "Failed to wait for child";           \
+      } else {                                                 \
+        int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; \
+        EXPECT_EQ(0, rc);                                      \
+      }                                                        \
+    }
+#define _RUN_FORKED_MEM(THIS, TESTFN, TESTCASENAME, TESTNAME)  \
+  _RUN_FORKED(MaybeRunWithThread(THIS, &TESTFN), TESTCASENAME, TESTNAME);
+#define _RUN_FORKED_FN(TESTFN, TESTCASENAME, TESTNAME)   \
+  _RUN_FORKED(MaybeRunWithThread(&TESTFN), TESTCASENAME, TESTNAME);
+
+// Run a test case in a forked process, possibly cleaning up a
+// test file after completion
+#define FORK_TEST_ON(test_case_name, test_name, test_file)     \
+    static void test_case_name##_##test_name##_ForkTest();     \
+    TEST(test_case_name, test_name ## Forked) {                \
+      _RUN_FORKED_FN(test_case_name##_##test_name##_ForkTest,  \
+                     #test_case_name, #test_name);             \
+      const char *filename = test_file;                        \
+      if (filename) unlink(filename);                          \
+    }                                                          \
+    static void test_case_name##_##test_name##_ForkTest()
+
+#define FORK_TEST(test_case_name, test_name) FORK_TEST_ON(test_case_name, test_name, NULL)
+
+// Run a test case fixture in a forked process, so that trapdoors don't
+// affect other tests.
+#define ICLASS_NAME(test_case_name, test_name) Forked##test_case_name##_##test_name
+#define FORK_TEST_F(test_case_name, test_name)                \
+  class ICLASS_NAME(test_case_name, test_name) : public test_case_name { \
+    public:                                                    \
+      ICLASS_NAME(test_case_name, test_name)() {}              \
+      void InnerTestBody();                                    \
+    };                                                         \
+    TEST_F(ICLASS_NAME(test_case_name, test_name), _) {        \
+      _RUN_FORKED_MEM(this,                                    \
+                      ICLASS_NAME(test_case_name, test_name)::InnerTestBody,  \
+                      #test_case_name, #test_name);            \
+    }                                                          \
+    void ICLASS_NAME(test_case_name, test_name)::InnerTestBody()
+
+// Emit errno information on failure
+#define EXPECT_OK(v) EXPECT_LE(0, v) << "   errno " << errno << " " << strerror(errno)
+
+// Expect a syscall to fail with the given error.
+#define EXPECT_SYSCALL_FAIL(E, C) \
+    do { \
+      EXPECT_GT(0, C); \
+      EXPECT_EQ(E, errno); \
+    } while (0)
+
+// Expect a syscall to fail with anything other than the given error.
+#define EXPECT_SYSCALL_FAIL_NOT(E, C) \
+    do { \
+      EXPECT_GT(0, C); \
+      EXPECT_NE(E, errno); \
+    } while (0)
+
+// Expect a void syscall to fail with anything other than the given error.
+#define EXPECT_VOID_SYSCALL_FAIL_NOT(E, C)   \
+    do { \
+      errno = 0; \
+      C; \
+      EXPECT_NE(E, errno) << #C << " failed with ECAPMODE"; \
+    } while (0)
+
+// Expect a system call to fail due to path traversal; exact error
+// code is OS-specific.
+#ifdef O_BENEATH
+#define EXPECT_OPENAT_FAIL_TRAVERSAL(fd, path, flags) \
+    do { \
+      const int result = openat((fd), (path), (flags)); \
+      if (((flags) & O_BENEATH) == O_BENEATH) { \
+        EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_O_BENEATH, result); \
+      } else { \
+        EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY, result); \
+      } \
+    } while (0)
+#else
+#define EXPECT_OPENAT_FAIL_TRAVERSAL(fd, path, flags) \
+    do { \
+      const int result = openat((fd), (path), (flags)); \
+      EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY, result); \
+    } while (0)
+#endif
+
+// Expect a system call to fail with ECAPMODE.
+#define EXPECT_CAPMODE(C) EXPECT_SYSCALL_FAIL(ECAPMODE, C)
+
+// Expect a system call to fail, but not with ECAPMODE.
+#define EXPECT_FAIL_NOT_CAPMODE(C) EXPECT_SYSCALL_FAIL_NOT(ECAPMODE, C)
+#define EXPECT_FAIL_VOID_NOT_CAPMODE(C) EXPECT_VOID_SYSCALL_FAIL_NOT(ECAPMODE, C)
+
+// Expect a system call to fail with ENOTCAPABLE.
+#define EXPECT_NOTCAPABLE(C) EXPECT_SYSCALL_FAIL(ENOTCAPABLE, C)
+
+// Expect a system call to fail, but not with ENOTCAPABLE.
+#define EXPECT_FAIL_NOT_NOTCAPABLE(C) EXPECT_SYSCALL_FAIL_NOT(ENOTCAPABLE, C)
+
+// Expect a system call to fail with either ENOTCAPABLE or ECAPMODE.
+#define EXPECT_CAPFAIL(C) \
+    do { \
+      int rc = C; \
+      EXPECT_GT(0, rc); \
+      EXPECT_TRUE(errno == ECAPMODE || errno == ENOTCAPABLE) \
+        << #C << " did not fail with ECAPMODE/ENOTCAPABLE but " << errno; \
+    } while (0)
+
+// Ensure that 'rights' are a subset of 'max'.
+#define EXPECT_RIGHTS_IN(rights, max) \
+    EXPECT_TRUE(cap_rights_contains((max), (rights)))  \
+    << "rights " << std::hex << *(rights) \
+    << " not a subset of " << std::hex << *(max)
+
+// Ensure rights are identical
+#define EXPECT_RIGHTS_EQ(a, b) \
+  do { \
+    EXPECT_RIGHTS_IN((a), (b)); \
+    EXPECT_RIGHTS_IN((b), (a)); \
+  } while (0)
+
+// Get the state of a process as a single character.
+//  - 'D': disk wait
+//  - 'R': runnable
+//  - 'S': sleeping/idle
+//  - 'T': stopped
+//  - 'Z': zombie
+// On error, return either '?' or '\0'.
+char ProcessState(int pid);
+
+// Check process state reaches a particular expected state (or two).
+// Retries a few times to allow for timing issues.
+#define EXPECT_PID_REACHES_STATES(pid, expected1, expected2) { \
+  int counter = 5; \
+  char state; \
+  do { \
+    state = ProcessState(pid); \
+    if (state == expected1 || state == expected2) break; \
+    usleep(100000); \
+  } while (--counter > 0); \
+  EXPECT_TRUE(state == expected1 || state == expected2) \
+      << " pid " << pid << " in state " << state; \
+}
+
+#define EXPECT_PID_ALIVE(pid)   EXPECT_PID_REACHES_STATES(pid, 'R', 'S')
+#define EXPECT_PID_DEAD(pid)    EXPECT_PID_REACHES_STATES(pid, 'Z', '\0')
+#define EXPECT_PID_ZOMBIE(pid)  EXPECT_PID_REACHES_STATES(pid, 'Z', 'Z');
+#define EXPECT_PID_GONE(pid)    EXPECT_PID_REACHES_STATES(pid, '\0', '\0');
+
+void ShowSkippedTests(std::ostream& os);
+void TestSkipped(const char *testcase, const char *test, const std::string& reason);
+#define TEST_SKIPPED(reason) \
+  do { \
+    const ::testing::TestInfo* const info = ::testing::UnitTest::GetInstance()->current_test_info(); \
+    std::cerr << "Skipping " << info->test_case_name() << "::" << info->name() << " because: " << reason << std::endl; \
+    TestSkipped(info->test_case_name(), info->name(), reason);          \
+  } while (0)
+
+// Mark a test that can only be run as root.
+#define REQUIRE_ROOT() \
+  if (getuid() != 0) { \
+    TEST_SKIPPED("requires root"); \
+    return; \
+  }
+
+#endif  // CAPSICUM_TEST_H
--- a/contrib/capsicum-test/capsicum.h
+++ b/contrib/capsicum-test/capsicum.h
@ -0,0 +1,175 @@
+/*
+ * Minimal portability layer for Capsicum-related features.
+ */
+#ifndef __CAPSICUM_H__
+#define __CAPSICUM_H__
+
+#ifdef __FreeBSD__
+#include "capsicum-freebsd.h"
+#endif
+
+#ifdef __linux__
+#include "capsicum-linux.h"
+#endif
+
+/*
+ * CAP_ALL/CAP_NONE is a value in FreeBSD9.x Capsicum, but a functional macro
+ * in FreeBSD10.x Capsicum.  Always use CAP_SET_ALL/CAP_SET_NONE instead.
+ */
+#ifndef CAP_SET_ALL
+#ifdef CAP_RIGHTS_VERSION
+#define CAP_SET_ALL(rights) CAP_ALL(rights)
+#else
+#define CAP_SET_ALL(rights) *(rights) = CAP_MASK_VALID
+#endif
+#endif
+
+#ifndef CAP_SET_NONE
+#ifdef CAP_RIGHTS_VERSION
+#define CAP_SET_NONE(rights) CAP_NONE(rights)
+#else
+#define CAP_SET_NONE(rights) *(rights) = 0
+#endif
+#endif
+
+
+/************************************************************
+ * Define new-style rights in terms of old-style rights if
+ * absent.
+ ************************************************************/
+#include "capsicum-rights.h"
+
+/*
+ * Cope with systems (e.g. FreeBSD 10.x) where CAP_RENAMEAT hasn't been split out.
+ *  (src, dest): RENAMEAT, LINKAT => RENAMEAT_SOURCE, RENAMEAT_TARGET
+ */
+#ifndef CAP_RENAMEAT_SOURCE
+#define CAP_RENAMEAT_SOURCE CAP_RENAMEAT
+#endif
+#ifndef CAP_RENAMEAT_TARGET
+#define CAP_RENAMEAT_TARGET CAP_LINKAT
+#endif
+/*
+ * Cope with systems (e.g. FreeBSD 10.x) where CAP_RENAMEAT hasn't been split out.
+ *  (src, dest): 0, LINKAT => LINKAT_SOURCE, LINKAT_TARGET
+ */
+#ifndef CAP_LINKAT_SOURCE
+#define CAP_LINKAT_SOURCE CAP_LOOKUP
+#endif
+#ifndef CAP_LINKAT_TARGET
+#define CAP_LINKAT_TARGET CAP_LINKAT
+#endif
+
+#ifdef CAP_PREAD
+/* Existence of CAP_PREAD implies new-style CAP_SEEK semantics */
+#define CAP_SEEK_ASWAS 0
+#else
+/* Old-style CAP_SEEK semantics */
+#define CAP_SEEK_ASWAS CAP_SEEK
+#define CAP_PREAD CAP_READ
+#define CAP_PWRITE CAP_WRITE
+#endif
+
+#ifndef CAP_MMAP_R
+#define CAP_MMAP_R (CAP_READ|CAP_MMAP)
+#define CAP_MMAP_W (CAP_WRITE|CAP_MMAP)
+#define CAP_MMAP_X (CAP_MAPEXEC|CAP_MMAP)
+#define CAP_MMAP_RW (CAP_MMAP_R|CAP_MMAP_W)
+#define CAP_MMAP_RX (CAP_MMAP_R|CAP_MMAP_X)
+#define CAP_MMAP_WX (CAP_MMAP_W|CAP_MMAP_X)
+#define CAP_MMAP_RWX (CAP_MMAP_R|CAP_MMAP_W|CAP_MMAP_X)
+#endif
+
+#ifndef CAP_MKFIFOAT
+#define CAP_MKFIFOAT CAP_MKFIFO
+#endif
+
+#ifndef CAP_MKNODAT
+#define CAP_MKNODAT CAP_MKFIFOAT
+#endif
+
+#ifndef CAP_MKDIRAT
+#define CAP_MKDIRAT CAP_MKDIR
+#endif
+
+#ifndef CAP_UNLINKAT
+#define CAP_UNLINKAT CAP_RMDIR
+#endif
+
+#ifndef CAP_SOCK_CLIENT
+#define CAP_SOCK_CLIENT \
+        (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
+         CAP_PEELOFF | CAP_READ | CAP_WRITE | CAP_SETSOCKOPT | CAP_SHUTDOWN)
+#endif
+
+#ifndef CAP_SOCK_SERVER
+#define CAP_SOCK_SERVER \
+        (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
+         CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_READ | CAP_WRITE | \
+         CAP_SETSOCKOPT | CAP_SHUTDOWN)
+#endif
+
+#ifndef CAP_EVENT
+#define CAP_EVENT CAP_POLL_EVENT
+#endif
+
+/************************************************************
+ * Define new-style API functions in terms of old-style API
+ * functions if absent.
+ ************************************************************/
+#ifndef HAVE_CAP_RIGHTS_GET
+/* Define cap_rights_get() in terms of old-style cap_getrights() */
+inline int cap_rights_get(int fd, cap_rights_t *rights) {
+  return cap_getrights(fd, rights);
+}
+#endif
+
+#ifndef HAVE_CAP_RIGHTS_LIMIT
+/* Define cap_rights_limit() in terms of old-style cap_new() and dup2() */
+#include <unistd.h>
+inline int cap_rights_limit(int fd, const cap_rights_t *rights) {
+  int cap = cap_new(fd, *rights);
+  if (cap < 0) return cap;
+  int rc = dup2(cap, fd);
+  if (rc < 0) return rc;
+  close(cap);
+  return rc;
+}
+#endif
+
+#include <stdio.h>
+#ifdef CAP_RIGHTS_VERSION
+/* New-style Capsicum API extras for debugging */
+static inline void cap_rights_describe(const cap_rights_t *rights, char *buffer) {
+  int ii;
+  for (ii = 0; ii < (CAP_RIGHTS_VERSION+2); ii++) {
+    int len = sprintf(buffer, "0x%016llx ", (unsigned long long)rights->cr_rights[ii]);
+    buffer += len;
+  }
+}
+
+#ifdef __cplusplus
+#include <iostream>
+#include <iomanip>
+inline std::ostream& operator<<(std::ostream& os, cap_rights_t rights) {
+  for (int ii = 0; ii < (CAP_RIGHTS_VERSION+2); ii++) {
+    os << std::hex << std::setw(16) << std::setfill('0') << (unsigned long long)rights.cr_rights[ii] << " ";
+  }
+  return os;
+}
+#endif
+
+#else
+
+static inline void cap_rights_describe(const cap_rights_t *rights, char *buffer) {
+  sprintf(buffer, "0x%016llx", (*rights));
+}
+
+#endif  /* new/old style rights manipulation */
+
+#ifdef __cplusplus
+#include <string>
+extern std::string capsicum_test_bindir;
+#endif
+
+#endif /*__CAPSICUM_H__*/
--- a/contrib/capsicum-test/fcntl.cc
+++ b/contrib/capsicum-test/fcntl.cc
@ -0,0 +1,411 @@
+// Test that fcntl works in capability mode.
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include <string>
+#include <map>
+
+#include "capsicum.h"
+#include "capsicum-test.h"
+#include "syscalls.h"
+
+// Ensure that fcntl() works consistently for both regular file descriptors and
+// capability-wrapped ones.
+FORK_TEST(Fcntl, Basic) {
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_READ, CAP_FCNTL);
+
+  typedef std::map<std::string, int> FileMap;
+
+  // Open some files of different types, and wrap them in capabilities.
+  FileMap files;
+  files["file"] = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(files["file"]);
+  files["socket"] = socket(PF_LOCAL, SOCK_STREAM, 0);
+  EXPECT_OK(files["socket"]);
+  char shm_name[128];
+  sprintf(shm_name, "/capsicum-test-%d", getuid());
+  files["SHM"] = shm_open(shm_name, (O_CREAT|O_RDWR), 0600);
+  if ((files["SHM"] == -1) && errno == ENOSYS) {
+    // shm_open() is not implemented in user-mode Linux.
+    files.erase("SHM");
+  } else {
+    EXPECT_OK(files["SHM"]);
+  }
+
+  FileMap caps;
+  for (FileMap::iterator ii = files.begin(); ii != files.end(); ++ii) {
+    std::string key = ii->first + " cap";
+    caps[key] = dup(ii->second);
+    EXPECT_OK(cap_rights_limit(caps[key], &rights));
+    EXPECT_OK(caps[key]) << " on " << ii->first;
+  }
+
+  FileMap all(files);
+  all.insert(files.begin(), files.end());
+
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  // Ensure that we can fcntl() all the files that we opened above.
+  cap_rights_t r_ro;
+  cap_rights_init(&r_ro, CAP_READ);
+  for (FileMap::iterator ii = all.begin(); ii != all.end(); ++ii) {
+    EXPECT_OK(fcntl(ii->second, F_GETFL, 0)) << " on " << ii->first;
+    int cap = dup(ii->second);
+    EXPECT_OK(cap) << " on " << ii->first;
+    EXPECT_OK(cap_rights_limit(cap, &r_ro)) << " on " << ii->first;
+    EXPECT_EQ(-1, fcntl(cap, F_GETFL, 0)) << " on " << ii->first;
+    EXPECT_EQ(ENOTCAPABLE, errno) << " on " << ii->first;
+    close(cap);
+  }
+  for (FileMap::iterator ii = all.begin(); ii != all.end(); ++ii) {
+    close(ii->second);
+  }
+  shm_unlink(shm_name);
+}
+
+// Supported fcntl(2) operations:
+//   FreeBSD10         FreeBSD9.1:  Linux:           Rights:            Summary:
+//   F_DUPFD           F_DUPFD      F_DUPFD          NONE               as dup(2)
+//   F_DUPFD_CLOEXEC                F_DUPFD_CLOEXEC  NONE               as dup(2) with close-on-exec
+//   F_DUP2FD          F_DUP2FD                      NONE               as dup2(2)
+//   F_DUP2FD_CLOEXEC                                NONE               as dup2(2) with close-on-exec
+//   F_GETFD           F_GETFD      F_GETFD          NONE               get close-on-exec flag
+//   F_SETFD           F_SETFD      F_SETFD          NONE               set close-on-exec flag
+// * F_GETFL           F_GETFL      F_GETFL          FCNTL              get file status flag
+// * F_SETFL           F_SETFL      F_SETFL          FCNTL              set file status flag
+// * F_GETOWN          F_GETOWN     F_GETOWN         FCNTL              get pid receiving SIGIO/SIGURG
+// * F_SETOWN          F_SETOWN     F_SETOWN         FCNTL              set pid receiving SIGIO/SIGURG
+// *                                F_GETOWN_EX      FCNTL              get pid/thread receiving SIGIO/SIGURG
+// *                                F_SETOWN_EX      FCNTL              set pid/thread receiving SIGIO/SIGURG
+//   F_GETLK           F_GETLK      F_GETLK          FLOCK              get lock info
+//   F_SETLK           F_SETLK      F_SETLK          FLOCK              set lock info
+//   F_SETLK_REMOTE                                  FLOCK              set lock info
+//   F_SETLKW          F_SETLKW     F_SETLKW         FLOCK              set lock info (blocking)
+//   F_READAHEAD       F_READAHEAD                   NONE               set or clear readahead amount
+//   F_RDAHEAD         F_RDAHEAD                     NONE               set or clear readahead amount to 128KB
+//                                  F_GETSIG         POLL_EVENT+FSIGNAL get signal sent when I/O possible
+//                                  F_SETSIG         POLL_EVENT+FSIGNAL set signal sent when I/O possible
+//                                  F_GETLEASE       FLOCK+FSIGNAL      get lease on file descriptor
+//                                  F_SETLEASE       FLOCK+FSIGNAL      set new lease on file descriptor
+//                                  F_NOTIFY         NOTIFY             generate signal on changes (dnotify)
+//                                  F_GETPIPE_SZ     GETSOCKOPT         get pipe size
+//                                  F_SETPIPE_SZ     SETSOCKOPT         set pipe size
+//                                  F_GET_SEAL       FSTAT              get memfd seals
+//                                  F_ADD_SEAL       FCHMOD             set memfd seal
+// If HAVE_CAP_FCNTLS_LIMIT is defined, then fcntl(2) operations that require
+// CAP_FCNTL (marked with * above) can be further limited with cap_fcntls_limit(2).
+namespace {
+#define FCNTL_NUM_RIGHTS 9
+cap_rights_t fcntl_rights[FCNTL_NUM_RIGHTS];
+void InitRights() {
+  cap_rights_init(&(fcntl_rights[0]), 0);  // Later code assumes this is at [0]
+  cap_rights_init(&(fcntl_rights[1]), CAP_READ, CAP_WRITE);
+  cap_rights_init(&(fcntl_rights[2]), CAP_FCNTL);
+  cap_rights_init(&(fcntl_rights[3]), CAP_FLOCK);
+#ifdef CAP_FSIGNAL
+  cap_rights_init(&(fcntl_rights[4]), CAP_EVENT, CAP_FSIGNAL);
+  cap_rights_init(&(fcntl_rights[5]), CAP_FLOCK, CAP_FSIGNAL);
+#else
+  cap_rights_init(&(fcntl_rights[4]), 0);
+  cap_rights_init(&(fcntl_rights[5]), 0);
+#endif
+#ifdef CAP_NOTIFY
+  cap_rights_init(&(fcntl_rights[6]), CAP_NOTIFY);
+#else
+  cap_rights_init(&(fcntl_rights[6]), 0);
+#endif
+  cap_rights_init(&(fcntl_rights[7]), CAP_SETSOCKOPT);
+  cap_rights_init(&(fcntl_rights[8]), CAP_GETSOCKOPT);
+}
+
+int CheckFcntl(unsigned long long right, int caps[FCNTL_NUM_RIGHTS], int cmd, long arg, const char* context) {
+  SCOPED_TRACE(context);
+  cap_rights_t rights;
+  cap_rights_init(&rights, right);
+  int ok_index = -1;
+  for (int ii = 0; ii < FCNTL_NUM_RIGHTS; ++ii) {
+    if (cap_rights_contains(&(fcntl_rights[ii]), &rights)) {
+      if (ok_index == -1) ok_index = ii;
+      continue;
+    }
+    EXPECT_NOTCAPABLE(fcntl(caps[ii], cmd, arg));
+  }
+  EXPECT_NE(-1, ok_index);
+  int rc = fcntl(caps[ok_index], cmd, arg);
+  EXPECT_OK(rc);
+  return rc;
+}
+}  // namespace
+
+#define CHECK_FCNTL(right, caps, cmd, arg) \
+    CheckFcntl(right, caps, cmd, arg, "fcntl(" #cmd ") expect " #right)
+
+TEST(Fcntl, Commands) {
+  InitRights();
+  int fd = open(TmpFile("cap_fcntl_cmds"), O_RDWR|O_CREAT, 0644);
+  EXPECT_OK(fd);
+  write(fd, "TEST", 4);
+  int sock = socket(PF_LOCAL, SOCK_STREAM, 0);
+  EXPECT_OK(sock);
+  int caps[FCNTL_NUM_RIGHTS];
+  int sock_caps[FCNTL_NUM_RIGHTS];
+  for (int ii = 0; ii < FCNTL_NUM_RIGHTS; ++ii) {
+    caps[ii] = dup(fd);
+    EXPECT_OK(caps[ii]);
+    EXPECT_OK(cap_rights_limit(caps[ii], &(fcntl_rights[ii])));
+    sock_caps[ii] = dup(sock);
+    EXPECT_OK(sock_caps[ii]);
+    EXPECT_OK(cap_rights_limit(sock_caps[ii], &(fcntl_rights[ii])));
+  }
+
+  // Check the things that need no rights against caps[0].
+  int newfd = fcntl(caps[0], F_DUPFD, 0);
+  EXPECT_OK(newfd);
+  // dup()'ed FD should have same rights.
+  cap_rights_t rights;
+  cap_rights_init(&rights, 0);
+  EXPECT_OK(cap_rights_get(newfd, &rights));
+  EXPECT_RIGHTS_EQ(&(fcntl_rights[0]), &rights);
+  close(newfd);
+#ifdef HAVE_F_DUP2FD
+  EXPECT_OK(fcntl(caps[0], F_DUP2FD, newfd));
+  // dup2()'ed FD should have same rights.
+  EXPECT_OK(cap_rights_get(newfd, &rights));
+  EXPECT_RIGHTS_EQ(&(fcntl_rights[0]), &rights);
+  close(newfd);
+#endif
+
+  EXPECT_OK(fcntl(caps[0], F_GETFD, 0));
+  EXPECT_OK(fcntl(caps[0], F_SETFD, 0));
+
+  // Check operations that need CAP_FCNTL.
+  int fd_flag = CHECK_FCNTL(CAP_FCNTL, caps, F_GETFL, 0);
+  EXPECT_EQ(0, CHECK_FCNTL(CAP_FCNTL, caps, F_SETFL, fd_flag));
+  int owner = CHECK_FCNTL(CAP_FCNTL, sock_caps, F_GETOWN, 0);
+  EXPECT_EQ(0, CHECK_FCNTL(CAP_FCNTL, sock_caps, F_SETOWN, owner));
+
+  // Check an operation needing CAP_FLOCK.
+  struct flock fl;
+  memset(&fl, 0, sizeof(fl));
+  fl.l_type = F_RDLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 1;
+  EXPECT_EQ(0, CHECK_FCNTL(CAP_FLOCK, caps, F_GETLK, (long)&fl));
+
+  for (int ii = 0; ii < FCNTL_NUM_RIGHTS; ++ii) {
+    close(sock_caps[ii]);
+    close(caps[ii]);
+  }
+  close(sock);
+  close(fd);
+  unlink(TmpFile("cap_fcntl_cmds"));
+}
+
+TEST(Fcntl, WriteLock) {
+  int fd = open(TmpFile("cap_fcntl_readlock"), O_RDWR|O_CREAT, 0644);
+  EXPECT_OK(fd);
+  write(fd, "TEST", 4);
+
+  int cap = dup(fd);
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_FCNTL, CAP_READ, CAP_WRITE, CAP_FLOCK);
+  EXPECT_OK(cap_rights_limit(cap, &rights));
+
+  struct flock fl;
+  memset(&fl, 0, sizeof(fl));
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 1;
+  // Write-Lock
+  EXPECT_OK(fcntl(cap, F_SETLK, (long)&fl));
+
+  // Check write-locked (from another process).
+  pid_t child = fork();
+  if (child == 0) {
+    fl.l_type = F_WRLCK;
+    fl.l_whence = SEEK_SET;
+    fl.l_start = 0;
+    fl.l_len = 1;
+    EXPECT_OK(fcntl(fd, F_GETLK, (long)&fl));
+    EXPECT_NE(F_UNLCK, fl.l_type);
+    exit(HasFailure());
+  }
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  // Unlock
+  fl.l_type = F_UNLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 1;
+  EXPECT_OK(fcntl(cap, F_SETLK, (long)&fl));
+
+  close(cap);
+  close(fd);
+  unlink(TmpFile("cap_fcntl_readlock"));
+}
+
+#ifdef HAVE_CAP_FCNTLS_LIMIT
+TEST(Fcntl, SubRightNormalFD) {
+  int fd = open(TmpFile("cap_fcntl_subrightnorm"), O_RDWR|O_CREAT, 0644);
+  EXPECT_OK(fd);
+
+  // Restrict the fcntl(2) subrights of a normal FD.
+  EXPECT_OK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL));
+  int fd_flag = fcntl(fd, F_GETFL, 0);
+  EXPECT_OK(fd_flag);
+  EXPECT_NOTCAPABLE(fcntl(fd, F_SETFL, fd_flag));
+
+  // Expect to have all capabilities.
+  cap_rights_t rights;
+  EXPECT_OK(cap_rights_get(fd, &rights));
+  cap_rights_t all;
+  CAP_SET_ALL(&all);
+  EXPECT_RIGHTS_EQ(&all, &rights);
+  cap_fcntl_t fcntls;
+  EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+
+  // Can't widen the subrights.
+  EXPECT_NOTCAPABLE(cap_fcntls_limit(fd, CAP_FCNTL_GETFL|CAP_FCNTL_SETFL));
+
+  close(fd);
+  unlink(TmpFile("cap_fcntl_subrightnorm"));
+}
+
+TEST(Fcntl, PreserveSubRights) {
+  int fd = open(TmpFile("cap_fcntl_subrightpreserve"), O_RDWR|O_CREAT, 0644);
+  EXPECT_OK(fd);
+
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FCNTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  EXPECT_OK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL));
+
+  cap_rights_t cur_rights;
+  cap_fcntl_t fcntls;
+  EXPECT_OK(cap_rights_get(fd, &cur_rights));
+  EXPECT_RIGHTS_EQ(&rights, &cur_rights);
+  EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+
+  // Limiting the top-level rights leaves the subrights unaffected...
+  cap_rights_clear(&rights, CAP_READ);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+
+  // ... until we remove CAP_FCNTL.
+  cap_rights_clear(&rights, CAP_FCNTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)0, fcntls);
+  EXPECT_EQ(-1, cap_fcntls_limit(fd, CAP_FCNTL_GETFL));
+
+  close(fd);
+  unlink(TmpFile("cap_fcntl_subrightpreserve"));
+}
+
+TEST(Fcntl, FLSubRights) {
+  int fd = open(TmpFile("cap_fcntl_subrights"), O_RDWR|O_CREAT, 0644);
+  EXPECT_OK(fd);
+  write(fd, "TEST", 4);
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_FCNTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+
+  // Check operations that need CAP_FCNTL with subrights pristine => OK.
+  int fd_flag = fcntl(fd, F_GETFL, 0);
+  EXPECT_OK(fd_flag);
+  EXPECT_OK(fcntl(fd, F_SETFL, fd_flag));
+
+  // Check operations that need CAP_FCNTL with all subrights => OK.
+  EXPECT_OK(cap_fcntls_limit(fd, CAP_FCNTL_ALL));
+  fd_flag = fcntl(fd, F_GETFL, 0);
+  EXPECT_OK(fd_flag);
+  EXPECT_OK(fcntl(fd, F_SETFL, fd_flag));
+
+  // Check operations that need CAP_FCNTL with specific subrights.
+  int fd_get = dup(fd);
+  int fd_set = dup(fd);
+  EXPECT_OK(cap_fcntls_limit(fd_get, CAP_FCNTL_GETFL));
+  EXPECT_OK(cap_fcntls_limit(fd_set, CAP_FCNTL_SETFL));
+
+  fd_flag = fcntl(fd_get, F_GETFL, 0);
+  EXPECT_OK(fd_flag);
+  EXPECT_NOTCAPABLE(fcntl(fd_set, F_GETFL, 0));
+  EXPECT_OK(fcntl(fd_set, F_SETFL, fd_flag));
+  EXPECT_NOTCAPABLE(fcntl(fd_get, F_SETFL, fd_flag));
+  close(fd_get);
+  close(fd_set);
+
+  // Check operations that need CAP_FCNTL with no subrights => ENOTCAPABLE.
+  EXPECT_OK(cap_fcntls_limit(fd, 0));
+  EXPECT_NOTCAPABLE(fcntl(fd, F_GETFL, 0));
+  EXPECT_NOTCAPABLE(fcntl(fd, F_SETFL, fd_flag));
+
+  close(fd);
+  unlink(TmpFile("cap_fcntl_subrights"));
+}
+
+TEST(Fcntl, OWNSubRights) {
+  int sock = socket(PF_LOCAL, SOCK_STREAM, 0);
+  EXPECT_OK(sock);
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_FCNTL);
+  EXPECT_OK(cap_rights_limit(sock, &rights));
+
+  // Check operations that need CAP_FCNTL with no subrights => OK.
+  int owner = fcntl(sock, F_GETOWN, 0);
+  EXPECT_OK(owner);
+  EXPECT_OK(fcntl(sock, F_SETOWN, owner));
+
+  // Check operations that need CAP_FCNTL with all subrights => OK.
+  EXPECT_OK(cap_fcntls_limit(sock, CAP_FCNTL_ALL));
+  owner = fcntl(sock, F_GETOWN, 0);
+  EXPECT_OK(owner);
+  EXPECT_OK(fcntl(sock, F_SETOWN, owner));
+
+  // Check operations that need CAP_FCNTL with specific subrights.
+  int sock_get = dup(sock);
+  int sock_set = dup(sock);
+  EXPECT_OK(cap_fcntls_limit(sock_get, CAP_FCNTL_GETOWN));
+  EXPECT_OK(cap_fcntls_limit(sock_set, CAP_FCNTL_SETOWN));
+  owner = fcntl(sock_get, F_GETOWN, 0);
+  EXPECT_OK(owner);
+  EXPECT_NOTCAPABLE(fcntl(sock_set, F_GETOWN, 0));
+  EXPECT_OK(fcntl(sock_set, F_SETOWN, owner));
+  EXPECT_NOTCAPABLE(fcntl(sock_get, F_SETOWN, owner));
+  // Also check we can retrieve the subrights.
+  cap_fcntl_t fcntls;
+  EXPECT_OK(cap_fcntls_get(sock_get, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETOWN, fcntls);
+  EXPECT_OK(cap_fcntls_get(sock_set, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_SETOWN, fcntls);
+  // And that we can't widen the subrights.
+  EXPECT_NOTCAPABLE(cap_fcntls_limit(sock_get, CAP_FCNTL_GETOWN|CAP_FCNTL_SETOWN));
+  EXPECT_NOTCAPABLE(cap_fcntls_limit(sock_set, CAP_FCNTL_GETOWN|CAP_FCNTL_SETOWN));
+  close(sock_get);
+  close(sock_set);
+
+  // Check operations that need CAP_FCNTL with no subrights => ENOTCAPABLE.
+  EXPECT_OK(cap_fcntls_limit(sock, 0));
+  EXPECT_NOTCAPABLE(fcntl(sock, F_GETOWN, 0));
+  EXPECT_NOTCAPABLE(fcntl(sock, F_SETOWN, owner));
+
+  close(sock);
+}
+#endif
--- a/contrib/capsicum-test/fexecve.cc
+++ b/contrib/capsicum-test/fexecve.cc
@ -0,0 +1,208 @@
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sstream>
+
+#include "syscalls.h"
+#include "capsicum.h"
+#include "capsicum-test.h"
+
+// Arguments to use in execve() calls.
+static char* null_envp[] = {NULL};
+
+class Execve : public ::testing::Test {
+ public:
+  Execve() : exec_fd_(-1) {
+    // We need a program to exec(), but for fexecve() to work in capability
+    // mode that program needs to be statically linked (otherwise ld.so will
+    // attempt to traverse the filesystem to load (e.g.) /lib/libc.so and
+    // fail).
+    exec_prog_ = capsicum_test_bindir + "/mini-me";
+    exec_prog_noexec_ = capsicum_test_bindir + "/mini-me.noexec";
+    exec_prog_setuid_ = capsicum_test_bindir + "/mini-me.setuid";
+
+    exec_fd_ = open(exec_prog_.c_str(), O_RDONLY);
+    if (exec_fd_ < 0) {
+      fprintf(stderr, "Error! Failed to open %s\n", exec_prog_.c_str());
+    }
+    argv_checkroot_[0] = (char*)exec_prog_.c_str();
+    argv_fail_[0] = (char*)exec_prog_.c_str();
+    argv_pass_[0] = (char*)exec_prog_.c_str();
+  }
+  ~Execve() {
+    if (exec_fd_ >= 0) {
+      close(exec_fd_);
+      exec_fd_ = -1;
+    }
+  }
+protected:
+  char* argv_checkroot_[3] = {nullptr, (char*)"--checkroot", nullptr};
+  char* argv_fail_[3] = {nullptr, (char*)"--fail", nullptr};
+  char* argv_pass_[3] = {nullptr, (char*)"--pass", nullptr};
+  std::string exec_prog_, exec_prog_noexec_, exec_prog_setuid_;
+  int exec_fd_;
+};
+
+class Fexecve : public Execve {
+ public:
+  Fexecve() : Execve() {}
+};
+
+class FexecveWithScript : public Fexecve {
+ public:
+  FexecveWithScript() :
+    Fexecve(), temp_script_filename_(TmpFile("cap_sh_script")) {}
+
+  void SetUp() override {
+    // First, build an executable shell script
+    int fd = open(temp_script_filename_, O_RDWR|O_CREAT, 0755);
+    EXPECT_OK(fd);
+    const char* contents = "#!/bin/sh\nexit 99\n";
+    EXPECT_OK(write(fd, contents, strlen(contents)));
+    close(fd);
+  }
+  void TearDown() override {
+    (void)::unlink(temp_script_filename_);
+  }
+
+  const char *temp_script_filename_;
+};
+
+FORK_TEST_F(Execve, BasicFexecve) {
+  EXPECT_OK(fexecve_(exec_fd_, argv_pass_, null_envp));
+  // Should not reach here, exec() takes over.
+  EXPECT_TRUE(!"fexecve() should never return");
+}
+
+FORK_TEST_F(Execve, InCapMode) {
+  EXPECT_OK(cap_enter());
+  EXPECT_OK(fexecve_(exec_fd_, argv_pass_, null_envp));
+  // Should not reach here, exec() takes over.
+  EXPECT_TRUE(!"fexecve() should never return");
+}
+
+FORK_TEST_F(Execve, FailWithoutCap) {
+  EXPECT_OK(cap_enter());
+  int cap_fd = dup(exec_fd_);
+  EXPECT_OK(cap_fd);
+  cap_rights_t rights;
+  cap_rights_init(&rights, 0);
+  EXPECT_OK(cap_rights_limit(cap_fd, &rights));
+  EXPECT_EQ(-1, fexecve_(cap_fd, argv_fail_, null_envp));
+  EXPECT_EQ(ENOTCAPABLE, errno);
+}
+
+FORK_TEST_F(Execve, SucceedWithCap) {
+  EXPECT_OK(cap_enter());
+  int cap_fd = dup(exec_fd_);
+  EXPECT_OK(cap_fd);
+  cap_rights_t rights;
+  // TODO(drysdale): would prefer that Linux Capsicum not need all of these
+  // rights -- just CAP_FEXECVE|CAP_READ or CAP_FEXECVE would be preferable.
+  cap_rights_init(&rights, CAP_FEXECVE, CAP_LOOKUP, CAP_READ);
+  EXPECT_OK(cap_rights_limit(cap_fd, &rights));
+  EXPECT_OK(fexecve_(cap_fd, argv_pass_, null_envp));
+  // Should not reach here, exec() takes over.
+  EXPECT_TRUE(!"fexecve() should have succeeded");
+}
+
+FORK_TEST_F(Fexecve, ExecutePermissionCheck) {
+  int fd = open(exec_prog_noexec_.c_str(), O_RDONLY);
+  EXPECT_OK(fd);
+  if (fd >= 0) {
+    struct stat data;
+    EXPECT_OK(fstat(fd, &data));
+    EXPECT_EQ((mode_t)0, data.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH));
+    EXPECT_EQ(-1, fexecve_(fd, argv_fail_, null_envp));
+    EXPECT_EQ(EACCES, errno);
+    close(fd);
+  }
+}
+
+FORK_TEST_F(Fexecve, SetuidIgnored) {
+  if (geteuid() == 0) {
+    TEST_SKIPPED("requires non-root");
+    return;
+  }
+  int fd = open(exec_prog_setuid_.c_str(), O_RDONLY);
+  EXPECT_OK(fd);
+  EXPECT_OK(cap_enter());
+  if (fd >= 0) {
+    struct stat data;
+    EXPECT_OK(fstat(fd, &data));
+    EXPECT_EQ((mode_t)S_ISUID, data.st_mode & S_ISUID);
+    EXPECT_OK(fexecve_(fd, argv_checkroot_, null_envp));
+    // Should not reach here, exec() takes over.
+    EXPECT_TRUE(!"fexecve() should have succeeded");
+    close(fd);
+  }
+}
+
+FORK_TEST_F(Fexecve, ExecveFailure) {
+  EXPECT_OK(cap_enter());
+  EXPECT_EQ(-1, execve(argv_fail_[0], argv_fail_, null_envp));
+  EXPECT_EQ(ECAPMODE, errno);
+}
+
+FORK_TEST_F(FexecveWithScript, CapModeScriptFail) {
+  int fd;
+
+  // Open the script file, with CAP_FEXECVE rights.
+  fd = open(temp_script_filename_, O_RDONLY);
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_FEXECVE, CAP_READ, CAP_SEEK);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+
+  EXPECT_OK(cap_enter());  // Enter capability mode
+
+  // Attempt fexecve; should fail, because "/bin/sh" is inaccessible.
+  EXPECT_EQ(-1, fexecve_(fd, argv_pass_, null_envp));
+}
+
+#ifdef HAVE_EXECVEAT
+class Execveat : public Execve {
+ public:
+  Execveat() : Execve() {}
+};
+
+TEST_F(Execveat, NoUpwardTraversal) {
+  char *abspath = realpath(exec_prog_, NULL);
+  char cwd[1024];
+  getcwd(cwd, sizeof(cwd));
+
+  int dfd = open(".", O_DIRECTORY|O_RDONLY);
+  pid_t child = fork();
+  if (child == 0) {
+    EXPECT_OK(cap_enter());  // Enter capability mode.
+    // Can't execveat() an absolute path, even relative to a dfd.
+    EXPECT_SYSCALL_FAIL(ECAPMODE,
+                        execveat(AT_FDCWD, abspath, argv_pass_, null_envp, 0));
+    EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY,
+                        execveat(dfd, abspath, argv_pass_, null_envp, 0));
+
+    // Can't execveat() a relative path ("../<dir>/./<exe>").
+    char *p = cwd + strlen(cwd);
+    while (*p != '/') p--;
+    char buffer[1024] = "../";
+    strcat(buffer, ++p);
+    strcat(buffer, "/");
+    strcat(buffer, exec_prog_);
+    EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY,
+                        execveat(dfd, buffer, argv_pass_, null_envp, 0));
+    exit(HasFailure() ? 99 : 123);
+  }
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  EXPECT_TRUE(WIFEXITED(status)) << "0x" << std::hex << status;
+  EXPECT_EQ(123, WEXITSTATUS(status));
+  free(abspath);
+  close(dfd);
+}
+#endif
--- a/contrib/capsicum-test/ioctl.cc
+++ b/contrib/capsicum-test/ioctl.cc
@ -0,0 +1,234 @@
+// Test that ioctl works in capability mode.
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+
+#include "capsicum.h"
+#include "capsicum-test.h"
+
+// Ensure that ioctl() works consistently for both regular file descriptors and
+// capability-wrapped ones.
+TEST(Ioctl, Basic) {
+  cap_rights_t rights_ioctl;
+  cap_rights_init(&rights_ioctl, CAP_IOCTL);
+  cap_rights_t rights_many;
+  cap_rights_init(&rights_many, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSTAT, CAP_FSYNC);
+
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+  int fd_no = dup(fd);
+  EXPECT_OK(fd_no);
+  EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+  EXPECT_OK(cap_rights_limit(fd_no, &rights_many));
+
+  // Check that CAP_IOCTL is required.
+  int bytes;
+  EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+  EXPECT_NOTCAPABLE(ioctl(fd_no, FIONREAD, &bytes));
+
+  int one = 1;
+  EXPECT_OK(ioctl(fd, FIOCLEX, &one));
+  EXPECT_NOTCAPABLE(ioctl(fd_no, FIOCLEX, &one));
+
+  close(fd);
+  close(fd_no);
+}
+
+#ifdef HAVE_CAP_IOCTLS_LIMIT
+TEST(Ioctl, SubRightNormalFD) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+
+  // Restrict the ioctl(2) subrights of a normal FD.
+  cap_ioctl_t ioctl_nread = FIONREAD;
+  EXPECT_OK(cap_ioctls_limit(fd, &ioctl_nread, 1));
+  int bytes;
+  EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+  int one = 1;
+  EXPECT_NOTCAPABLE(ioctl(fd, FIOCLEX, &one));
+
+  // Expect to have all primary rights.
+  cap_rights_t rights;
+  EXPECT_OK(cap_rights_get(fd, &rights));
+  cap_rights_t all;
+  CAP_SET_ALL(&all);
+  EXPECT_RIGHTS_EQ(&all, &rights);
+  cap_ioctl_t ioctls[16];
+  memset(ioctls, 0, sizeof(ioctls));
+  ssize_t nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+
+  // Can't widen the subrights.
+  cap_ioctl_t both_ioctls[2] = {FIONREAD, FIOCLEX};
+  EXPECT_NOTCAPABLE(cap_ioctls_limit(fd, both_ioctls, 2));
+
+  close(fd);
+}
+
+TEST(Ioctl, PreserveSubRights) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_IOCTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  cap_ioctl_t ioctl_nread = FIONREAD;
+  EXPECT_OK(cap_ioctls_limit(fd, &ioctl_nread, 1));
+
+  cap_rights_t cur_rights;
+  cap_ioctl_t ioctls[16];
+  ssize_t nioctls;
+  EXPECT_OK(cap_rights_get(fd, &cur_rights));
+  EXPECT_RIGHTS_EQ(&rights, &cur_rights);
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+
+  // Limiting the top-level rights leaves the subrights unaffected...
+  cap_rights_clear(&rights, CAP_READ);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+
+  // ... until we remove CAP_IOCTL
+  cap_rights_clear(&rights, CAP_IOCTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(0, nioctls);
+  EXPECT_EQ(-1, cap_ioctls_limit(fd, &ioctl_nread, 1));
+
+  close(fd);
+}
+
+TEST(Ioctl, SubRights) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+
+  cap_ioctl_t ioctls[16];
+  ssize_t nioctls;
+  memset(ioctls, 0, sizeof(ioctls));
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(CAP_IOCTLS_ALL, nioctls);
+
+  cap_rights_t rights_ioctl;
+  cap_rights_init(&rights_ioctl, CAP_IOCTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(CAP_IOCTLS_ALL, nioctls);
+
+  // Check operations that need CAP_IOCTL with subrights pristine => OK.
+  int bytes;
+  EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+  int one = 1;
+  EXPECT_OK(ioctl(fd, FIOCLEX, &one));
+
+  // Check operations that need CAP_IOCTL with all relevant subrights => OK.
+  cap_ioctl_t both_ioctls[2] = {FIONREAD, FIOCLEX};
+  EXPECT_OK(cap_ioctls_limit(fd, both_ioctls, 2));
+  EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+  EXPECT_OK(ioctl(fd, FIOCLEX, &one));
+
+
+  // Check what happens if we ask for subrights but don't have the space for them.
+  cap_ioctl_t before = 0xBBBBBBBB;
+  cap_ioctl_t one_ioctl = 0;
+  cap_ioctl_t after = 0xAAAAAAAA;
+  nioctls = cap_ioctls_get(fd, &one_ioctl, 1);
+  EXPECT_EQ(2, nioctls);
+  EXPECT_EQ(0xBBBBBBBB, before);
+  EXPECT_TRUE(one_ioctl == FIONREAD || one_ioctl == FIOCLEX);
+  EXPECT_EQ(0xAAAAAAAA, after);
+
+  // Check operations that need CAP_IOCTL with particular subrights.
+  int fd_nread = dup(fd);
+  int fd_clex = dup(fd);
+  cap_ioctl_t ioctl_nread = FIONREAD;
+  cap_ioctl_t ioctl_clex = FIOCLEX;
+  EXPECT_OK(cap_ioctls_limit(fd_nread, &ioctl_nread, 1));
+  EXPECT_OK(cap_ioctls_limit(fd_clex, &ioctl_clex, 1));
+  EXPECT_OK(ioctl(fd_nread, FIONREAD, &bytes));
+  EXPECT_NOTCAPABLE(ioctl(fd_clex, FIONREAD, &bytes));
+  EXPECT_OK(ioctl(fd_clex, FIOCLEX, &one));
+  EXPECT_NOTCAPABLE(ioctl(fd_nread, FIOCLEX, &one));
+
+  // Also check we can retrieve the subrights.
+  memset(ioctls, 0, sizeof(ioctls));
+  nioctls = cap_ioctls_get(fd_nread, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+  memset(ioctls, 0, sizeof(ioctls));
+  nioctls = cap_ioctls_get(fd_clex, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIOCLEX, ioctls[0]);
+  // And that we can't widen the subrights.
+  EXPECT_NOTCAPABLE(cap_ioctls_limit(fd_nread, both_ioctls, 2));
+  EXPECT_NOTCAPABLE(cap_ioctls_limit(fd_clex, both_ioctls, 2));
+  close(fd_nread);
+  close(fd_clex);
+
+  // Check operations that need CAP_IOCTL with no subrights => ENOTCAPABLE.
+  EXPECT_OK(cap_ioctls_limit(fd, NULL, 0));
+  EXPECT_NOTCAPABLE(ioctl(fd, FIONREAD, &bytes));
+  EXPECT_NOTCAPABLE(ioctl(fd, FIOCLEX, &one));
+
+  close(fd);
+}
+
+#ifdef CAP_IOCTLS_LIMIT_MAX
+TEST(Ioctl, TooManySubRights) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+
+  cap_ioctl_t ioctls[CAP_IOCTLS_LIMIT_MAX + 1];
+  for (int ii = 0; ii <= CAP_IOCTLS_LIMIT_MAX; ii++) {
+    ioctls[ii] = ii + 1;
+  }
+
+  cap_rights_t rights_ioctl;
+  cap_rights_init(&rights_ioctl, CAP_IOCTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+
+  // Can only limit to a certain number of ioctls
+  EXPECT_EQ(-1, cap_ioctls_limit(fd, ioctls, CAP_IOCTLS_LIMIT_MAX + 1));
+  EXPECT_EQ(EINVAL, errno);
+  EXPECT_OK(cap_ioctls_limit(fd, ioctls, CAP_IOCTLS_LIMIT_MAX));
+
+  close(fd);
+}
+#else
+TEST(Ioctl, ManySubRights) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+
+  const int nioctls = 150000;
+  cap_ioctl_t* ioctls = (cap_ioctl_t*)calloc(nioctls, sizeof(cap_ioctl_t));
+  for (int ii = 0; ii < nioctls; ii++) {
+    ioctls[ii] = ii + 1;
+  }
+
+  cap_rights_t rights_ioctl;
+  cap_rights_init(&rights_ioctl, CAP_IOCTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+
+  EXPECT_OK(cap_ioctls_limit(fd, ioctls, nioctls));
+  // Limit to a subset; if this takes a long time then there's an
+  // O(N^2) implementation of the ioctl list comparison.
+  EXPECT_OK(cap_ioctls_limit(fd, ioctls, nioctls - 1));
+
+  close(fd);
+}
+#endif
+
+#endif
--- a/contrib/capsicum-test/linux.cc
+++ b/contrib/capsicum-test/linux.cc
--- a/contrib/capsicum-test/makefile
+++ b/contrib/capsicum-test/makefile
@ -0,0 +1,36 @@
+all: capsicum-test smoketest mini-me mini-me.noexec mini-me.setuid $(EXTRA_PROGS)
+OBJECTS=capsicum-test-main.o capsicum-test.o capability-fd.o fexecve.o procdesc.o capmode.o fcntl.o ioctl.o openat.o sysctl.o select.o mqueue.o socket.o sctp.o capability-fd-pair.o linux.o overhead.o rename.o
+
+GTEST_DIR=gtest-1.8.1
+GTEST_INCS=-I$(GTEST_DIR)/include -I$(GTEST_DIR)
+GTEST_FLAGS=-DGTEST_USE_OWN_TR1_TUPLE=1 -DGTEST_HAS_TR1_TUPLE=1
+CXXFLAGS+=$(ARCHFLAG) -Wall -g $(GTEST_INCS) $(GTEST_FLAGS) --std=c++11
+CFLAGS+=$(ARCHFLAG) -Wall -g
+
+capsicum-test: $(OBJECTS) libgtest.a $(LOCAL_LIBS)
+	$(CXX) $(CXXFLAGS) -g -o $@ $(OBJECTS) libgtest.a -lpthread -lrt $(LIBSCTP) $(LIBCAPRIGHTS)
+
+# Small statically-linked program for fexecve tests
+# (needs to be statically linked so that execve()ing it
+# doesn't involve ld.so traversing the filesystem).
+mini-me: mini-me.c
+	$(CC) $(CFLAGS) -static -o $@ $<
+mini-me.noexec: mini-me
+	cp mini-me $@ && chmod -x $@
+mini-me.setuid: mini-me
+	rm -f $@ && cp mini-me $@&& sudo chown root $@ && sudo chmod u+s $@
+
+# Simple C test of Capsicum syscalls
+SMOKETEST_OBJECTS=smoketest.o
+smoketest: $(SMOKETEST_OBJECTS) $(LOCAL_LIBS)
+	$(CC) $(CFLAGS) -o $@ $(SMOKETEST_OBJECTS) $(LIBCAPRIGHTS)
+
+test: capsicum-test mini-me mini-me.noexec mini-me.setuid $(EXTRA_PROGS)
+	./capsicum-test
+gtest-all.o:
+	$(CXX) $(ARCHFLAG) -I$(GTEST_DIR)/include -I$(GTEST_DIR) $(GTEST_FLAGS) -c ${GTEST_DIR}/src/gtest-all.cc
+libgtest.a: gtest-all.o
+	$(AR) -rv libgtest.a gtest-all.o
+
+clean:
+	rm -rf gtest-all.o libgtest.a capsicum-test mini-me mini-me.noexec smoketest $(SMOKETEST_OBJECTS) $(OBJECTS) $(LOCAL_CLEAN) $(EXTRA_PROGS)
--- a/contrib/capsicum-test/mini-me.c
+++ b/contrib/capsicum-test/mini-me.c
@ -0,0 +1,38 @@
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(int argc, char* argv[]) {
+  if (argc == 2 && !strcmp(argv[1], "--pass")) {
+    fprintf(stderr,"[%d] %s immediately returning 0\n", getpid(), argv[0]);
+    return 0;
+  }
+
+  if (argc == 2 && !strcmp(argv[1], "--fail")) {
+    fprintf(stderr,"[%d] %s immediately returning 1\n", getpid(), argv[0]);
+    return 1;
+  }
+
+  if (argc == 2 && !strcmp(argv[1], "--checkroot")) {
+    int rc = (geteuid() == 0);
+    fprintf(stderr,"[uid:%d] %s immediately returning (geteuid() == 0) = %d\n", geteuid(), argv[0], rc);
+    return rc;
+  }
+
+  if (argc == 2 && !strcmp(argv[1], "--capmode")) {
+    /* Expect to already be in capability mode: check we can't open a file */
+    int rc = 0;
+
+    int fd = open("/etc/passwd", O_RDONLY);
+    if (fd > 0) {
+      fprintf(stderr,"[%d] %s unexpectedly able to open file\n", getpid(), argv[0]);
+      rc = 1;
+    }
+    fprintf(stderr,"[%d] %s --capmode returning %d\n", getpid(), argv[0], rc);
+    return rc;
+  }
+
+  return -1;
+}
--- a/contrib/capsicum-test/mqueue.cc
+++ b/contrib/capsicum-test/mqueue.cc
@ -0,0 +1,100 @@
+// Tests for POSIX message queue functionality.
+
+#include <time.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <mqueue.h>
+
+#include <string>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+// Run a test case in a forked process, possibly cleaning up a
+// message after completion
+#define FORK_TEST_ON_MQ(test_case_name, test_name, test_mq)    \
+    static void test_case_name##_##test_name##_ForkTest();     \
+    TEST(test_case_name, test_name ## Forked) {                \
+      _RUN_FORKED_FN(test_case_name##_##test_name##_ForkTest,  \
+                     #test_case_name, #test_name);             \
+      const char *mqname = test_mq;                            \
+      if (mqname) mq_unlink_(mqname);                          \
+    }                                                          \
+    static void test_case_name##_##test_name##_ForkTest()
+
+static bool invoked;
+void seen_it_done_it(int) {
+  invoked = true;
+}
+
+FORK_TEST_ON_MQ(PosixMqueue, CapMode, "/cap_mq") {
+  int mq = mq_open_("/cap_mq", O_RDWR|O_CREAT, 0644, NULL);
+  // On FreeBSD, turn on message queue support with:
+  //  - 'kldload mqueuefs'
+  //  - 'options P1003_1B_MQUEUE' in kernel build config.
+  if (mq < 0 && errno == ENOSYS) {
+    TEST_SKIPPED("mq_open -> -ENOSYS");
+    return;
+  }
+  EXPECT_OK(mq);
+  cap_rights_t r_read;
+  cap_rights_init(&r_read, CAP_READ);
+  cap_rights_t r_write;
+  cap_rights_init(&r_write, CAP_WRITE);
+  cap_rights_t r_poll;
+  cap_rights_init(&r_poll, CAP_EVENT);
+
+  int cap_read_mq = dup(mq);
+  EXPECT_OK(cap_read_mq);
+  EXPECT_OK(cap_rights_limit(cap_read_mq, &r_read));
+  int cap_write_mq = dup(mq);
+  EXPECT_OK(cap_write_mq);
+  EXPECT_OK(cap_rights_limit(cap_write_mq, &r_write));
+  int cap_poll_mq = dup(mq);
+  EXPECT_OK(cap_poll_mq);
+  EXPECT_OK(cap_rights_limit(cap_poll_mq, &r_poll));
+  EXPECT_OK(mq_close_(mq));
+
+  signal(SIGUSR2, seen_it_done_it);
+
+  EXPECT_OK(cap_enter());  // Enter capability mode
+
+  // Can no longer access the message queue via the POSIX IPC namespace.
+  EXPECT_CAPMODE(mq_open_("/cap_mw", O_RDWR|O_CREAT, 0644, NULL));
+
+  struct sigevent se;
+  se.sigev_notify = SIGEV_SIGNAL;
+  se.sigev_signo = SIGUSR2;
+  EXPECT_OK(mq_notify_(cap_poll_mq, &se));
+  EXPECT_NOTCAPABLE(mq_notify_(cap_read_mq, &se));
+  EXPECT_NOTCAPABLE(mq_notify_(cap_write_mq, &se));
+
+  const unsigned int kPriority = 10;
+  const char* message = "xyzzy";
+  struct timespec ts;
+  ts.tv_sec = 1;
+  ts.tv_nsec = 0;
+  EXPECT_OK(mq_timedsend_(cap_write_mq, message, strlen(message) + 1, kPriority, &ts));
+  EXPECT_NOTCAPABLE(mq_timedsend_(cap_read_mq, message, strlen(message) + 1, kPriority, &ts));
+
+  sleep(1);  // Give the notification a chance to arrive.
+  EXPECT_TRUE(invoked);
+
+  struct mq_attr mqa;
+  EXPECT_OK(mq_getattr_(cap_poll_mq, &mqa));
+  EXPECT_OK(mq_setattr_(cap_poll_mq, &mqa, NULL));
+  EXPECT_NOTCAPABLE(mq_getattr_(cap_write_mq, &mqa));
+
+  char* buffer = (char *)malloc(mqa.mq_msgsize);
+  unsigned int priority;
+  EXPECT_NOTCAPABLE(mq_timedreceive_(cap_write_mq, buffer, mqa.mq_msgsize, &priority, &ts));
+  EXPECT_OK(mq_timedreceive_(cap_read_mq, buffer, mqa.mq_msgsize, &priority, &ts));
+  EXPECT_EQ(std::string(message), std::string(buffer));
+  EXPECT_EQ(kPriority, priority);
+  free(buffer);
+
+  close(cap_read_mq);
+  close(cap_write_mq);
+  close(cap_poll_mq);
+}
--- a/contrib/capsicum-test/openat.cc
+++ b/contrib/capsicum-test/openat.cc
@ -0,0 +1,361 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+
+#include <string>
+
+#include "capsicum.h"
+#include "capsicum-test.h"
+#include "syscalls.h"
+
+// Check an open call works and close the resulting fd.
+#define EXPECT_OPEN_OK(f) do { \
+    int _fd = f;               \
+    EXPECT_OK(_fd);            \
+    close(_fd);                \
+  } while (0)
+
+static void CreateFile(const char *filename, const char *contents) {
+  int fd = open(filename, O_CREAT|O_RDWR, 0644);
+  EXPECT_OK(fd);
+  EXPECT_OK(write(fd, contents, strlen(contents)));
+  close(fd);
+}
+
+// Test openat(2) in a variety of sitations to ensure that it obeys Capsicum
+// "strict relative" rules:
+//
+// 1. Use strict relative lookups in capability mode or when operating
+//    relative to a capability.
+// 2. When performing strict relative lookups, absolute paths (including
+//    symlinks to absolute paths) are not allowed, nor are paths containing
+//    '..' components.
+//
+// These rules apply when:
+//  - the directory FD is a Capsicum capability
+//  - the process is in capability mode
+//  - the openat(2) operation includes the O_BENEATH flag.
+FORK_TEST(Openat, Relative) {
+  int etc = open("/etc/", O_RDONLY);
+  EXPECT_OK(etc);
+
+  cap_rights_t r_base;
+  cap_rights_init(&r_base, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_LOOKUP, CAP_FCNTL, CAP_IOCTL);
+  cap_rights_t r_ro;
+  cap_rights_init(&r_ro, CAP_READ);
+  cap_rights_t r_rl;
+  cap_rights_init(&r_rl, CAP_READ, CAP_LOOKUP);
+
+  int etc_cap = dup(etc);
+  EXPECT_OK(etc_cap);
+  EXPECT_OK(cap_rights_limit(etc_cap, &r_ro));
+  int etc_cap_ro = dup(etc);
+  EXPECT_OK(etc_cap_ro);
+  EXPECT_OK(cap_rights_limit(etc_cap_ro, &r_rl));
+  int etc_cap_base = dup(etc);
+  EXPECT_OK(etc_cap_base);
+  EXPECT_OK(cap_rights_limit(etc_cap_base, &r_base));
+#ifdef HAVE_CAP_FCNTLS_LIMIT
+  // Also limit fcntl(2) subrights.
+  EXPECT_OK(cap_fcntls_limit(etc_cap_base, CAP_FCNTL_GETFL));
+#endif
+#ifdef HAVE_CAP_IOCTLS_LIMIT
+  // Also limit ioctl(2) subrights.
+  cap_ioctl_t ioctl_nread = FIONREAD;
+  EXPECT_OK(cap_ioctls_limit(etc_cap_base, &ioctl_nread, 1));
+#endif
+
+  // openat(2) with regular file descriptors in non-capability mode
+  // Should Just Work (tm).
+  EXPECT_OPEN_OK(openat(etc, "/etc/passwd", O_RDONLY));
+  EXPECT_OPEN_OK(openat(AT_FDCWD, "/etc/passwd", O_RDONLY));
+  EXPECT_OPEN_OK(openat(etc, "passwd", O_RDONLY));
+  EXPECT_OPEN_OK(openat(etc, "../etc/passwd", O_RDONLY));
+
+  // Lookups relative to capabilities should be strictly relative.
+  // When not in capability mode, we don't actually require CAP_LOOKUP.
+  EXPECT_OPEN_OK(openat(etc_cap_ro, "passwd", O_RDONLY));
+  EXPECT_OPEN_OK(openat(etc_cap_base, "passwd", O_RDONLY));
+
+  // Performing openat(2) on a path with leading slash ignores
+  // the provided directory FD.
+  EXPECT_OPEN_OK(openat(etc_cap_ro, "/etc/passwd", O_RDONLY));
+  EXPECT_OPEN_OK(openat(etc_cap_base, "/etc/passwd", O_RDONLY));
+  // Relative lookups that go upward are not allowed.
+  EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_ro, "../etc/passwd", O_RDONLY);
+  EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_base, "../etc/passwd", O_RDONLY);
+
+  // A file opened relative to a capability should itself be a capability.
+  int fd = openat(etc_cap_base, "passwd", O_RDONLY);
+  EXPECT_OK(fd);
+  cap_rights_t rights;
+  EXPECT_OK(cap_rights_get(fd, &rights));
+  EXPECT_RIGHTS_IN(&rights, &r_base);
+#ifdef HAVE_CAP_FCNTLS_LIMIT
+  cap_fcntl_t fcntls;
+  EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+#endif
+#ifdef HAVE_CAP_IOCTLS_LIMIT
+  cap_ioctl_t ioctls[16];
+  ssize_t nioctls;
+  memset(ioctls, 0, sizeof(ioctls));
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+#endif
+  close(fd);
+
+  // Enter capability mode; now ALL lookups are strictly relative.
+  EXPECT_OK(cap_enter());
+
+  // Relative lookups on regular files or capabilities with CAP_LOOKUP
+  // ought to succeed.
+  EXPECT_OPEN_OK(openat(etc, "passwd", O_RDONLY));
+  EXPECT_OPEN_OK(openat(etc_cap_ro, "passwd", O_RDONLY));
+  EXPECT_OPEN_OK(openat(etc_cap_base, "passwd", O_RDONLY));
+
+  // Lookup relative to capabilities without CAP_LOOKUP should fail.
+  EXPECT_NOTCAPABLE(openat(etc_cap, "passwd", O_RDONLY));
+
+  // Absolute lookups should fail.
+  EXPECT_CAPMODE(openat(AT_FDCWD, "/etc/passwd", O_RDONLY));
+  EXPECT_OPENAT_FAIL_TRAVERSAL(etc, "/etc/passwd", O_RDONLY);
+  EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_ro, "/etc/passwd", O_RDONLY);
+
+  // Lookups containing '..' should fail in capability mode.
+  EXPECT_OPENAT_FAIL_TRAVERSAL(etc, "../etc/passwd", O_RDONLY);
+  EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_ro, "../etc/passwd", O_RDONLY);
+  EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_base, "../etc/passwd", O_RDONLY);
+
+  fd = openat(etc, "passwd", O_RDONLY);
+  EXPECT_OK(fd);
+
+  // A file opened relative to a capability should itself be a capability.
+  fd = openat(etc_cap_base, "passwd", O_RDONLY);
+  EXPECT_OK(fd);
+  EXPECT_OK(cap_rights_get(fd, &rights));
+  EXPECT_RIGHTS_IN(&rights, &r_base);
+  close(fd);
+
+  fd = openat(etc_cap_ro, "passwd", O_RDONLY);
+  EXPECT_OK(fd);
+  EXPECT_OK(cap_rights_get(fd, &rights));
+  EXPECT_RIGHTS_IN(&rights, &r_rl);
+  close(fd);
+}
+
+#define TOPDIR "cap_topdir"
+#define SUBDIR TOPDIR "/subdir"
+class OpenatTest : public ::testing::Test {
+ public:
+  // Build a collection of files, subdirs and symlinks:
+  //  /tmp/cap_topdir/
+  //                 /topfile
+  //                 /subdir/
+  //                 /subdir/bottomfile
+  //                 /symlink.samedir              -> topfile
+  //                 /dsymlink.samedir             -> ./
+  //                 /symlink.down                 -> subdir/bottomfile
+  //                 /dsymlink.down                -> subdir/
+  //                 /symlink.absolute_out         -> /etc/passwd
+  //                 /dsymlink.absolute_out        -> /etc/
+  //                 /symlink.relative_in          -> ../../tmp/cap_topdir/topfile
+  //                 /dsymlink.relative_in         -> ../../tmp/cap_topdir/
+  //                 /symlink.relative_out         -> ../../etc/passwd
+  //                 /dsymlink.relative_out        -> ../../etc/
+  //                 /subdir/dsymlink.absolute_in  -> /tmp/cap_topdir/
+  //                 /subdir/dsymlink.up           -> ../
+  //                 /subdir/symlink.absolute_in   -> /tmp/cap_topdir/topfile
+  //                 /subdir/symlink.up            -> ../topfile
+  // (In practice, this is a little more complicated because tmpdir might
+  // not be "/tmp".)
+  OpenatTest() {
+    // Create a couple of nested directories
+    int rc = mkdir(TmpFile(TOPDIR), 0755);
+    EXPECT_OK(rc);
+    if (rc < 0) {
+      EXPECT_EQ(EEXIST, errno);
+    }
+    rc = mkdir(TmpFile(SUBDIR), 0755);
+    EXPECT_OK(rc);
+    if (rc < 0) {
+      EXPECT_EQ(EEXIST, errno);
+    }
+
+    // Figure out a path prefix (like "../..") that gets us to the root
+    // directory from TmpFile(TOPDIR).
+    const char *p = TmpFile(TOPDIR);  // maybe "/tmp/somewhere/cap_topdir"
+    std::string dots2root = "..";
+    while (*p++ != '\0') {
+      if (*p == '/') {
+        dots2root += "/..";
+      }
+    }
+
+    // Create normal files in each.
+    CreateFile(TmpFile(TOPDIR "/topfile"), "Top-level file");
+    CreateFile(TmpFile(SUBDIR "/bottomfile"), "File in subdirectory");
+
+    // Create various symlinks to files.
+    EXPECT_OK(symlink("topfile", TmpFile(TOPDIR "/symlink.samedir")));
+    EXPECT_OK(symlink("subdir/bottomfile", TmpFile(TOPDIR "/symlink.down")));
+    EXPECT_OK(symlink(TmpFile(TOPDIR "/topfile"), TmpFile(SUBDIR "/symlink.absolute_in")));
+    EXPECT_OK(symlink("/etc/passwd", TmpFile(TOPDIR "/symlink.absolute_out")));
+    std::string dots2top = dots2root + TmpFile(TOPDIR "/topfile");
+    EXPECT_OK(symlink(dots2top.c_str(), TmpFile(TOPDIR "/symlink.relative_in")));
+    std::string dots2passwd = dots2root + "/etc/passwd";
+    EXPECT_OK(symlink(dots2passwd.c_str(), TmpFile(TOPDIR "/symlink.relative_out")));
+    EXPECT_OK(symlink("../topfile", TmpFile(SUBDIR "/symlink.up")));
+
+    // Create various symlinks to directories.
+    EXPECT_OK(symlink("./", TmpFile(TOPDIR "/dsymlink.samedir")));
+    EXPECT_OK(symlink("subdir/", TmpFile(TOPDIR "/dsymlink.down")));
+    EXPECT_OK(symlink(TmpFile(TOPDIR "/"), TmpFile(SUBDIR "/dsymlink.absolute_in")));
+    EXPECT_OK(symlink("/etc/", TmpFile(TOPDIR "/dsymlink.absolute_out")));
+    std::string dots2cwd = dots2root + tmpdir + "/";
+    EXPECT_OK(symlink(dots2cwd.c_str(), TmpFile(TOPDIR "/dsymlink.relative_in")));
+    std::string dots2etc = dots2root + "/etc/";
+    EXPECT_OK(symlink(dots2etc.c_str(), TmpFile(TOPDIR "/dsymlink.relative_out")));
+    EXPECT_OK(symlink("../", TmpFile(SUBDIR "/dsymlink.up")));
+
+    // Open directory FDs for those directories and for cwd.
+    dir_fd_ = open(TmpFile(TOPDIR), O_RDONLY);
+    EXPECT_OK(dir_fd_);
+    sub_fd_ = open(TmpFile(SUBDIR), O_RDONLY);
+    EXPECT_OK(sub_fd_);
+    cwd_ = openat(AT_FDCWD, ".", O_RDONLY);
+    EXPECT_OK(cwd_);
+    // Move into the directory for the test.
+    EXPECT_OK(fchdir(dir_fd_));
+  }
+  ~OpenatTest() {
+    fchdir(cwd_);
+    close(cwd_);
+    close(sub_fd_);
+    close(dir_fd_);
+    unlink(TmpFile(SUBDIR "/symlink.up"));
+    unlink(TmpFile(SUBDIR "/symlink.absolute_in"));
+    unlink(TmpFile(TOPDIR "/symlink.absolute_out"));
+    unlink(TmpFile(TOPDIR "/symlink.relative_in"));
+    unlink(TmpFile(TOPDIR "/symlink.relative_out"));
+    unlink(TmpFile(TOPDIR "/symlink.down"));
+    unlink(TmpFile(TOPDIR "/symlink.samedir"));
+    unlink(TmpFile(SUBDIR "/dsymlink.up"));
+    unlink(TmpFile(SUBDIR "/dsymlink.absolute_in"));
+    unlink(TmpFile(TOPDIR "/dsymlink.absolute_out"));
+    unlink(TmpFile(TOPDIR "/dsymlink.relative_in"));
+    unlink(TmpFile(TOPDIR "/dsymlink.relative_out"));
+    unlink(TmpFile(TOPDIR "/dsymlink.down"));
+    unlink(TmpFile(TOPDIR "/dsymlink.samedir"));
+    unlink(TmpFile(SUBDIR "/bottomfile"));
+    unlink(TmpFile(TOPDIR "/topfile"));
+    rmdir(TmpFile(SUBDIR));
+    rmdir(TmpFile(TOPDIR));
+  }
+
+  // Check openat(2) policing that is common across capabilities, capability mode and O_BENEATH.
+  void CheckPolicing(int oflag) {
+    // OK for normal access.
+    EXPECT_OPEN_OK(openat(dir_fd_, "topfile", O_RDONLY|oflag));
+    EXPECT_OPEN_OK(openat(dir_fd_, "subdir/bottomfile", O_RDONLY|oflag));
+    EXPECT_OPEN_OK(openat(sub_fd_, "bottomfile", O_RDONLY|oflag));
+    EXPECT_OPEN_OK(openat(sub_fd_, ".", O_RDONLY|oflag));
+
+    // Can't open paths with ".." in them.
+    EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "../topfile", O_RDONLY|oflag);
+    EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "../subdir/bottomfile", O_RDONLY|oflag);
+    EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "..", O_RDONLY|oflag);
+
+#ifdef HAVE_OPENAT_INTERMEDIATE_DOTDOT
+    // OK for dotdot lookups that don't escape the top directory
+    EXPECT_OPEN_OK(openat(dir_fd_, "subdir/../topfile", O_RDONLY|oflag));
+#endif
+
+    // Check that we can't escape the top directory by the cunning
+    // ruse of going via a subdirectory.
+    EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "subdir/../../etc/passwd", O_RDONLY|oflag);
+
+    // Should only be able to open symlinks that stay within the directory.
+    EXPECT_OPEN_OK(openat(dir_fd_, "symlink.samedir", O_RDONLY|oflag));
+    EXPECT_OPEN_OK(openat(dir_fd_, "symlink.down", O_RDONLY|oflag));
+    EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "symlink.absolute_out", O_RDONLY|oflag);
+    EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "symlink.relative_in", O_RDONLY|oflag);
+    EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "symlink.relative_out", O_RDONLY|oflag);
+    EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "symlink.absolute_in", O_RDONLY|oflag);
+    EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "symlink.up", O_RDONLY|oflag);
+
+    EXPECT_OPEN_OK(openat(dir_fd_, "dsymlink.samedir/topfile", O_RDONLY|oflag));
+    EXPECT_OPEN_OK(openat(dir_fd_, "dsymlink.down/bottomfile", O_RDONLY|oflag));
+    EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "dsymlink.absolute_out/passwd", O_RDONLY|oflag);
+    EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "dsymlink.relative_in/topfile", O_RDONLY|oflag);
+    EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "dsymlink.relative_out/passwd", O_RDONLY|oflag);
+    EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "dsymlink.absolute_in/topfile", O_RDONLY|oflag);
+    EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "dsymlink.up/topfile", O_RDONLY|oflag);
+
+    // Although recall that O_NOFOLLOW prevents symlink following in final component.
+    EXPECT_SYSCALL_FAIL(E_TOO_MANY_LINKS, openat(dir_fd_, "symlink.samedir", O_RDONLY|O_NOFOLLOW|oflag));
+    EXPECT_SYSCALL_FAIL(E_TOO_MANY_LINKS, openat(dir_fd_, "symlink.down", O_RDONLY|O_NOFOLLOW|oflag));
+  }
+
+ protected:
+  int dir_fd_;
+  int sub_fd_;
+  int cwd_;
+};
+
+TEST_F(OpenatTest, WithCapability) {
+  // Any kind of symlink can be opened relative to an ordinary directory FD.
+  EXPECT_OPEN_OK(openat(dir_fd_, "symlink.samedir", O_RDONLY));
+  EXPECT_OPEN_OK(openat(dir_fd_, "symlink.down", O_RDONLY));
+  EXPECT_OPEN_OK(openat(dir_fd_, "symlink.absolute_out", O_RDONLY));
+  EXPECT_OPEN_OK(openat(dir_fd_, "symlink.relative_in", O_RDONLY));
+  EXPECT_OPEN_OK(openat(dir_fd_, "symlink.relative_out", O_RDONLY));
+  EXPECT_OPEN_OK(openat(sub_fd_, "symlink.absolute_in", O_RDONLY));
+  EXPECT_OPEN_OK(openat(sub_fd_, "symlink.up", O_RDONLY));
+
+  // Now make both DFDs into Capsicum capabilities.
+  cap_rights_t r_rl;
+  cap_rights_init(&r_rl, CAP_READ, CAP_LOOKUP, CAP_FCHDIR);
+  EXPECT_OK(cap_rights_limit(dir_fd_, &r_rl));
+  EXPECT_OK(cap_rights_limit(sub_fd_, &r_rl));
+  CheckPolicing(0);
+  // Use of AT_FDCWD is independent of use of a capability.
+  // Can open paths starting with "/" against a capability dfd, because the dfd is ignored.
+}
+
+FORK_TEST_F(OpenatTest, InCapabilityMode) {
+  EXPECT_OK(cap_enter());  // Enter capability mode
+  CheckPolicing(0);
+
+  // Use of AT_FDCWD is banned in capability mode.
+  EXPECT_CAPMODE(openat(AT_FDCWD, "topfile", O_RDONLY));
+  EXPECT_CAPMODE(openat(AT_FDCWD, "subdir/bottomfile", O_RDONLY));
+  EXPECT_CAPMODE(openat(AT_FDCWD, "/etc/passwd", O_RDONLY));
+
+  // Can't open paths starting with "/" in capability mode.
+  EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "/etc/passwd", O_RDONLY);
+  EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "/etc/passwd", O_RDONLY);
+}
+
+#ifdef O_BENEATH
+TEST_F(OpenatTest, WithFlag) {
+  CheckPolicing(O_BENEATH);
+
+  // Check with AT_FDCWD.
+  EXPECT_OPEN_OK(openat(AT_FDCWD, "topfile", O_RDONLY|O_BENEATH));
+  EXPECT_OPEN_OK(openat(AT_FDCWD, "subdir/bottomfile", O_RDONLY|O_BENEATH));
+
+  // Can't open paths starting with "/" with O_BENEATH specified.
+  EXPECT_OPENAT_FAIL_TRAVERSAL(AT_FDCWD, "/etc/passwd", O_RDONLY|O_BENEATH);
+  EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "/etc/passwd", O_RDONLY|O_BENEATH);
+  EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "/etc/passwd", O_RDONLY|O_BENEATH);
+}
+
+FORK_TEST_F(OpenatTest, WithFlagInCapabilityMode) {
+  EXPECT_OK(cap_enter());  // Enter capability mode
+  CheckPolicing(O_BENEATH);
+}
+#endif
--- a/contrib/capsicum-test/overhead.cc
+++ b/contrib/capsicum-test/overhead.cc
@ -0,0 +1,45 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+#ifdef HAVE_SYSCALL
+double RepeatSyscall(int count, int nr, long arg1, long arg2, long arg3) {
+  const clock_t t0 = clock(); // or gettimeofday or whatever
+  for (int ii = 0; ii < count; ii++) {
+    syscall(nr, arg1, arg2, arg3);
+  }
+  const clock_t t1 = clock();
+  return (t1 - t0) / (double)CLOCKS_PER_SEC;
+}
+
+typedef int (*EntryFn)(void);
+
+double CompareSyscall(EntryFn entry_fn, int count, int nr,
+                      long arg1, long arg2, long arg3) {
+  double bare = RepeatSyscall(count, nr, arg1, arg2, arg3);
+  EXPECT_OK(entry_fn());
+  double capmode = RepeatSyscall(count, nr, arg1, arg2, arg3);
+  if (verbose) fprintf(stderr, "%d iterations bare=%fs capmode=%fs ratio=%.2f%%\n",
+                       count, bare, capmode, 100.0*capmode/bare);
+  if (bare==0.0) {
+    if (capmode==0.0) return 1.0;
+    return 999.0;
+  }
+  return capmode/bare;
+}
+
+FORK_TEST(Overhead, GetTid) {
+  EXPECT_GT(10, CompareSyscall(&cap_enter, 10000, __NR_gettid, 0, 0, 0));
+}
+FORK_TEST(Overhead, Seek) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_GT(50, CompareSyscall(&cap_enter, 10000, __NR_lseek, fd, 0, SEEK_SET));
+  close(fd);
+}
+#endif
--- a/contrib/capsicum-test/procdesc.cc
+++ b/contrib/capsicum-test/procdesc.cc
@ -0,0 +1,977 @@
+// Tests for the process descriptor API for Linux.
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <iomanip>
+#include <map>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+#ifndef __WALL
+// Linux requires __WALL in order for waitpid(specific_pid,...) to
+// see and reap any specific pid.  Define this to nothing for platforms
+// (FreeBSD) where it doesn't exist, to reduce macroing.
+#define __WALL 0
+#endif
+
+// TODO(drysdale): it would be nice to use proper synchronization between
+// processes, rather than synchronization-via-sleep; faster too.
+
+
+//------------------------------------------------
+// Utilities for the tests.
+
+static pid_t pdwait4_(int pd, int *status, int options, struct rusage *ru) {
+#ifdef HAVE_PDWAIT4
+  return pdwait4(pd, status, options, ru);
+#else
+  // Simulate pdwait4() with wait4(pdgetpid()); this won't work in capability mode.
+  pid_t pid = -1;
+  int rc = pdgetpid(pd, &pid);
+  if (rc < 0) {
+    return rc;
+  }
+  options |= __WALL;
+  return wait4(pid, status, options, ru);
+#endif
+}
+
+static void print_rusage(FILE *f, struct rusage *ru) {
+  fprintf(f, "  User CPU time=%ld.%06ld\n", (long)ru->ru_utime.tv_sec, (long)ru->ru_utime.tv_usec);
+  fprintf(f, "  System CPU time=%ld.%06ld\n", (long)ru->ru_stime.tv_sec, (long)ru->ru_stime.tv_usec);
+  fprintf(f, "  Max RSS=%ld\n", ru->ru_maxrss);
+}
+
+static void print_stat(FILE *f, const struct stat *stat) {
+  fprintf(f,
+          "{ .st_dev=%ld, st_ino=%ld, st_mode=%04o, st_nlink=%ld, st_uid=%d, st_gid=%d,\n"
+          "  .st_rdev=%ld, .st_size=%ld, st_blksize=%ld, .st_block=%ld,\n  "
+#ifdef HAVE_STAT_BIRTHTIME
+          ".st_birthtime=%ld, "
+#endif
+          ".st_atime=%ld, .st_mtime=%ld, .st_ctime=%ld}\n",
+          (long)stat->st_dev, (long)stat->st_ino, stat->st_mode,
+          (long)stat->st_nlink, stat->st_uid, stat->st_gid,
+          (long)stat->st_rdev, (long)stat->st_size, (long)stat->st_blksize,
+          (long)stat->st_blocks,
+#ifdef HAVE_STAT_BIRTHTIME
+          (long)stat->st_birthtime,
+#endif
+          (long)stat->st_atime, (long)stat->st_mtime, (long)stat->st_ctime);
+}
+
+static std::map<int,bool> had_signal;
+static void handle_signal(int x) {
+  had_signal[x] = true;
+}
+
+// Check that the given child process terminates as expected.
+void CheckChildFinished(pid_t pid, bool signaled=false) {
+  // Wait for the child to finish.
+  int rc;
+  int status = 0;
+  do {
+    rc = waitpid(pid, &status, __WALL);
+    if (rc < 0) {
+      fprintf(stderr, "Warning: waitpid error %s (%d)\n", strerror(errno), errno);
+      ADD_FAILURE() << "Failed to wait for child";
+      break;
+    } else if (rc == pid) {
+      break;
+    }
+  } while (true);
+  EXPECT_EQ(pid, rc);
+  if (rc == pid) {
+    if (signaled) {
+      EXPECT_TRUE(WIFSIGNALED(status));
+    } else {
+      EXPECT_TRUE(WIFEXITED(status)) << std::hex << status;
+      EXPECT_EQ(0, WEXITSTATUS(status));
+    }
+  }
+}
+
+//------------------------------------------------
+// Basic tests of process descriptor functionality
+
+TEST(Pdfork, Simple) {
+  int pd = -1;
+  pid_t parent = getpid_();
+  int pid = pdfork(&pd, 0);
+  EXPECT_OK(pid);
+  if (pid == 0) {
+    // Child: check pid values.
+    EXPECT_EQ(-1, pd);
+    EXPECT_NE(parent, getpid_());
+    EXPECT_EQ(parent, getppid());
+    sleep(1);
+    exit(0);
+  }
+  usleep(100);  // ensure the child has a chance to run
+  EXPECT_NE(-1, pd);
+  EXPECT_PID_ALIVE(pid);
+  int pid_got;
+  EXPECT_OK(pdgetpid(pd, &pid_got));
+  EXPECT_EQ(pid, pid_got);
+
+  // Wait long enough for the child to exit().
+  sleep(2);
+  EXPECT_PID_ZOMBIE(pid);
+
+  // Wait for the the child.
+  int status;
+  struct rusage ru;
+  memset(&ru, 0, sizeof(ru));
+  int waitrc = pdwait4_(pd, &status, 0, &ru);
+  EXPECT_EQ(pid, waitrc);
+  if (verbose) {
+    fprintf(stderr, "For pd %d pid %d:\n", pd, pid);
+    print_rusage(stderr, &ru);
+  }
+  EXPECT_PID_GONE(pid);
+
+  // Can only pdwait4(pd) once (as initial call reaps zombie).
+  memset(&ru, 0, sizeof(ru));
+  EXPECT_EQ(-1, pdwait4_(pd, &status, 0, &ru));
+  EXPECT_EQ(ECHILD, errno);
+
+  EXPECT_OK(close(pd));
+}
+
+TEST(Pdfork, InvalidFlag) {
+  int pd = -1;
+  int pid = pdfork(&pd, PD_DAEMON<<5);
+  if (pid == 0) {
+    exit(1);
+  }
+  EXPECT_EQ(-1, pid);
+  EXPECT_EQ(EINVAL, errno);
+  if (pid > 0) waitpid(pid, NULL, __WALL);
+}
+
+TEST(Pdfork, TimeCheck) {
+  time_t now = time(NULL);  // seconds since epoch
+  EXPECT_NE(-1, now);
+  if (verbose) fprintf(stderr, "Calling pdfork around %ld\n", (long)(long)now);
+
+  int pd = -1;
+  pid_t pid = pdfork(&pd, 0);
+  EXPECT_OK(pid);
+  if (pid == 0) {
+    // Child: check we didn't get a valid process descriptor then exit.
+    EXPECT_EQ(-1, pdgetpid(pd, &pid));
+    EXPECT_EQ(EBADF, errno);
+    exit(HasFailure());
+  }
+
+#ifdef HAVE_PROCDESC_FSTAT
+  // Parent process. Ensure that [acm]times have been set correctly.
+  struct stat stat;
+  memset(&stat, 0, sizeof(stat));
+  EXPECT_OK(fstat(pd, &stat));
+  if (verbose) print_stat(stderr, &stat);
+
+#ifdef HAVE_STAT_BIRTHTIME
+  EXPECT_GE(now, stat.st_birthtime);
+  EXPECT_EQ(stat.st_birthtime, stat.st_atime);
+#endif
+  EXPECT_LT((now - stat.st_atime), 2);
+  EXPECT_EQ(stat.st_atime, stat.st_ctime);
+  EXPECT_EQ(stat.st_ctime, stat.st_mtime);
+#endif
+
+  // Wait for the child to finish.
+  pid_t pd_pid = -1;
+  EXPECT_OK(pdgetpid(pd, &pd_pid));
+  EXPECT_EQ(pid, pd_pid);
+  CheckChildFinished(pid);
+}
+
+TEST(Pdfork, UseDescriptor) {
+  int pd = -1;
+  pid_t pid = pdfork(&pd, 0);
+  EXPECT_OK(pid);
+  if (pid == 0) {
+    // Child: immediately exit
+    exit(0);
+  }
+  CheckChildFinished(pid);
+}
+
+TEST(Pdfork, NonProcessDescriptor) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+  // pd*() operations should fail on a non-process descriptor.
+  EXPECT_EQ(-1, pdkill(fd, SIGUSR1));
+  int status;
+  EXPECT_EQ(-1, pdwait4_(fd, &status, 0, NULL));
+  pid_t pid;
+  EXPECT_EQ(-1, pdgetpid(fd, &pid));
+  close(fd);
+}
+
+static void *SubThreadMain(void *) {
+  while (true) {
+    if (verbose) fprintf(stderr, "      subthread: \"I aten't dead\"\n");
+    usleep(100000);
+  }
+  return NULL;
+}
+
+static void *ThreadMain(void *) {
+  int pd;
+  pid_t child = pdfork(&pd, 0);
+  if (child == 0) {
+    // Child: start a subthread then loop
+    pthread_t child_subthread;
+    EXPECT_OK(pthread_create(&child_subthread, NULL, SubThreadMain, NULL));
+    while (true) {
+      if (verbose) fprintf(stderr, "    pdforked process %d: \"I aten't dead\"\n", getpid());
+      usleep(100000);
+    }
+    exit(0);
+  }
+  if (verbose) fprintf(stderr, "  thread generated pd %d\n", pd);
+  sleep(2);
+
+  // Pass the process descriptor back to the main thread.
+  return reinterpret_cast<void *>(pd);
+}
+
+TEST(Pdfork, FromThread) {
+  // Fire off a new thread to do all of the creation work.
+  pthread_t child_thread;
+  EXPECT_OK(pthread_create(&child_thread, NULL, ThreadMain, NULL));
+  void *data;
+  EXPECT_OK(pthread_join(child_thread, &data));
+  int pd = reinterpret_cast<intptr_t>(data);
+  if (verbose) fprintf(stderr, "retrieved pd %d from terminated thread\n", pd);
+
+  // Kill and reap.
+  pid_t pid;
+  EXPECT_OK(pdgetpid(pd, &pid));
+  EXPECT_OK(pdkill(pd, SIGKILL));
+  int status;
+  EXPECT_EQ(pid, pdwait4_(pd, &status, 0, NULL));
+  EXPECT_TRUE(WIFSIGNALED(status));
+}
+
+//------------------------------------------------
+// More complicated tests.
+
+
+// Test fixture that pdfork()s off a child process, which terminates
+// when it receives anything on a pipe.
+class PipePdforkBase : public ::testing::Test {
+ public:
+  PipePdforkBase(int pdfork_flags) : pd_(-1), pid_(-1) {
+    had_signal.clear();
+    int pipes[2];
+    EXPECT_OK(pipe(pipes));
+    pipe_ = pipes[1];
+    int parent = getpid_();
+    if (verbose) fprintf(stderr, "[%d] about to pdfork()\n", getpid_());
+    int rc = pdfork(&pd_, pdfork_flags);
+    EXPECT_OK(rc);
+    if (rc == 0) {
+      // Child process: blocking-read an int from the pipe then exit with that value.
+      EXPECT_NE(parent, getpid_());
+      EXPECT_EQ(parent, getppid());
+      if (verbose) fprintf(stderr, "  [%d] child of %d waiting for value on pipe\n", getpid_(), getppid());
+      read(pipes[0], &rc, sizeof(rc));
+      if (verbose) fprintf(stderr, "  [%d] got value %d on pipe, exiting\n", getpid_(), rc);
+      exit(rc);
+    }
+    pid_ = rc;
+    usleep(100);  // ensure the child has a chance to run
+  }
+  ~PipePdforkBase() {
+    // Terminate by any means necessary.
+    if (pd_ > 0) {
+      pdkill(pd_, SIGKILL);
+      close(pd_);
+    }
+    if (pid_ > 0) {
+      kill(pid_, SIGKILL);
+      waitpid(pid_, NULL, __WALL|WNOHANG);
+    }
+    // Check signal expectations.
+    EXPECT_FALSE(had_signal[SIGCHLD]);
+  }
+  int TerminateChild() {
+    // Tell the child to exit.
+    int zero = 0;
+    if (verbose) fprintf(stderr, "[%d] write 0 to pipe\n", getpid_());
+    return write(pipe_, &zero, sizeof(zero));
+  }
+ protected:
+  int pd_;
+  int pipe_;
+  pid_t pid_;
+};
+
+class PipePdfork : public PipePdforkBase {
+ public:
+  PipePdfork() : PipePdforkBase(0) {}
+};
+
+class PipePdforkDaemon : public PipePdforkBase {
+ public:
+  PipePdforkDaemon() : PipePdforkBase(PD_DAEMON) {}
+};
+
+// Can we poll a process descriptor?
+TEST_F(PipePdfork, Poll) {
+  // Poll the process descriptor, nothing happening.
+  struct pollfd fdp;
+  fdp.fd = pd_;
+  fdp.events = POLLIN | POLLERR | POLLHUP;
+  fdp.revents = 0;
+  EXPECT_EQ(0, poll(&fdp, 1, 0));
+
+  TerminateChild();
+
+  // Poll again, should have activity on the process descriptor.
+  EXPECT_EQ(1, poll(&fdp, 1, 2000));
+  EXPECT_TRUE(fdp.revents & POLLHUP);
+
+  // Poll a third time, still have POLLHUP.
+  fdp.revents = 0;
+  EXPECT_EQ(1, poll(&fdp, 1, 0));
+  EXPECT_TRUE(fdp.revents & POLLHUP);
+}
+
+// Can multiple processes poll on the same descriptor?
+TEST_F(PipePdfork, PollMultiple) {
+  int child = fork();
+  EXPECT_OK(child);
+  if (child == 0) {
+    // Child: wait to give time for setup, then write to the pipe (which will
+    // induce exit of the pdfork()ed process) and exit.
+    sleep(1);
+    TerminateChild();
+    exit(0);
+  }
+  usleep(100);  // ensure the child has a chance to run
+
+  // Fork again
+  int doppel = fork();
+  EXPECT_OK(doppel);
+  // We now have:
+  //   pid A: main process, here
+  //   |--pid B: pdfork()ed process, blocked on read()
+  //   |--pid C: fork()ed process, in sleep(1) above
+  //   +--pid D: doppel process, here
+
+  // Both A and D execute the following code.
+  // First, check no activity on the process descriptor yet.
+  struct pollfd fdp;
+  fdp.fd = pd_;
+  fdp.events = POLLIN | POLLERR | POLLHUP;
+  fdp.revents = 0;
+  EXPECT_EQ(0, poll(&fdp, 1, 0));
+
+  // Now, wait (indefinitely) for activity on the process descriptor.
+  // We expect:
+  //  - pid C will finish its sleep, write to the pipe and exit
+  //  - pid B will unblock from read(), and exit
+  //  - this will generate an event on the process descriptor...
+  //  - ...in both process A and process D.
+  EXPECT_EQ(1, poll(&fdp, 1, 2000));
+  EXPECT_TRUE(fdp.revents & POLLHUP);
+
+  if (doppel == 0) {
+    // Child: process D exits.
+    exit(0);
+  } else {
+    // Parent: wait on process D.
+    int rc = 0;
+    waitpid(doppel, &rc, __WALL);
+    EXPECT_TRUE(WIFEXITED(rc));
+    EXPECT_EQ(0, WEXITSTATUS(rc));
+    // Also wait on process B.
+    CheckChildFinished(child);
+  }
+}
+
+// Check that exit status/rusage for a dead pdfork()ed child can be retrieved
+// via any process descriptor, multiple times.
+TEST_F(PipePdfork, MultipleRetrieveExitStatus) {
+  EXPECT_PID_ALIVE(pid_);
+  int pd_copy = dup(pd_);
+  EXPECT_LT(0, TerminateChild());
+
+  int status;
+  struct rusage ru;
+  memset(&ru, 0, sizeof(ru));
+  int waitrc = pdwait4_(pd_copy, &status, 0, &ru);
+  EXPECT_EQ(pid_, waitrc);
+  if (verbose) {
+    fprintf(stderr, "For pd %d -> pid %d:\n", pd_, pid_);
+    print_rusage(stderr, &ru);
+  }
+  EXPECT_PID_GONE(pid_);
+
+#ifdef NOTYET
+  // Child has been reaped, so original process descriptor dangles but
+  // still has access to rusage information.
+  memset(&ru, 0, sizeof(ru));
+  EXPECT_EQ(0, pdwait4_(pd_, &status, 0, &ru));
+#endif
+  close(pd_copy);
+}
+
+TEST_F(PipePdfork, ChildExit) {
+  EXPECT_PID_ALIVE(pid_);
+  EXPECT_LT(0, TerminateChild());
+  EXPECT_PID_DEAD(pid_);
+
+  int status;
+  int rc = pdwait4_(pd_, &status, 0, NULL);
+  EXPECT_OK(rc);
+  EXPECT_EQ(pid_, rc);
+  pid_ = 0;
+}
+
+#ifdef HAVE_PROC_FDINFO
+TEST_F(PipePdfork, FdInfo) {
+  char buffer[1024];
+  sprintf(buffer, "/proc/%d/fdinfo/%d", getpid_(), pd_);
+  int procfd = open(buffer, O_RDONLY);
+  EXPECT_OK(procfd);
+
+  EXPECT_OK(read(procfd, buffer, sizeof(buffer)));
+  // The fdinfo should include the file pos of the underlying file
+  EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t0")) << buffer;
+  // ...and the underlying pid
+  char pidline[256];
+  sprintf(pidline, "pid:\t%d", pid_);
+  EXPECT_NE((char*)NULL, strstr(buffer, pidline)) << buffer;
+  close(procfd);
+}
+#endif
+
+// Closing a normal process descriptor terminates the underlying process.
+TEST_F(PipePdfork, Close) {
+  sighandler_t original = signal(SIGCHLD, handle_signal);
+  EXPECT_PID_ALIVE(pid_);
+  int status;
+  EXPECT_EQ(0, waitpid(pid_, &status, __WALL|WNOHANG));
+
+  EXPECT_OK(close(pd_));
+  pd_ = -1;
+  EXPECT_FALSE(had_signal[SIGCHLD]);
+  EXPECT_PID_DEAD(pid_);
+
+#ifdef __FreeBSD__
+  EXPECT_EQ(-1, waitpid(pid_, NULL, __WALL));
+  EXPECT_EQ(errno, ECHILD);
+#else
+  // Having closed the process descriptor means that pdwait4(pd) now doesn't work.
+  int rc = pdwait4_(pd_, &status, 0, NULL);
+  EXPECT_EQ(-1, rc);
+  EXPECT_EQ(EBADF, errno);
+
+  // Closing all process descriptors means the the child can only be reaped via pid.
+  EXPECT_EQ(pid_, waitpid(pid_, &status, __WALL|WNOHANG));
+#endif
+  signal(SIGCHLD, original);
+}
+
+TEST_F(PipePdfork, CloseLast) {
+  sighandler_t original = signal(SIGCHLD, handle_signal);
+  // Child should only die when last process descriptor is closed.
+  EXPECT_PID_ALIVE(pid_);
+  int pd_other = dup(pd_);
+
+  EXPECT_OK(close(pd_));
+  pd_ = -1;
+
+  EXPECT_PID_ALIVE(pid_);
+  int status;
+  EXPECT_EQ(0, waitpid(pid_, &status, __WALL|WNOHANG));
+
+  // Can no longer pdwait4() the closed process descriptor...
+  EXPECT_EQ(-1, pdwait4_(pd_, &status, WNOHANG, NULL));
+  EXPECT_EQ(EBADF, errno);
+  // ...but can pdwait4() the still-open process descriptor.
+  errno = 0;
+  EXPECT_EQ(0, pdwait4_(pd_other, &status, WNOHANG, NULL));
+  EXPECT_EQ(0, errno);
+
+  EXPECT_OK(close(pd_other));
+  EXPECT_PID_DEAD(pid_);
+
+  EXPECT_FALSE(had_signal[SIGCHLD]);
+  signal(SIGCHLD, original);
+}
+
+FORK_TEST(Pdfork, OtherUser) {
+  REQUIRE_ROOT();
+  int pd;
+  pid_t pid = pdfork(&pd, 0);
+  EXPECT_OK(pid);
+  if (pid == 0) {
+    // Child process: loop forever.
+    while (true) usleep(100000);
+  }
+  usleep(100);
+
+  // Now that the second process has been pdfork()ed, change euid.
+  setuid(other_uid);
+  if (verbose) fprintf(stderr, "uid=%d euid=%d\n", getuid(), geteuid());
+
+  // Fail to kill child with normal PID operation.
+  EXPECT_EQ(-1, kill(pid, SIGKILL));
+  EXPECT_EQ(EPERM, errno);
+  EXPECT_PID_ALIVE(pid);
+
+  // Succeed with pdkill though.
+  EXPECT_OK(pdkill(pd, SIGKILL));
+  EXPECT_PID_ZOMBIE(pid);
+
+  int status;
+  int rc = pdwait4_(pd, &status, WNOHANG, NULL);
+  EXPECT_OK(rc);
+  EXPECT_EQ(pid, rc);
+  EXPECT_TRUE(WIFSIGNALED(status));
+}
+
+TEST_F(PipePdfork, WaitPidThenPd) {
+  TerminateChild();
+  int status;
+  // If we waitpid(pid) first...
+  int rc = waitpid(pid_, &status, __WALL);
+  EXPECT_OK(rc);
+  EXPECT_EQ(pid_, rc);
+
+#ifdef NOTYET
+  // ...the zombie is reaped but we can still subsequently pdwait4(pd).
+  EXPECT_EQ(0, pdwait4_(pd_, &status, 0, NULL));
+#endif
+}
+
+TEST_F(PipePdfork, WaitPdThenPid) {
+  TerminateChild();
+  int status;
+  // If we pdwait4(pd) first...
+  int rc = pdwait4_(pd_, &status, 0, NULL);
+  EXPECT_OK(rc);
+  EXPECT_EQ(pid_, rc);
+
+  // ...the zombie is reaped and cannot subsequently waitpid(pid).
+  EXPECT_EQ(-1, waitpid(pid_, &status, __WALL));
+  EXPECT_EQ(ECHILD, errno);
+}
+
+// Setting PD_DAEMON prevents close() from killing the child.
+TEST_F(PipePdforkDaemon, Close) {
+  EXPECT_OK(close(pd_));
+  pd_ = -1;
+  EXPECT_PID_ALIVE(pid_);
+
+  // Can still explicitly kill it via the pid.
+  if (pid_ > 0) {
+    EXPECT_OK(kill(pid_, SIGKILL));
+    EXPECT_PID_DEAD(pid_);
+  }
+}
+
+static void TestPdkill(pid_t pid, int pd) {
+  EXPECT_PID_ALIVE(pid);
+  // SIGCONT is ignored by default.
+  EXPECT_OK(pdkill(pd, SIGCONT));
+  EXPECT_PID_ALIVE(pid);
+
+  // SIGINT isn't
+  EXPECT_OK(pdkill(pd, SIGINT));
+  EXPECT_PID_DEAD(pid);
+
+  // pdkill() on zombie is no-op.
+  errno = 0;
+  EXPECT_EQ(0, pdkill(pd, SIGINT));
+  EXPECT_EQ(0, errno);
+
+  // pdkill() on reaped process gives -ESRCH.
+  CheckChildFinished(pid, true);
+  EXPECT_EQ(-1, pdkill(pd, SIGINT));
+  EXPECT_EQ(ESRCH, errno);
+}
+
+TEST_F(PipePdfork, Pdkill) {
+  TestPdkill(pid_, pd_);
+}
+
+TEST_F(PipePdforkDaemon, Pdkill) {
+  TestPdkill(pid_, pd_);
+}
+
+TEST(Pdfork, PdkillOtherSignal) {
+  int pd = -1;
+  int pid = pdfork(&pd, 0);
+  EXPECT_OK(pid);
+  if (pid == 0) {
+    // Child: watch for SIGUSR1 forever.
+    had_signal.clear();
+    signal(SIGUSR1, handle_signal);
+    while (!had_signal[SIGUSR1]) {
+      usleep(100000);
+    }
+    exit(123);
+  }
+  sleep(1);
+
+  // Send an invalid signal.
+  EXPECT_EQ(-1, pdkill(pd, 0xFFFF));
+  EXPECT_EQ(EINVAL, errno);
+
+  // Send an expected SIGUSR1 to the pdfork()ed child.
+  EXPECT_PID_ALIVE(pid);
+  pdkill(pd, SIGUSR1);
+  EXPECT_PID_DEAD(pid);
+
+  // Child's exit status confirms whether it received the signal.
+  int status;
+  int rc = waitpid(pid, &status, __WALL);
+  EXPECT_OK(rc);
+  EXPECT_EQ(pid, rc);
+  EXPECT_TRUE(WIFEXITED(status)) << "0x" << std::hex << rc;
+  EXPECT_EQ(123, WEXITSTATUS(status));
+}
+
+pid_t PdforkParentDeath(int pdfork_flags) {
+  // Set up:
+  //   pid A: main process, here
+  //   +--pid B: fork()ed process, sleep(4)s then exits
+  //      +--pid C: pdfork()ed process, looping forever
+  int sock_fds[2];
+  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
+  if (verbose) fprintf(stderr, "[%d] parent about to fork()...\n", getpid_());
+  pid_t child = fork();
+  EXPECT_OK(child);
+  if (child == 0) {
+    int pd;
+    if (verbose) fprintf(stderr, "  [%d] child about to pdfork()...\n", getpid_());
+    pid_t grandchild = pdfork(&pd, pdfork_flags);
+    if (grandchild == 0) {
+      while (true) {
+        if (verbose) fprintf(stderr, "    [%d] grandchild: \"I aten't dead\"\n", getpid_());
+        sleep(1);
+      }
+    }
+    if (verbose) fprintf(stderr, "  [%d] pdfork()ed grandchild %d, sending ID to parent\n", getpid_(), grandchild);
+    // send grandchild pid to parent
+    write(sock_fds[1], &grandchild, sizeof(grandchild));
+    sleep(4);
+    if (verbose) fprintf(stderr, "  [%d] child terminating\n", getpid_());
+    exit(0);
+  }
+  if (verbose) fprintf(stderr, "[%d] fork()ed child is %d\n", getpid_(), child);
+  pid_t grandchild;
+  read(sock_fds[0], &grandchild, sizeof(grandchild));
+  if (verbose) fprintf(stderr, "[%d] receive grandchild id %d\n", getpid_(), grandchild);
+  EXPECT_PID_ALIVE(child);
+  EXPECT_PID_ALIVE(grandchild);
+  sleep(6);
+  // Child dies, closing its process descriptor for the grandchild.
+  EXPECT_PID_DEAD(child);
+  CheckChildFinished(child);
+  return grandchild;
+}
+
+TEST(Pdfork, Bagpuss) {
+  // "And of course when Bagpuss goes to sleep, all his friends go to sleep too"
+  pid_t grandchild = PdforkParentDeath(0);
+  // By default: child death => closed process descriptor => grandchild death.
+  EXPECT_PID_DEAD(grandchild);
+}
+
+TEST(Pdfork, BagpussDaemon) {
+  pid_t grandchild = PdforkParentDeath(PD_DAEMON);
+  // With PD_DAEMON: child death => closed process descriptor => no effect on grandchild.
+  EXPECT_PID_ALIVE(grandchild);
+  if (grandchild > 0) {
+    EXPECT_OK(kill(grandchild, SIGKILL));
+  }
+}
+
+// The exit of a pdfork()ed process should not generate SIGCHLD.
+TEST_F(PipePdfork, NoSigchld) {
+  had_signal.clear();
+  sighandler_t original = signal(SIGCHLD, handle_signal);
+  TerminateChild();
+  int rc = 0;
+  // Can waitpid() for the specific pid of the pdfork()ed child.
+  EXPECT_EQ(pid_, waitpid(pid_, &rc, __WALL));
+  EXPECT_TRUE(WIFEXITED(rc)) << "0x" << std::hex << rc;
+  EXPECT_FALSE(had_signal[SIGCHLD]);
+  signal(SIGCHLD, original);
+}
+
+// The exit of a pdfork()ed process whose process descriptors have
+// all been closed should generate SIGCHLD.  The child process needs
+// PD_DAEMON to survive the closure of the process descriptors.
+TEST_F(PipePdforkDaemon, NoPDSigchld) {
+  had_signal.clear();
+  sighandler_t original = signal(SIGCHLD, handle_signal);
+
+  EXPECT_OK(close(pd_));
+  TerminateChild();
+#ifdef __FreeBSD__
+  EXPECT_EQ(-1, waitpid(pid_, NULL, __WALL));
+  EXPECT_EQ(errno, ECHILD);
+#else
+  int rc = 0;
+  // Can waitpid() for the specific pid of the pdfork()ed child.
+  EXPECT_EQ(pid_, waitpid(pid_, &rc, __WALL));
+  EXPECT_TRUE(WIFEXITED(rc)) << "0x" << std::hex << rc;
+#endif
+  EXPECT_FALSE(had_signal[SIGCHLD]);
+  signal(SIGCHLD, original);
+}
+
+#ifdef HAVE_PROCDESC_FSTAT
+TEST_F(PipePdfork, ModeBits) {
+  // Owner rwx bits indicate liveness of child
+  struct stat stat;
+  memset(&stat, 0, sizeof(stat));
+  EXPECT_OK(fstat(pd_, &stat));
+  if (verbose) print_stat(stderr, &stat);
+  EXPECT_EQ(S_IRWXU, (long)(stat.st_mode & S_IRWXU));
+
+  TerminateChild();
+  usleep(100000);
+
+  memset(&stat, 0, sizeof(stat));
+  EXPECT_OK(fstat(pd_, &stat));
+  if (verbose) print_stat(stderr, &stat);
+  EXPECT_EQ(0, (int)(stat.st_mode & S_IRWXU));
+}
+#endif
+
+TEST_F(PipePdfork, WildcardWait) {
+  // TODO(FreeBSD): make wildcard wait ignore pdfork()ed children
+  // https://bugs.freebsd.org/201054
+  TerminateChild();
+  sleep(1);  // Ensure child is truly dead.
+
+  // Wildcard waitpid(-1) should not see the pdfork()ed child because
+  // there is still a process descriptor for it.
+  int rc;
+  EXPECT_EQ(-1, waitpid(-1, &rc, WNOHANG));
+  EXPECT_EQ(ECHILD, errno);
+
+  EXPECT_OK(close(pd_));
+  pd_ = -1;
+}
+
+FORK_TEST(Pdfork, Pdkill) {
+  had_signal.clear();
+  int pd;
+  pid_t pid = pdfork(&pd, 0);
+  EXPECT_OK(pid);
+
+  if (pid == 0) {
+    // Child: set a SIGINT handler and sleep.
+    had_signal.clear();
+    signal(SIGINT, handle_signal);
+    if (verbose) fprintf(stderr, "[%d] child about to sleep(10)\n", getpid_());
+    int left = sleep(10);
+    if (verbose) fprintf(stderr, "[%d] child slept, %d sec left, had[SIGINT]=%d\n",
+                         getpid_(), left, had_signal[SIGINT]);
+    // Expect this sleep to be interrupted by the signal (and so left > 0).
+    exit(left == 0);
+  }
+
+  // Parent: get child's PID.
+  pid_t pd_pid;
+  EXPECT_OK(pdgetpid(pd, &pd_pid));
+  EXPECT_EQ(pid, pd_pid);
+
+  // Interrupt the child after a second.
+  sleep(1);
+  EXPECT_OK(pdkill(pd, SIGINT));
+
+  // Make sure the child finished properly (caught signal then exited).
+  CheckChildFinished(pid);
+}
+
+FORK_TEST(Pdfork, PdkillSignal) {
+  int pd;
+  pid_t pid = pdfork(&pd, 0);
+  EXPECT_OK(pid);
+
+  if (pid == 0) {
+    // Child: sleep.  No SIGINT handler.
+    if (verbose) fprintf(stderr, "[%d] child about to sleep(10)\n", getpid_());
+    int left = sleep(10);
+    if (verbose) fprintf(stderr, "[%d] child slept, %d sec left\n", getpid_(), left);
+    exit(99);
+  }
+
+  // Kill the child (as it doesn't handle SIGINT).
+  sleep(1);
+  EXPECT_OK(pdkill(pd, SIGINT));
+
+  // Make sure the child finished properly (terminated by signal).
+  CheckChildFinished(pid, true);
+}
+
+//------------------------------------------------
+// Test interactions with other parts of Capsicum:
+//  - capability mode
+//  - capabilities
+
+FORK_TEST(Pdfork, DaemonUnrestricted) {
+  EXPECT_OK(cap_enter());
+  int fd;
+
+  // Capability mode leaves pdfork() available, with and without flag.
+  int rc;
+  rc = pdfork(&fd, PD_DAEMON);
+  EXPECT_OK(rc);
+  if (rc == 0) {
+    // Child: immediately terminate.
+    exit(0);
+  }
+
+  rc = pdfork(&fd, 0);
+  EXPECT_OK(rc);
+  if (rc == 0) {
+    // Child: immediately terminate.
+    exit(0);
+  }
+}
+
+TEST(Pdfork, MissingRights) {
+  pid_t parent = getpid_();
+  int pd = -1;
+  pid_t pid = pdfork(&pd, 0);
+  EXPECT_OK(pid);
+  if (pid == 0) {
+    // Child: loop forever.
+    EXPECT_NE(parent, getpid_());
+    while (true) sleep(1);
+  }
+  // Create two capabilities from the process descriptor.
+  cap_rights_t r_ro;
+  cap_rights_init(&r_ro, CAP_READ, CAP_LOOKUP);
+  int cap_incapable = dup(pd);
+  EXPECT_OK(cap_incapable);
+  EXPECT_OK(cap_rights_limit(cap_incapable, &r_ro));
+  cap_rights_t r_pdall;
+  cap_rights_init(&r_pdall, CAP_PDGETPID, CAP_PDWAIT, CAP_PDKILL);
+  int cap_capable = dup(pd);
+  EXPECT_OK(cap_capable);
+  EXPECT_OK(cap_rights_limit(cap_capable, &r_pdall));
+
+  pid_t other_pid;
+  EXPECT_NOTCAPABLE(pdgetpid(cap_incapable, &other_pid));
+  EXPECT_NOTCAPABLE(pdkill(cap_incapable, SIGINT));
+  int status;
+  EXPECT_NOTCAPABLE(pdwait4_(cap_incapable, &status, 0, NULL));
+
+  EXPECT_OK(pdgetpid(cap_capable, &other_pid));
+  EXPECT_EQ(pid, other_pid);
+  EXPECT_OK(pdkill(cap_capable, SIGINT));
+  int rc = pdwait4_(pd, &status, 0, NULL);
+  EXPECT_OK(rc);
+  EXPECT_EQ(pid, rc);
+}
+
+
+//------------------------------------------------
+// Passing process descriptors between processes.
+
+TEST_F(PipePdfork, PassProcessDescriptor) {
+  int sock_fds[2];
+  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
+
+  struct msghdr mh;
+  mh.msg_name = NULL;  // No address needed
+  mh.msg_namelen = 0;
+  char buffer1[1024];
+  struct iovec iov[1];
+  iov[0].iov_base = buffer1;
+  iov[0].iov_len = sizeof(buffer1);
+  mh.msg_iov = iov;
+  mh.msg_iovlen = 1;
+  char buffer2[1024];
+  mh.msg_control = buffer2;
+  mh.msg_controllen = sizeof(buffer2);
+  struct cmsghdr *cmptr;
+
+  if (verbose) fprintf(stderr, "[%d] about to fork()\n", getpid_());
+  pid_t child2 = fork();
+  if (child2 == 0) {
+    // Child: close our copy of the original process descriptor.
+    close(pd_);
+
+    // Child: wait to receive process descriptor over socket
+    if (verbose) fprintf(stderr, "  [%d] child of %d waiting for process descriptor on socket\n", getpid_(), getppid());
+    int rc = recvmsg(sock_fds[0], &mh, 0);
+    EXPECT_OK(rc);
+    EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen);
+    cmptr = CMSG_FIRSTHDR(&mh);
+    int pd = *(int*)CMSG_DATA(cmptr);
+    EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len);
+    cmptr = CMSG_NXTHDR(&mh, cmptr);
+    EXPECT_TRUE(cmptr == NULL);
+    if (verbose) fprintf(stderr, "  [%d] got process descriptor %d on socket\n", getpid_(), pd);
+
+    // Child: confirm we can do pd*() operations on the process descriptor
+    pid_t other;
+    EXPECT_OK(pdgetpid(pd, &other));
+    if (verbose) fprintf(stderr, "  [%d] process descriptor %d is pid %d\n", getpid_(), pd, other);
+
+    sleep(2);
+    if (verbose) fprintf(stderr, "  [%d] close process descriptor %d\n", getpid_(), pd);
+    close(pd);
+
+    // Last process descriptor closed, expect death
+    EXPECT_PID_DEAD(other);
+
+    exit(HasFailure());
+  }
+  usleep(1000);  // Ensure subprocess runs
+
+  // Send the process descriptor over the pipe to the sub-process
+  mh.msg_controllen = CMSG_LEN(sizeof(int));
+  cmptr = CMSG_FIRSTHDR(&mh);
+  cmptr->cmsg_level = SOL_SOCKET;
+  cmptr->cmsg_type = SCM_RIGHTS;
+  cmptr->cmsg_len = CMSG_LEN(sizeof(int));
+  *(int *)CMSG_DATA(cmptr) = pd_;
+  buffer1[0] = 0;
+  iov[0].iov_len = 1;
+  sleep(1);
+  if (verbose) fprintf(stderr, "[%d] send process descriptor %d on socket\n", getpid_(), pd_);
+  int rc = sendmsg(sock_fds[1], &mh, 0);
+  EXPECT_OK(rc);
+
+  if (verbose) fprintf(stderr, "[%d] close process descriptor %d\n", getpid_(), pd_);
+  close(pd_);  // Not last open process descriptor
+
+  // wait for child2
+  int status;
+  EXPECT_EQ(child2, waitpid(child2, &status, __WALL));
+  rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  // confirm death all round
+  EXPECT_PID_DEAD(child2);
+  EXPECT_PID_DEAD(pid_);
+}
--- a/contrib/capsicum-test/rename.cc
+++ b/contrib/capsicum-test/rename.cc
@ -0,0 +1,49 @@
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "./capsicum-test.h"
+
+// There was a Capsicum-related regression in FreeBSD renameat,
+// which affects certain cases independent of Capsicum or capability mode
+//
+// added to test the renameat syscall for the case that
+//    - the "to" file already exists
+//    - the "to" file is specified by an absolute path
+//    - the "to" file descriptor is used
+//          (this descriptor should be ignored if absolute path is provided)
+//
+// details at: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=222258
+
+
+const char * create_tmp_src(const char* filename) {
+    const char *src_path = TmpFile(filename);
+    int src_fd = open(src_path, O_CREAT|O_RDWR, 0644);
+    close(src_fd);
+    return src_path;
+}
+
+TEST(Rename, AbsDesignationSame) {
+    const char *src_path = create_tmp_src("rename_test");
+    EXPECT_OK(rename(src_path, src_path));
+    unlink(src_path);
+}
+
+TEST(RenameAt, AbsDesignationSame) {
+    const char *src_path = create_tmp_src("renameat_test");
+    const char *dir_path = TmpFile("renameat_test_dir");
+
+    EXPECT_OK(mkdir(dir_path, 0755));
+    // random temporary directory descriptor
+    int dfd = open(dir_path, O_DIRECTORY);
+
+    // Various rename from/to the same absolute path; in each case the source
+    // and dest directory FDs should be irrelevant.
+    EXPECT_OK(renameat(AT_FDCWD, src_path, AT_FDCWD, src_path));
+    EXPECT_OK(renameat(AT_FDCWD, src_path, dfd, src_path));
+    EXPECT_OK(renameat(dfd, src_path, AT_FDCWD, src_path));
+    EXPECT_OK(renameat(dfd, src_path, dfd, src_path));
+
+    close(dfd);
+    rmdir(dir_path);
+    unlink(src_path);
+}
--- a/contrib/capsicum-test/sctp.cc
+++ b/contrib/capsicum-test/sctp.cc
@ -0,0 +1,212 @@
+// Tests of SCTP functionality
+// Requires: libsctp-dev package on Debian Linux, CONFIG_IP_SCTP in kernel config
+#ifdef HAVE_SCTP
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/sctp.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+
+#include "syscalls.h"
+#include "capsicum.h"
+#include "capsicum-test.h"
+
+static cap_rights_t r_ro;
+static cap_rights_t r_wo;
+static cap_rights_t r_rw;
+static cap_rights_t r_all;
+static cap_rights_t r_all_nopeel;
+#define DO_PEELOFF 0x1A
+#define DO_TERM    0x1B
+
+static int SctpClient(int port, unsigned char byte) {
+  // Create sockets
+  int sock = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
+  EXPECT_OK(sock);
+  if (sock < 0) return sock;
+  int cap_sock_ro = dup(sock);
+  EXPECT_OK(cap_sock_ro);
+  EXPECT_OK(cap_rights_limit(cap_sock_ro, &r_rw));
+  int cap_sock_rw = dup(sock);
+  EXPECT_OK(cap_sock_rw);
+  EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+  int cap_sock_all = dup(sock);
+  EXPECT_OK(cap_sock_all);
+  EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+  close(sock);
+
+  // Send a message.  Requires CAP_WRITE and CAP_CONNECT.
+  struct sockaddr_in serv_addr;
+  memset(&serv_addr, 0, sizeof(serv_addr));
+  serv_addr.sin_family = AF_INET;
+  serv_addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+  serv_addr.sin_port = htons(port);
+
+  EXPECT_NOTCAPABLE(sctp_sendmsg(cap_sock_ro, &byte, 1,
+                                 (struct sockaddr*)&serv_addr, sizeof(serv_addr),
+                                 0, 0, 1, 0, 0));
+  EXPECT_NOTCAPABLE(sctp_sendmsg(cap_sock_rw, &byte, 1,
+                                 (struct sockaddr*)&serv_addr, sizeof(serv_addr),
+                                 0, 0, 1, 0, 0));
+  if (verbose) fprintf(stderr, "  [%d]sctp_sendmsg(%02x)\n", getpid_(), byte);
+  EXPECT_OK(sctp_sendmsg(cap_sock_all, &byte, 1,
+                         (struct sockaddr*)&serv_addr, sizeof(serv_addr),
+                         0, 0, 1, 0, 0));
+  close(cap_sock_ro);
+  close(cap_sock_rw);
+  return cap_sock_all;
+}
+
+
+TEST(Sctp, Socket) {
+  int sock = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
+  EXPECT_OK(sock);
+  if (sock < 0) return;
+
+  cap_rights_init(&r_ro, CAP_READ);
+  cap_rights_init(&r_wo, CAP_WRITE);
+  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+  cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER);
+  cap_rights_init(&r_all_nopeel, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER);
+  cap_rights_clear(&r_all_nopeel, CAP_PEELOFF);
+
+  int cap_sock_wo = dup(sock);
+  EXPECT_OK(cap_sock_wo);
+  EXPECT_OK(cap_rights_limit(cap_sock_wo, &r_wo));
+  int cap_sock_rw = dup(sock);
+  EXPECT_OK(cap_sock_rw);
+  EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+  int cap_sock_all = dup(sock);
+  EXPECT_OK(cap_sock_all);
+  EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+  int cap_sock_all_nopeel = dup(sock);
+  EXPECT_OK(cap_sock_all_nopeel);
+  EXPECT_OK(cap_rights_limit(cap_sock_all_nopeel, &r_all_nopeel));
+  close(sock);
+
+  struct sockaddr_in addr;
+  memset(&addr, 0, sizeof(addr));
+  addr.sin_family = AF_INET;
+  addr.sin_port = htons(0);
+  addr.sin_addr.s_addr = htonl(INADDR_ANY);
+  socklen_t len = sizeof(addr);
+
+  // Can only bind the fully-capable socket.
+  EXPECT_NOTCAPABLE(bind(cap_sock_rw, (struct sockaddr *)&addr, len));
+  EXPECT_OK(bind(cap_sock_all, (struct sockaddr *)&addr, len));
+
+  EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr *)&addr, &len));
+  int port = ntohs(addr.sin_port);
+
+  // Now we know the port involved, fork off children to run clients.
+  pid_t child1 = fork();
+  if (child1 == 0) {
+    // Child process 1: wait for server setup
+    sleep(1);
+    // Send a message that triggers peeloff
+    int client_sock = SctpClient(port, DO_PEELOFF);
+    sleep(1);
+    close(client_sock);
+    exit(HasFailure());
+  }
+
+  pid_t child2 = fork();
+  if (child2 == 0) {
+    // Child process 2: wait for server setup
+    sleep(2);
+    // Send a message that triggers server exit
+    int client_sock = SctpClient(port, DO_TERM);
+    close(client_sock);
+    exit(HasFailure());
+  }
+
+  // Can only listen on the fully-capable socket.
+  EXPECT_NOTCAPABLE(listen(cap_sock_rw, 3));
+  EXPECT_OK(listen(cap_sock_all, 3));
+
+  // Can only do socket operations on the fully-capable socket.
+  len = sizeof(addr);
+  EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&addr, &len));
+
+  struct sctp_event_subscribe events;
+  memset(&events, 0, sizeof(events));
+  events.sctp_association_event = 1;
+  events.sctp_data_io_event = 1;
+  EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, IPPROTO_SCTP, SCTP_EVENTS, &events, sizeof(events)));
+  len = sizeof(events);
+  EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, IPPROTO_SCTP, SCTP_EVENTS, &events, &len));
+  memset(&events, 0, sizeof(events));
+  events.sctp_association_event = 1;
+  events.sctp_data_io_event = 1;
+  EXPECT_OK(setsockopt(cap_sock_all, IPPROTO_SCTP, SCTP_EVENTS, &events, sizeof(events)));
+  len = sizeof(events);
+  EXPECT_OK(getsockopt(cap_sock_all, IPPROTO_SCTP, SCTP_EVENTS, &events, &len));
+
+  len = sizeof(addr);
+  memset(&addr, 0, sizeof(addr));
+  EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&addr, &len));
+  EXPECT_EQ(AF_INET, addr.sin_family);
+  EXPECT_EQ(htons(port), addr.sin_port);
+
+  struct sockaddr_in client_addr;
+  socklen_t addr_len = sizeof(client_addr);
+  unsigned char buffer[1024];
+  struct sctp_sndrcvinfo sri;
+  memset(&sri, 0, sizeof(sri));
+  int flags = 0;
+  EXPECT_NOTCAPABLE(sctp_recvmsg(cap_sock_wo, buffer, sizeof(buffer),
+                                 (struct sockaddr*)&client_addr, &addr_len,
+                                 &sri, &flags));
+  while (true) {
+  retry:
+    memset(&sri, 0, sizeof(sri));
+    int len = sctp_recvmsg(cap_sock_rw, buffer, sizeof(buffer),
+                           (struct sockaddr*)&client_addr, &addr_len,
+                           &sri, &flags);
+    if (len < 0 && errno == EAGAIN) goto retry;
+    EXPECT_OK(len);
+    if (len > 0) {
+      if (verbose) fprintf(stderr, "[%d]sctp_recvmsg(%02x..)", getpid_(), (unsigned)buffer[0]);
+      if (buffer[0] == DO_PEELOFF) {
+        if (verbose) fprintf(stderr, "..peeling off association %08lx\n", (long)sri.sinfo_assoc_id);
+        // Peel off the association.  Needs CAP_PEELOFF.
+        int rc1 = sctp_peeloff(cap_sock_all_nopeel, sri.sinfo_assoc_id);
+        EXPECT_NOTCAPABLE(rc1);
+        int rc2 = sctp_peeloff(cap_sock_all, sri.sinfo_assoc_id);
+        EXPECT_OK(rc2);
+        int peeled = std::max(rc1, rc2);
+        if (peeled > 0) {
+#ifdef CAP_FROM_PEELOFF
+          // Peeled off FD should have same rights as original socket.
+          cap_rights_t rights;
+          EXPECT_OK(cap_rights_get(peeled, &rights));
+          EXPECT_RIGHTS_EQ(&r_all, &rights);
+#endif
+          close(peeled);
+        }
+      } else if (buffer[0] == DO_TERM) {
+        if (verbose) fprintf(stderr, "..terminating server\n");
+        break;
+      }
+    } else if (len < 0) {
+      break;
+    }
+  }
+
+  // Wait for the children.
+  int status;
+  int rc;
+  EXPECT_EQ(child1, waitpid(child1, &status, 0));
+  rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+  EXPECT_EQ(child2, waitpid(child2, &status, 0));
+  rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  close(cap_sock_wo);
+  close(cap_sock_rw);
+  close(cap_sock_all);
+  close(cap_sock_all_nopeel);
+}
+#endif
--- a/contrib/capsicum-test/select.cc
+++ b/contrib/capsicum-test/select.cc
@ -0,0 +1,142 @@
+#include <sys/select.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <poll.h>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+namespace {
+
+int AddFDToSet(fd_set* fset, int fd, int maxfd) {
+  FD_SET(fd, fset);
+  if (fd > maxfd) maxfd = fd;
+  return maxfd;
+}
+
+int InitFDSet(fd_set* fset, int *fds, int fdcount) {
+  FD_ZERO(fset);
+  int maxfd = -1;
+  for (int ii = 0; ii < fdcount; ii++) {
+    maxfd = AddFDToSet(fset, fds[ii], maxfd);
+  }
+  return maxfd;
+}
+
+}  // namespace
+
+FORK_TEST_ON(Select, LotsOFileDescriptors, TmpFile("cap_select")) {
+  int fd = open(TmpFile("cap_select"), O_RDWR | O_CREAT, 0644);
+  EXPECT_OK(fd);
+  if (fd < 0) return;
+
+  // Create many POLL_EVENT capabilities.
+  const int kCapCount = 64;
+  int cap_fd[kCapCount];
+  cap_rights_t r_poll;
+  cap_rights_init(&r_poll, CAP_EVENT);
+  for (int ii = 0; ii < kCapCount; ii++) {
+    cap_fd[ii] = dup(fd);
+    EXPECT_OK(cap_fd[ii]);
+    EXPECT_OK(cap_rights_limit(cap_fd[ii], &r_poll));
+  }
+  cap_rights_t r_rw;
+  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE, CAP_SEEK);
+  int cap_rw = dup(fd);
+  EXPECT_OK(cap_rw);
+  EXPECT_OK(cap_rights_limit(cap_rw, &r_rw));
+
+  EXPECT_OK(cap_enter());  // Enter capability mode
+
+  struct timeval tv;
+  tv.tv_sec = 0;
+  tv.tv_usec = 100;
+  // Add normal file descriptor and all CAP_EVENT capabilities
+  fd_set rset;
+  fd_set wset;
+  int maxfd = InitFDSet(&rset, cap_fd, kCapCount);
+  maxfd = AddFDToSet(&rset, fd, maxfd);
+  InitFDSet(&wset, cap_fd, kCapCount);
+  AddFDToSet(&rset, fd, 0);
+  int ret = select(maxfd+1, &rset, &wset, NULL, &tv);
+  EXPECT_OK(ret);
+
+  // Now also include the capability with no CAP_EVENT.
+  InitFDSet(&rset, cap_fd, kCapCount);
+  AddFDToSet(&rset, fd, maxfd);
+  maxfd = AddFDToSet(&rset, cap_rw, maxfd);
+  InitFDSet(&wset, cap_fd, kCapCount);
+  AddFDToSet(&wset, fd, maxfd);
+  AddFDToSet(&wset, cap_rw, maxfd);
+  ret = select(maxfd+1, &rset, &wset, NULL, &tv);
+  EXPECT_NOTCAPABLE(ret);
+
+#ifdef HAVE_PSELECT
+  // And again with pselect
+  struct timespec ts;
+  ts.tv_sec = 0;
+  ts.tv_nsec = 100000;
+  maxfd = InitFDSet(&rset, cap_fd, kCapCount);
+  maxfd = AddFDToSet(&rset, fd, maxfd);
+  InitFDSet(&wset, cap_fd, kCapCount);
+  AddFDToSet(&rset, fd, 0);
+  ret = pselect(maxfd+1, &rset, &wset, NULL, &ts, NULL);
+  EXPECT_OK(ret);
+
+  InitFDSet(&rset, cap_fd, kCapCount);
+  AddFDToSet(&rset, fd, maxfd);
+  maxfd = AddFDToSet(&rset, cap_rw, maxfd);
+  InitFDSet(&wset, cap_fd, kCapCount);
+  AddFDToSet(&wset, fd, maxfd);
+  AddFDToSet(&wset, cap_rw, maxfd);
+  ret = pselect(maxfd+1, &rset, &wset, NULL, &ts, NULL);
+  EXPECT_NOTCAPABLE(ret);
+#endif
+}
+
+FORK_TEST_ON(Poll, LotsOFileDescriptors, TmpFile("cap_poll")) {
+  int fd = open(TmpFile("cap_poll"), O_RDWR | O_CREAT, 0644);
+  EXPECT_OK(fd);
+  if (fd < 0) return;
+
+  // Create many POLL_EVENT capabilities.
+  const int kCapCount = 64;
+  struct pollfd cap_fd[kCapCount + 2];
+  cap_rights_t r_poll;
+  cap_rights_init(&r_poll, CAP_EVENT);
+  for (int ii = 0; ii < kCapCount; ii++) {
+    cap_fd[ii].fd = dup(fd);
+    EXPECT_OK(cap_fd[ii].fd);
+    EXPECT_OK(cap_rights_limit(cap_fd[ii].fd, &r_poll));
+    cap_fd[ii].events = POLLIN|POLLOUT;
+  }
+  cap_fd[kCapCount].fd = fd;
+  cap_fd[kCapCount].events = POLLIN|POLLOUT;
+  cap_rights_t r_rw;
+  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE, CAP_SEEK);
+  int cap_rw = dup(fd);
+  EXPECT_OK(cap_rw);
+  EXPECT_OK(cap_rights_limit(cap_rw, &r_rw));
+  cap_fd[kCapCount + 1].fd = cap_rw;
+  cap_fd[kCapCount + 1].events = POLLIN|POLLOUT;
+
+  EXPECT_OK(cap_enter());  // Enter capability mode
+
+  EXPECT_OK(poll(cap_fd, kCapCount + 1, 10));
+  // Now also include the capability with no CAP_EVENT.
+  EXPECT_OK(poll(cap_fd, kCapCount + 2, 10));
+  EXPECT_NE(0, (cap_fd[kCapCount + 1].revents & POLLNVAL));
+
+#ifdef HAVE_PPOLL
+  // And again with ppoll
+  struct timespec ts;
+  ts.tv_sec = 0;
+  ts.tv_nsec = 100000;
+  EXPECT_OK(ppoll(cap_fd, kCapCount + 1, &ts, NULL));
+  // Now also include the capability with no CAP_EVENT.
+  EXPECT_OK(ppoll(cap_fd, kCapCount + 2, &ts, NULL));
+  EXPECT_NE(0, (cap_fd[kCapCount + 1].revents & POLLNVAL));
+#endif
+}
--- a/contrib/capsicum-test/showrights
+++ b/contrib/capsicum-test/showrights
@ -0,0 +1,99 @@
+#!/usr/bin/env python
+import sys
+import re
+
+_values = {  # 2-tuple => name
+  (0x0000000000000000, 0x0000000000000100) : 'TTYHOOK',
+  (0x0000000000000040, 0x0000000000000000) : 'CREATE',
+  (0x0000000200000000, 0x0000000000000000) : 'GETSOCKNAME',
+  (0x0000000000000000, 0x0000000000100000) : 'KQUEUE_CHANGE',
+  (0x0000000000000000, 0x0000000000004000) : 'EXTATTR_LIST',
+  (0x0000000000000080, 0x0000000000000000) : 'FEXECVE',
+  (0x0000001000000000, 0x0000000000000000) : 'PEELOFF',
+  (0x0000000000000000, 0x0000000000800000) : 'NOTIFY',
+  (0x0000000000000000, 0x0000000000001000) : 'EXTATTR_DELETE',
+  (0x0000000040000000, 0x0000000000000000) : 'BIND',
+  (0x0000000000000000, 0x0000000000002000) : 'EXTATTR_GET',
+  (0x0000000000008000, 0x0000000000000000) : 'FCNTL',
+  (0x0000000000000000, 0x0000000000400000) : 'EPOLL_CTL',
+  (0x0000000000000004, 0x0000000000000000) : 'SEEK_TELL',
+  (0x000000000000000c, 0x0000000000000000) : 'SEEK',
+  (0x0000004000000000, 0x0000000000000000) : 'SHUTDOWN',
+  (0x0000000000000000, 0x0000000000000080) : 'IOCTL',
+  (0x0000000000000000, 0x0000000000000020) : 'EVENT',
+  (0x0000000400000000, 0x0000000000000000) : 'GETSOCKOPT',
+  (0x0000000080000000, 0x0000000000000000) : 'CONNECT',
+  (0x0000000000000000, 0x0000000000200000) : 'FSIGNAL',
+  (0x0000000000000000, 0x0000000000008000) : 'EXTATTR_SET',
+  (0x0000000000100000, 0x0000000000000000) : 'FSTATFS',
+  (0x0000000000040000, 0x0000000000000000) : 'FSCK',
+  (0x0000000000000000, 0x0000000000000800) : 'PDKILL_FREEBSD',
+  (0x0000000000000000, 0x0000000000000004) : 'SEM_GETVALUE',
+  (0x0000000000000000, 0x0000000000080000) : 'ACL_SET',
+  (0x0000000000200000, 0x0000000000000000) : 'FUTIMES',
+  (0x0000000000000200, 0x0000000000000000) : 'FTRUNCATE',
+  (0x0000000000000000, 0x0000000000000001) : 'MAC_GET',
+  (0x0000000000020000, 0x0000000000000000) : 'FPATHCONF',
+  (0x0000002000000000, 0x0000000000000000) : 'SETSOCKOPT',
+  (0x0000000000002000, 0x0000000000000000) : 'FCHMOD',
+  (0x0000000000000000, 0x0000000002000000) : 'PERFMON',
+  (0x0000000000004000, 0x0000000000000000) : 'FCHOWN',
+  (0x0000000000000400, 0x0000000000000000) : 'LOOKUP',
+  (0x0000000000400400, 0x0000000000000000) : 'LINKAT_TARGET',
+  (0x0000000000800400, 0x0000000000000000) : 'MKDIRAT',
+  (0x0000000001000400, 0x0000000000000000) : 'MKFIFOAT',
+  (0x0000000002000400, 0x0000000000000000) : 'MKNODAT',
+  (0x0000000004000400, 0x0000000000000000) : 'RENAMEAT_SOURCE',
+  (0x0000000008000400, 0x0000000000000000) : 'SYMLINKAT',
+  (0x0000000010000400, 0x0000000000000000) : 'UNLINKAT',
+  (0x0000008000000400, 0x0000000000000000) : 'BINDAT',
+  (0x0000010000000400, 0x0000000000000000) : 'CONNECTAT',
+  (0x0000020000000400, 0x0000000000000000) : 'LINKAT_SOURCE',
+  (0x0000040000000400, 0x0000000000000000) : 'RENAMEAT_TARGET',
+  (0x0000000000000010, 0x0000000000000000) : 'MMAP',
+  (0x000000000000003c, 0x0000000000000000) : 'MMAP_X',
+  (0x0000000000000000, 0x0000000001000000) : 'SETNS',
+  (0x0000000000080000, 0x0000000000000000) : 'FSTAT',
+  (0x0000000000000001, 0x0000000000000000) : 'READ',
+  (0x0000000000000000, 0x0000000000000008) : 'SEM_POST',
+  (0x0000000000000000, 0x0000000000020000) : 'ACL_DELETE',
+  (0x0000000000001000, 0x0000000000000000) : 'FCHFLAGS',
+  (0x0000000800000000, 0x0000000000000000) : 'LISTEN',
+  (0x0000000100000000, 0x0000000000000000) : 'GETPEERNAME',
+  (0x0000000000000100, 0x0000000000000000) : 'FSYNC',
+  (0x0000000000000000, 0x0000000004000000) : 'BPF',
+  (0x0000000020000000, 0x0000000000000000) : 'ACCEPT',
+  (0x0000000000000800, 0x0000000000000000) : 'FCHDIR',
+  (0x0000000000000002, 0x0000000000000000) : 'WRITE',
+  (0x0000000000000000, 0x0000000000000010) : 'SEM_WAIT',
+  (0x0000000000000000, 0x0000000000000040) : 'KQUEUE_EVENT',
+  (0x0000000000000000, 0x0000000000000400) : 'PDWAIT',
+  (0x0000000000000000, 0x0000000000040000) : 'ACL_GET',
+  (0x0000000000010000, 0x0000000000000000) : 'FLOCK',
+  (0x0000000000000000, 0x0000000000010000) : 'ACL_CHECK',
+  (0x0000000000000000, 0x0000000000000002) : 'MAC_SET',
+  (0x0000000000000000, 0x0000000000000200) : 'PDGETPID_FREEBSD',
+}
+
+
+def _map_fdinfo(line):
+    RIGHTS_RE = re.compile(r'(?P<prefix>.*)rights:(?P<ws>\s+)0x(?P<v0>[0-9a-fA-F]+)\s+0x(?P<v1>[0-9a-fA-F]+)$')
+    m = RIGHTS_RE.match(line)
+    if m:
+        val0 = long(m.group('v0'), 16)
+        val0 = (val0 & ~(0x0200000000000000L))
+        val1 = long(m.group('v1'), 16)
+        val1 = (val1 & ~(0x0400000000000000L))
+        rights = []
+        for (right, name) in _values.items():
+            if ((right[0] == 0 or (val0 & right[0])) and
+                (right[1] == 0 or (val1 & right[1]))):
+                rights.append(name)
+        return "%srights:%s%s" % (m.group('prefix'), m.group('ws'), '|'.join(rights))
+    else:
+        return line.rstrip()
+
+if __name__ == "__main__":
+    infile = open(sys.argv[1], 'r') if len(sys.argv) > 1 else sys.stdin
+    for line in infile.readlines():
+        print _map_fdinfo(line)
--- a/contrib/capsicum-test/smoketest.c
+++ b/contrib/capsicum-test/smoketest.c
@ -0,0 +1,135 @@
+/* Small standalone test program to check the existence of Capsicum syscalls */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "capsicum.h"
+
+#ifdef __linux__
+// glibc on Linux caches getpid() return value.
+int getpid_(void) { return syscall(__NR_getpid); }
+#else
+#define getpid_ getpid
+#endif
+
+static int seen_sigchld = 0;
+static void handle_signal(int x) {
+  fprintf(stderr, "[%d] received SIGCHLD\n", getpid_());
+  seen_sigchld = 1;
+}
+
+int main(int argc, char *argv[]) {
+  signal(SIGCHLD, handle_signal);
+  int lifetime = 4; /* seconds */
+  if (1 < argc) {
+    lifetime = atoi(argv[1]);
+  }
+
+  /* cap_rights_limit() available? */
+  cap_rights_t r_rws;
+  cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
+  int cap_fd = dup(STDOUT_FILENO);
+  int rc = cap_rights_limit(cap_fd, &r_rws);
+  fprintf(stderr, "[%d] cap_fd=%d\n", getpid_(), cap_fd);
+  if (rc < 0) fprintf(stderr, "*** cap_rights_limit() failed: errno=%d %s\n", errno, strerror(errno));
+
+  /* cap_rights_get() available? */
+  cap_rights_t rights;
+  cap_rights_init(&rights, 0);
+  rc = cap_rights_get(cap_fd, &rights);
+  char buffer[256];
+  cap_rights_describe(&rights, buffer);
+  fprintf(stderr, "[%d] cap_rights_get(cap_fd=%d) rc=%d rights=%s\n", getpid_(), cap_fd, rc, buffer);
+  if (rc < 0) fprintf(stderr, "*** cap_rights_get() failed: errno=%d %s\n", errno, strerror(errno));
+
+  /* fstat() policed? */
+  struct stat buf;
+  rc = fstat(cap_fd, &buf);
+  fprintf(stderr, "[%d] fstat(cap_fd=%d) rc=%d errno=%d\n", getpid_(), cap_fd, rc, errno);
+  if (rc != -1) fprintf(stderr, "*** fstat() unexpectedly succeeded\n");
+
+  /* pdfork() available? */
+  int pd = -1;
+  rc = pdfork(&pd, 0);
+  if (rc < 0) fprintf(stderr, "*** pdfork() failed: errno=%d %s\n", errno, strerror(errno));
+
+  if (rc == 0) { /* child */
+    int count = 0;
+    while (count < 20) {
+      fprintf(stderr, "  [%d] child alive, parent is ppid=%d\n", getpid_(), getppid());
+      sleep(1);
+    }
+    fprintf(stderr, "  [%d] child exit(0)\n", getpid_());
+    exit(0);
+  }
+  fprintf(stderr, "[%d] pdfork() rc=%d pd=%d\n", getpid_(), rc, pd);
+
+  /* pdgetpid() available? */
+  pid_t actual_pid = rc;
+  pid_t got_pid = -1;
+  rc = pdgetpid(pd, &got_pid);
+  if (rc < 0) fprintf(stderr, "*** pdgetpid(pd=%d) failed: errno=%d %s\n", pd, errno, strerror(errno));
+  fprintf(stderr, "[%d] pdgetpid(pd=%d)=%d, pdfork returned %d\n", getpid_(), pd, got_pid, actual_pid);
+
+  sleep(lifetime);
+
+  /* pdkill() available? */
+  rc = pdkill(pd, SIGKILL);
+  fprintf(stderr, "[%d] pdkill(pd=%d, SIGKILL) -> rc=%d\n", getpid_(), pd, rc);
+  if (rc < 0) fprintf(stderr, "*** pdkill() failed: errno=%d %s\n", errno, strerror(errno));
+  usleep(50000);  /* Allow time for death and signals */
+
+  /* Death of a pdforked child should be invisible */
+  if (seen_sigchld) fprintf(stderr, "*** SIGCHLD emitted\n");
+  int status;
+  rc = wait4(-1, &status, WNOHANG, NULL);
+  if (rc > 0) fprintf(stderr, "*** wait4(-1, ...) unexpectedly found child %d\n", rc);
+
+  fprintf(stderr, "[%d] forking off a child process to check cap_enter()\n", getpid_());
+  pid_t child = fork();
+  if (child == 0) { /* child */
+    /* cap_getmode() / cap_enter() available? */
+    unsigned int cap_mode = -1;
+    rc = cap_getmode(&cap_mode);
+    fprintf(stderr, "  [%d] cap_getmode() -> rc=%d, cap_mode=%d\n", getpid_(), rc, cap_mode);
+    if (rc < 0) fprintf(stderr, "*** cap_getmode() failed: errno=%d %s\n", errno, strerror(errno));
+
+    rc = cap_enter();
+    fprintf(stderr, "  [%d] cap_enter() -> rc=%d\n", getpid_(), rc);
+    if (rc < 0) fprintf(stderr, "*** cap_enter() failed: errno=%d %s\n", errno, strerror(errno));
+
+    rc = cap_getmode(&cap_mode);
+    fprintf(stderr, "  [%d] cap_getmode() -> rc=%d, cap_mode=%d\n", getpid_(), rc, cap_mode);
+    if (rc < 0) fprintf(stderr, "*** cap_getmode() failed: errno=%d %s\n", errno, strerror(errno));
+
+    /* open disallowed? */
+    rc = open("/etc/passwd", O_RDONLY);
+    fprintf(stderr, "  [%d] open('/etc/passwd') -> rc=%d, errno=%d\n", getpid_(), rc, errno);
+    if (rc != -1) fprintf(stderr, "*** open() unexpectedly succeeded\n");
+#ifdef ECAPMODE
+    if (errno != ECAPMODE) fprintf(stderr, "*** open() failed with errno %d not ECAPMODE\n", errno);
+#endif
+    exit(0);
+  }
+  rc = wait4(child, &status, 0, NULL);
+  fprintf(stderr, "[%d] child %d exited with status %x\n", getpid_(), child, status);
+
+  /* fexecve() available? */
+  char* argv_pass[] = {(char*)"/bin/ls", "-l", "smoketest", NULL};
+  char* null_envp[] = {NULL};
+  int ls_bin = open("/bin/ls", O_RDONLY);
+  fprintf(stderr, "[%d] about to fexecve('/bin/ls', '-l', 'smoketest')\n", getpid_());
+  rc = fexecve(ls_bin, argv_pass, null_envp);
+  /* should never reach here */
+  fprintf(stderr, "*** fexecve(fd=%d) failed: rc=%d errno=%d %s\n", ls_bin, rc, errno, strerror(errno));
+
+  return 0;
+}
--- a/contrib/capsicum-test/socket.cc
+++ b/contrib/capsicum-test/socket.cc
@ -0,0 +1,340 @@
+// Tests for socket functionality.
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+TEST(Socket, UnixDomain) {
+  const char* socketName = TmpFile("capsicum-test.socket");
+  unlink(socketName);
+  cap_rights_t r_rw;
+  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+  cap_rights_t r_all;
+  cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER);
+
+  pid_t child = fork();
+  if (child == 0) {
+    // Child process: wait for server setup
+    sleep(1);
+
+    // Create sockets
+    int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+    EXPECT_OK(sock);
+    if (sock < 0) return;
+
+    int cap_sock_rw = dup(sock);
+    EXPECT_OK(cap_sock_rw);
+    EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+    int cap_sock_all = dup(sock);
+    EXPECT_OK(cap_sock_all);
+    EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+    EXPECT_OK(close(sock));
+
+    // Connect socket
+    struct sockaddr_un un;
+    memset(&un, 0, sizeof(un));
+    un.sun_family = AF_UNIX;
+    strcpy(un.sun_path, socketName);
+    socklen_t len = sizeof(un);
+    EXPECT_NOTCAPABLE(connect_(cap_sock_rw, (struct sockaddr *)&un, len));
+    EXPECT_OK(connect_(cap_sock_all, (struct sockaddr *)&un, len));
+
+    exit(HasFailure());
+  }
+
+  int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+  EXPECT_OK(sock);
+  if (sock < 0) return;
+
+  int cap_sock_rw = dup(sock);
+  EXPECT_OK(cap_sock_rw);
+  EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+  int cap_sock_all = dup(sock);
+  EXPECT_OK(cap_sock_all);
+  EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+  EXPECT_OK(close(sock));
+
+  struct sockaddr_un un;
+  memset(&un, 0, sizeof(un));
+  un.sun_family = AF_UNIX;
+  strcpy(un.sun_path, socketName);
+  socklen_t len =  (sizeof(un) - sizeof(un.sun_path) + strlen(un.sun_path));
+
+  // Can only bind the fully-capable socket.
+  EXPECT_NOTCAPABLE(bind_(cap_sock_rw, (struct sockaddr *)&un, len));
+  EXPECT_OK(bind_(cap_sock_all, (struct sockaddr *)&un, len));
+
+  // Can only listen on the fully-capable socket.
+  EXPECT_NOTCAPABLE(listen(cap_sock_rw, 3));
+  EXPECT_OK(listen(cap_sock_all, 3));
+
+  // Can only do socket operations on the fully-capable socket.
+  len = sizeof(un);
+  EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&un, &len));
+  int value = 0;
+  EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, SOL_SOCKET, SO_DEBUG, &value, sizeof(value)));
+  len = sizeof(value);
+  EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, SOL_SOCKET, SO_DEBUG, &value, &len));
+
+  len = sizeof(un);
+  memset(&un, 0, sizeof(un));
+  EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&un, &len));
+  EXPECT_EQ(AF_UNIX, un.sun_family);
+  EXPECT_EQ(std::string(socketName), std::string(un.sun_path));
+  value = 0;
+  EXPECT_OK(setsockopt(cap_sock_all, SOL_SOCKET, SO_DEBUG, &value, sizeof(value)));
+  len = sizeof(value);
+  EXPECT_OK(getsockopt(cap_sock_all, SOL_SOCKET, SO_DEBUG, &value, &len));
+
+  // Accept the incoming connection
+  len = sizeof(un);
+  memset(&un, 0, sizeof(un));
+  EXPECT_NOTCAPABLE(accept(cap_sock_rw, (struct sockaddr *)&un, &len));
+  int conn_fd = accept(cap_sock_all, (struct sockaddr *)&un, &len);
+  EXPECT_OK(conn_fd);
+
+#ifdef CAP_FROM_ACCEPT
+  // New connection should also be a capability.
+  cap_rights_t rights;
+  cap_rights_init(&rights, 0);
+  EXPECT_OK(cap_rights_get(conn_fd, &rights));
+  EXPECT_RIGHTS_IN(&rights, &r_all);
+#endif
+
+  // Wait for the child.
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  close(conn_fd);
+  close(cap_sock_rw);
+  close(cap_sock_all);
+  unlink(socketName);
+}
+
+TEST(Socket, TCP) {
+  int sock = socket(AF_INET, SOCK_STREAM, 0);
+  EXPECT_OK(sock);
+  if (sock < 0) return;
+
+  cap_rights_t r_rw;
+  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+  cap_rights_t r_all;
+  cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER);
+
+  int cap_sock_rw = dup(sock);
+  EXPECT_OK(cap_sock_rw);
+  EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+  int cap_sock_all = dup(sock);
+  EXPECT_OK(cap_sock_all);
+  EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+  close(sock);
+
+  struct sockaddr_in addr;
+  memset(&addr, 0, sizeof(addr));
+  addr.sin_family = AF_INET;
+  addr.sin_port = htons(0);
+  addr.sin_addr.s_addr = htonl(INADDR_ANY);
+  socklen_t len = sizeof(addr);
+
+  // Can only bind the fully-capable socket.
+  EXPECT_NOTCAPABLE(bind_(cap_sock_rw, (struct sockaddr *)&addr, len));
+  EXPECT_OK(bind_(cap_sock_all, (struct sockaddr *)&addr, len));
+
+  getsockname(cap_sock_all, (struct sockaddr *)&addr, &len);
+  int port = ntohs(addr.sin_port);
+
+  // Now we know the port involved, fork off a child.
+  pid_t child = fork();
+  if (child == 0) {
+    // Child process: wait for server setup
+    sleep(1);
+
+    // Create sockets
+    int sock = socket(AF_INET, SOCK_STREAM, 0);
+    EXPECT_OK(sock);
+    if (sock < 0) return;
+    int cap_sock_rw = dup(sock);
+    EXPECT_OK(cap_sock_rw);
+    EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+    int cap_sock_all = dup(sock);
+    EXPECT_OK(cap_sock_all);
+    EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+    close(sock);
+
+    // Connect socket
+    struct sockaddr_in addr;
+    memset(&addr, 0, sizeof(addr));
+    addr.sin_family = AF_INET;
+    addr.sin_port = htons(port);  // Pick unused port
+    addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+    socklen_t len = sizeof(addr);
+    EXPECT_NOTCAPABLE(connect_(cap_sock_rw, (struct sockaddr *)&addr, len));
+    EXPECT_OK(connect_(cap_sock_all, (struct sockaddr *)&addr, len));
+
+    exit(HasFailure());
+  }
+
+  // Can only listen on the fully-capable socket.
+  EXPECT_NOTCAPABLE(listen(cap_sock_rw, 3));
+  EXPECT_OK(listen(cap_sock_all, 3));
+
+  // Can only do socket operations on the fully-capable socket.
+  len = sizeof(addr);
+  EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&addr, &len));
+  int value = 1;
+  EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value)));
+  len = sizeof(value);
+  EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, &len));
+
+  len = sizeof(addr);
+  memset(&addr, 0, sizeof(addr));
+  EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&addr, &len));
+  EXPECT_EQ(AF_INET, addr.sin_family);
+  EXPECT_EQ(htons(port), addr.sin_port);
+  value = 0;
+  EXPECT_OK(setsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value)));
+  len = sizeof(value);
+  EXPECT_OK(getsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, &len));
+
+  // Accept the incoming connection
+  len = sizeof(addr);
+  memset(&addr, 0, sizeof(addr));
+  EXPECT_NOTCAPABLE(accept(cap_sock_rw, (struct sockaddr *)&addr, &len));
+  int conn_fd = accept(cap_sock_all, (struct sockaddr *)&addr, &len);
+  EXPECT_OK(conn_fd);
+
+#ifdef CAP_FROM_ACCEPT
+  // New connection should also be a capability.
+  cap_rights_t rights;
+  cap_rights_init(&rights, 0);
+  EXPECT_OK(cap_rights_get(conn_fd, &rights));
+  EXPECT_RIGHTS_IN(&rights, &r_all);
+#endif
+
+  // Wait for the child.
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  close(conn_fd);
+  close(cap_sock_rw);
+  close(cap_sock_all);
+}
+
+TEST(Socket, UDP) {
+  int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+  EXPECT_OK(sock);
+  if (sock < 0) return;
+
+  cap_rights_t r_rw;
+  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+  cap_rights_t r_all;
+  cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER);
+  cap_rights_t r_connect;
+  cap_rights_init(&r_connect, CAP_READ, CAP_WRITE, CAP_CONNECT);
+
+  int cap_sock_rw = dup(sock);
+  EXPECT_OK(cap_sock_rw);
+  EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+  int cap_sock_all = dup(sock);
+  EXPECT_OK(cap_sock_all);
+  EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+  close(sock);
+
+  struct sockaddr_in addr;
+  memset(&addr, 0, sizeof(addr));
+  addr.sin_family = AF_INET;
+  addr.sin_port = htons(0);
+  addr.sin_addr.s_addr = htonl(INADDR_ANY);
+  socklen_t len = sizeof(addr);
+
+  // Can only bind the fully-capable socket.
+  EXPECT_NOTCAPABLE(bind_(cap_sock_rw, (struct sockaddr *)&addr, len));
+  EXPECT_OK(bind_(cap_sock_all, (struct sockaddr *)&addr, len));
+  getsockname(cap_sock_all, (struct sockaddr *)&addr, &len);
+  int port = ntohs(addr.sin_port);
+
+  // Can only do socket operations on the fully-capable socket.
+  len = sizeof(addr);
+  EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&addr, &len));
+  int value = 1;
+  EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value)));
+  len = sizeof(value);
+  EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, &len));
+
+  len = sizeof(addr);
+  memset(&addr, 0, sizeof(addr));
+  EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&addr, &len));
+  EXPECT_EQ(AF_INET, addr.sin_family);
+  EXPECT_EQ(htons(port), addr.sin_port);
+  value = 1;
+  EXPECT_OK(setsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value)));
+  len = sizeof(value);
+  EXPECT_OK(getsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, &len));
+
+  pid_t child = fork();
+  if (child == 0) {
+    int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+    EXPECT_OK(sock);
+    int cap_sock_rw = dup(sock);
+    EXPECT_OK(cap_sock_rw);
+    EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+    int cap_sock_connect = dup(sock);
+    EXPECT_OK(cap_sock_connect);
+    EXPECT_OK(cap_rights_limit(cap_sock_connect, &r_connect));
+    close(sock);
+
+    // Can only sendmsg(2) to an address over a socket with CAP_CONNECT.
+    unsigned char buffer[256];
+    struct iovec iov;
+    memset(&iov, 0, sizeof(iov));
+    iov.iov_base = buffer;
+    iov.iov_len = sizeof(buffer);
+
+    struct msghdr mh;
+    memset(&mh, 0, sizeof(mh));
+    mh.msg_iov = &iov;
+    mh.msg_iovlen = 1;
+
+    struct sockaddr_in addr;
+    memset(&addr, 0, sizeof(addr));
+    addr.sin_family = AF_INET;
+    addr.sin_port = htons(port);
+    addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+    mh.msg_name = &addr;
+    mh.msg_namelen = sizeof(addr);
+
+    EXPECT_NOTCAPABLE(sendmsg(cap_sock_rw, &mh, 0));
+    EXPECT_OK(sendmsg(cap_sock_connect, &mh, 0));
+
+#ifdef HAVE_SEND_RECV_MMSG
+    struct mmsghdr mv;
+    memset(&mv, 0, sizeof(mv));
+    memcpy(&mv.msg_hdr, &mh, sizeof(struct msghdr));
+    EXPECT_NOTCAPABLE(sendmmsg(cap_sock_rw, &mv, 1, 0));
+    EXPECT_OK(sendmmsg(cap_sock_connect, &mv, 1, 0));
+#endif
+    close(cap_sock_rw);
+    close(cap_sock_connect);
+    exit(HasFailure());
+  }
+  // Wait for the child.
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  close(cap_sock_rw);
+  close(cap_sock_all);
+}
--- a/contrib/capsicum-test/syscalls.h
+++ b/contrib/capsicum-test/syscalls.h
@ -0,0 +1,259 @@
+/*
+ * Minimal portability layer for system call differences between
+ * Capsicum OSes.
+ */
+#ifndef __SYSCALLS_H__
+#define __SYSCALLS_H__
+
+/************************************************************
+ * FreeBSD
+ ************************************************************/
+#ifdef __FreeBSD__
+
+/* Map umount2 (Linux) syscall to unmount (FreeBSD) syscall */
+#define umount2(T, F) unmount(T, F)
+
+/* Map sighandler_y (Linux) to sig_t (FreeBSD) */
+#define sighandler_t sig_t
+
+/* profil(2) has a first argument of char* */
+#define profil_arg1_t char
+
+/* FreeBSD has getdents(2) available */
+#include <sys/types.h>
+#include <dirent.h>
+inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
+  return getdents(fd, (char*)dirp, count);
+}
+#include <sys/mman.h>
+inline int mincore_(void *addr, size_t length, unsigned char *vec) {
+  return mincore(addr, length, (char*)vec);
+}
+#define getpid_ getpid
+
+/* Map Linux-style sendfile to FreeBSD sendfile */
+#include <sys/socket.h>
+#include <sys/uio.h>
+inline ssize_t sendfile_(int out_fd, int in_fd, off_t *offset, size_t count) {
+  return sendfile(in_fd, out_fd, *offset, count, NULL, offset, 0);
+}
+
+/* A sample mount(2) call */
+#include <sys/param.h>
+#include <sys/mount.h>
+inline int bogus_mount_() {
+  return mount("procfs", "/not_mounted", 0, NULL);
+}
+
+/* Mappings for extended attribute functions */
+#include <sys/extattr.h>
+inline ssize_t flistxattr_(int fd, char *list, size_t size) {
+  return extattr_list_fd(fd, EXTATTR_NAMESPACE_USER, list, size);
+}
+inline ssize_t fgetxattr_(int fd, const char *name, void *value, size_t size) {
+  return extattr_get_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size);
+}
+inline int fsetxattr_(int fd, const char *name, const void *value, size_t size, int) {
+  return extattr_set_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size);
+}
+inline int fremovexattr_(int fd, const char *name) {
+  return extattr_delete_fd(fd, EXTATTR_NAMESPACE_USER, name);
+}
+
+/* mq_* functions are wrappers in FreeBSD so go through to underlying syscalls */
+#include <sys/syscall.h>
+extern "C" {
+extern int __sys_kmq_notify(int, const struct sigevent *);
+extern int __sys_kmq_open(const char *, int, mode_t, const struct mq_attr *);
+extern int __sys_kmq_setattr(int, const struct mq_attr *__restrict, struct mq_attr *__restrict);
+extern ssize_t __sys_kmq_timedreceive(int, char *__restrict, size_t,
+                                      unsigned *__restrict, const struct timespec *__restrict);
+extern int __sys_kmq_timedsend(int, const char *, size_t, unsigned,
+                               const struct timespec *);
+extern int  __sys_kmq_unlink(const char *);
+}
+#define mq_notify_ __sys_kmq_notify
+#define mq_open_ __sys_kmq_open
+#define mq_setattr_ __sys_kmq_setattr
+#define mq_getattr_(A, B) __sys_kmq_setattr(A, NULL, B)
+#define mq_timedreceive_ __sys_kmq_timedreceive
+#define mq_timedsend_ __sys_kmq_timedsend
+#define mq_unlink_ __sys_kmq_unlink
+#define mq_close_ close
+#include <sys/ptrace.h>
+inline long ptrace_(int request, pid_t pid, void *addr, void *data) {
+  return ptrace(request, pid, (caddr_t)addr, static_cast<int>((long)data));
+}
+#define PTRACE_PEEKDATA_ PT_READ_D
+#define getegid_ getegid
+#define getgid_ getgid
+#define geteuid_ geteuid
+#define getuid_ getuid
+#define getgroups_ getgroups
+#define getrlimit_ getrlimit
+#define bind_ bind
+#define connect_ connect
+
+/* Features available */
+#if __FreeBSD_version >= 1000000
+#define HAVE_CHFLAGSAT
+#define HAVE_BINDAT
+#define HAVE_CONNECTAT
+#endif
+#define HAVE_CHFLAGS
+#define HAVE_GETFSSTAT
+#define HAVE_REVOKE
+#define HAVE_GETLOGIN
+#define HAVE_MKFIFOAT
+#define HAVE_SYSARCH
+#include <machine/sysarch.h>
+#define HAVE_STAT_BIRTHTIME
+#define HAVE_SYSCTL
+#define HAVE_FPATHCONF
+#define HAVE_F_DUP2FD
+#define HAVE_PSELECT
+#define HAVE_SCTP
+
+/* FreeBSD only allows root to call mlock[all]/munlock[all] */
+#define MLOCK_REQUIRES_ROOT 1
+/* FreeBSD effectively only allows root to call sched_setscheduler */
+#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1
+
+#endif  /* FreeBSD */
+
+/************************************************************
+ * Linux
+ ************************************************************/
+#ifdef __linux__
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+#include <sys/sendfile.h>
+#include <sys/statfs.h>
+#include <sys/xattr.h>
+#include <sys/mount.h>
+#include <linux/net.h>
+
+/* profil(2) has a first argument of unsigned short* */
+#define profil_arg1_t unsigned short
+
+static inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
+  return syscall(__NR_getdents, fd, dirp, count);
+}
+/* A sample mount(2) call */
+static inline int bogus_mount_() {
+  return mount("/dev/bogus", "/bogus", "debugfs", MS_RDONLY, "");
+}
+
+/* libc's getpid() wrapper caches the pid value, and doesn't invalidate
+ * the cached value on pdfork(), so directly syscall. */
+static inline pid_t getpid_() {
+  return syscall(__NR_getpid);
+}
+static inline int execveat(int fd, const char *path,
+                           char *const argv[], char *const envp[], int flags) {
+  return syscall(__NR_execveat, fd, path, argv, envp, flags);
+}
+
+/*
+ * Linux glibc includes an fexecve() function, implemented via the /proc
+ * filesystem.  Bypass this and go directly to the execveat(2) syscall.
+ */
+static inline int fexecve_(int fd, char *const argv[], char *const envp[]) {
+  return execveat(fd, "", argv, envp, AT_EMPTY_PATH);
+}
+/*
+ * Linux glibc attempts to be clever and intercepts various uid/gid functions.
+ * Bypass by calling the syscalls directly.
+ */
+static inline gid_t getegid_(void) { return syscall(__NR_getegid); }
+static inline gid_t getgid_(void) { return syscall(__NR_getgid); }
+static inline uid_t geteuid_(void) { return syscall(__NR_geteuid); }
+static inline uid_t getuid_(void) { return syscall(__NR_getuid); }
+static inline int getgroups_(int size, gid_t list[]) { return syscall(__NR_getgroups, size, list); }
+static inline int getrlimit_(int resource, struct rlimit *rlim) {
+  return syscall(__NR_getrlimit, resource, rlim);
+}
+
+/*
+ * Linux glibc for i386 consumes the errno returned from the raw socketcall(2) operation,
+ * so use the raw syscall for those operations that are disallowed in capability mode.
+ */
+#ifdef __NR_bind
+#define bind_ bind
+#else
+static inline int bind_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
+  unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
+  return syscall(__NR_socketcall, SYS_BIND, args);
+}
+#endif
+#ifdef __NR_connect
+#define connect_ connect
+#else
+static inline int connect_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
+  unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
+  return syscall(__NR_socketcall, SYS_CONNECT, args);
+}
+#endif
+
+#define mincore_ mincore
+#define sendfile_ sendfile
+#define flistxattr_ flistxattr
+#define fgetxattr_ fgetxattr
+#define fsetxattr_ fsetxattr
+#define fremovexattr_ fremovexattr
+#define mq_notify_ mq_notify
+#define mq_open_ mq_open
+#define mq_setattr_ mq_setattr
+#define mq_getattr_ mq_getattr
+#define mq_timedreceive_ mq_timedreceive
+#define mq_timedsend_ mq_timedsend
+#define mq_unlink_ mq_unlink
+#define mq_close_ mq_close
+#define ptrace_ ptrace
+#define PTRACE_PEEKDATA_ PTRACE_PEEKDATA
+
+/* Features available */
+#define HAVE_DUP3
+#define HAVE_PIPE2
+#include <sys/fsuid.h>  /* for setfsgid()/setfsuid() */
+#define HAVE_SETFSUID
+#define HAVE_SETFSGID
+#define HAVE_READAHEAD
+#define HAVE_SEND_RECV_MMSG
+#define HAVE_SYNCFS
+#define HAVE_SYNC_FILE_RANGE
+#include <sys/uio.h>  /* for vmsplice */
+#define HAVE_TEE
+#define HAVE_SPLICE
+#define HAVE_VMSPLICE
+#define HAVE_PSELECT
+#define HAVE_PPOLL
+#define HAVE_EXECVEAT
+#define HAVE_SYSCALL
+#define HAVE_MKNOD_REG
+#define HAVE_MKNOD_SOCKET
+/*
+ * O_BENEATH is arch-specific, via <asm/fcntl.h>; however we cannot include both that file
+ * and the normal <fcntl.h> as they have some clashing definitions.  Bypass by directly
+ * defining O_BENEATH, using the current proposed x86 value.  (This will therefore not
+ * work for non-x86, and may need changing in future if a different value gets merged.)
+ */
+#ifndef O_BENEATH
+#define O_BENEATH	040000000	/* no / or .. in openat path */
+#endif
+
+
+/* Linux allows anyone to call mlock[all]/munlock[all] */
+#define MLOCK_REQUIRES_ROOT 0
+/* Linux allows anyone to call sched_setscheduler */
+#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1
+
+#endif  /* Linux */
+
+#endif /*__SYSCALLS_H__*/
--- a/contrib/capsicum-test/sysctl.cc
+++ b/contrib/capsicum-test/sysctl.cc
@ -0,0 +1,15 @@
+#include "capsicum.h"
+#include "capsicum-test.h"
+
+#ifdef HAVE_SYSCTL
+#include <sys/sysctl.h>
+
+// Certain sysctls are permitted in capability mode, but most are not.  Test
+// for the ones that should be, and try one or two that shouldn't.
+TEST(Sysctl, Capability) {
+  int oid[2] = {CTL_KERN, KERN_OSRELDATE};
+  int ii;
+  size_t len = sizeof(ii);
+  EXPECT_OK(sysctl(oid, 2, &ii, &len, NULL, 0));
+}
+#endif
--- a/contrib/capsicum-test/waittest.c
+++ b/contrib/capsicum-test/waittest.c
@ -0,0 +1,42 @@
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#ifdef __FreeBSD__
+#include <sys/procdesc.h>
+#endif
+
+#ifdef __linux__
+#include <sys/syscall.h>
+int pdfork(int *fd, int flags) {
+  return syscall(__NR_pdfork, fd, flags);
+}
+#endif
+
+int main() {
+  int procfd;
+  int rc =  pdfork(&procfd, 0);
+  if (rc < 0) {
+    fprintf(stderr, "pdfork() failed rc=%d errno=%d %s\n", rc, errno, strerror(errno));
+    exit(1);
+  }
+  if (rc == 0) { // Child process
+    sleep(1);
+    exit(123);
+  }
+  fprintf(stderr, "pdfork()ed child pid=%ld procfd=%d\n", (long)rc, procfd);
+  sleep(2);  // Allow child to complete
+  pid_t child = waitpid(-1, &rc, WNOHANG);
+  if (child == 0) {
+    fprintf(stderr, "waitpid(): no completed child found\n");
+  } else if (child < 0) {
+    fprintf(stderr, "waitpid(): failed errno=%d %s\n", errno, strerror(errno));
+  } else {
+    fprintf(stderr, "waitpid(): found completed child %ld\n", (long)child);
+  }
+  return 0;
+}
--- a/tests/sys/capsicum/Makefile
+++ b/tests/sys/capsicum/Makefile
@ -1,5 +1,7 @@
 # $FreeBSD$

+.include <src.opts.mk>
+
 TESTSDIR=	${TESTSBASE}/sys/capsicum

 ATF_TESTS_C+=	bindat_connectat
@ -7,6 +9,49 @@ ATF_TESTS_C+=	ioctls_test

 CFLAGS+=	-I${SRCTOP}/tests

+.if ${MK_GOOGLETEST} != no
+
+.PATH: ${SRCTOP}/contrib/capsicum-test
+
+GTESTS+=	capsicum-test
+
+SRCS.capsicum-test+=	\
+	capsicum-test-main.cc \
+	capsicum-test.cc \
+	capability-fd.cc \
+	fexecve.cc \
+	procdesc.cc \
+	capmode.cc \
+	fcntl.cc \
+	ioctl.cc \
+	openat.cc \
+	sysctl.cc \
+	select.cc \
+	mqueue.cc \
+	socket.cc \
+	sctp.cc \
+	capability-fd-pair.cc \
+	overhead.cc \
+	rename.cc
+
+LIBADD.capsicum-test+=	gtest pthread
+TEST_METADATA.capsicum-test=	required_user="unprivileged"
+
+.for p in mini-me mini-me.noexec mini-me.setuid
+PROGS+=		$p
+NO_SHARED.$p=
+SRCS.$p=	mini-me.c
+.endfor
+
+BINDIR=	${TESTSDIR}
+
+BINMODE.mini-me.noexec=	${NOBINMODE}
+BINMODE.mini-me.setuid=	4555
+
+WARNS.capsicum-test=	3
+
+.endif
+
 WARNS?=	6

 .include <bsd.test.mk>