Update jemalloc to 4.5.0.

Jason Evans 2017-03-02 01:14:48 +00:00
parent c74cccf0f8
commit 8244f2aa7c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=314529
28 changed files with 580 additions and 235 deletions

View file

@ -4,6 +4,41 @@ brevity. Much more detail can be found in the git revision history:
https://github.com/jemalloc/jemalloc
* 4.5.0 (February 28, 2017)
This is the first release to benefit from much broader continuous integration
testing, thanks to @davidtgoldblatt. Had we had this testing infrastructure
in place for prior releases, it would have caught all of the most serious
regressions fixed by this release.
New features:
- Add --disable-thp and the opt.thp mallctl to provide opt-out mechanisms for
transparent huge page integration. (@jasone)
- Update zone allocator integration to work with macOS 10.12. (@glandium)
- Restructure *CFLAGS configuration, so that CFLAGS behaves typically, and
EXTRA_CFLAGS provides a way to specify e.g. -Werror during building, but not
during configuration. (@jasone, @ronawho)
Bug fixes:
- Fix DSS (sbrk(2)-based) allocation. This regression was first released in
4.3.0. (@jasone)
- Handle race in per size class utilization computation. This functionality
was first released in 4.0.0. (@interwq)
- Fix lock order reversal during gdump. (@jasone)
- Fix/refactor tcache synchronization. This regression was first released in
4.0.0. (@jasone)
- Fix various JSON-formatted malloc_stats_print() bugs. This functionality
was first released in 4.3.0. (@jasone)
- Fix huge-aligned allocation. This regression was first released in 4.4.0.
(@jasone)
- When transparent huge page integration is enabled, detect what state pages
start in according to the kernel's current operating mode, and only convert
arena chunks to non-huge during purging if that is not their initial state.
This functionality was first released in 4.4.0. (@jasone)
- Fix lg_chunk clamping for the --enable-cache-oblivious --disable-fill case.
This regression was first released in 4.0.0. (@jasone, @428desmo)
- Properly detect sparc64 when building for Linux. (@glaubitz)
* 4.4.0 (December 3, 2016)
New features:

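The opt-out described in the 4.5.0 "New features" entry above can be exercised without rebuilding. A minimal sketch (not part of this commit), assuming jemalloc is the active allocator: the malloc_conf global and the MALLOC_CONF environment variable are jemalloc's documented configuration channels, and "thp:false" turns transparent huge page integration off at run time.

/*
 * Point jemalloc's malloc_conf at an options string before the first
 * allocation.  Roughly equivalent at run time (illustrative):
 *   MALLOC_CONF="thp:false" ./a.out
 */
const char *malloc_conf = "thp:false";

int
main(void)
{

	return (0);
}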
View file

@ -49,6 +49,7 @@ include/msvc_compat/
install-sh
jemalloc.pc*
msvc/
scripts/
src/valgrind.c
src/zone.c
test/

View file

@ -1,5 +1,5 @@
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index d9c8345..9898c3c 100644
index c97ab0f..be8dda5 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -53,11 +53,23 @@
@ -27,7 +27,7 @@ index d9c8345..9898c3c 100644
<refsect2>
<title>Standard API</title>
<funcprototype>
@@ -2963,4 +2975,18 @@ malloc_conf = "lg_chunk:24";]]></programlisting></para>
@@ -2989,4 +3001,18 @@ malloc_conf = "lg_chunk:24";]]></programlisting></para>
<para>The <function>posix_memalign()</function> function conforms
to IEEE Std 1003.1-2001 (<quote>POSIX.1</quote>).</para>
</refsect1>
@ -47,10 +47,10 @@ index d9c8345..9898c3c 100644
+ </refsect1>
</refentry>
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index ce4e602..35360b6 100644
index 119e3a5..277989f 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -730,8 +730,13 @@ arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind)
@@ -731,8 +731,13 @@ arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind)
JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t *
arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind)
{
@ -64,7 +64,7 @@ index ce4e602..35360b6 100644
}
JEMALLOC_ALWAYS_INLINE size_t
@@ -790,8 +795,13 @@ arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind)
@@ -791,8 +796,13 @@ arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind)
JEMALLOC_ALWAYS_INLINE const size_t *
arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind)
{
@ -79,7 +79,7 @@ index ce4e602..35360b6 100644
JEMALLOC_ALWAYS_INLINE size_t
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index e7ace7d..d86c61d 100644
index e3b499a..827fdbf 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -8,6 +8,9 @@
@ -122,7 +122,7 @@ index c907d91..4626632 100644
#ifdef _WIN32
# include <windows.h>
diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h
index b442d2d..76518db 100644
index 2b4b1c3..e03a6d0 100644
--- a/include/jemalloc/internal/mutex.h
+++ b/include/jemalloc/internal/mutex.h
@@ -57,9 +57,6 @@ struct malloc_mutex_s {
@ -144,10 +144,10 @@ index b442d2d..76518db 100644
#endif /* JEMALLOC_H_EXTERNS */
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index c1c6c40..c6395fd 100644
index 60b57e5..056a8fe 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -310,7 +310,6 @@ iralloct_realign
@@ -312,7 +312,6 @@ iralloct_realign
isalloc
isdalloct
isqalloc
@ -335,7 +335,7 @@ index f943891..47d032c 100755
+#include "jemalloc_FreeBSD.h"
EOF
diff --git a/src/jemalloc.c b/src/jemalloc.c
index baead66..8a49f26 100644
index f73a26c..fcfe204 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -4,6 +4,10 @@
@ -349,7 +349,7 @@ index baead66..8a49f26 100644
/* Runtime configuration options. */
const char *je_malloc_conf
#ifndef _WIN32
@@ -2775,6 +2779,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
@@ -2781,6 +2785,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
*/
/******************************************************************************/
/*
@ -457,7 +457,7 @@ index baead66..8a49f26 100644
* The following functions are used by threading libraries for protection of
* malloc during fork().
*/
@@ -2913,4 +3018,11 @@ jemalloc_postfork_child(void)
@@ -2922,4 +3027,11 @@ jemalloc_postfork_child(void)
ctl_postfork_child(tsd_tsdn(tsd));
}
@ -516,7 +516,7 @@ index 6333e73..13f8d79 100644
+#endif
+}
diff --git a/src/util.c b/src/util.c
index dd8c236..a4ff287 100755
index dd8c236..a4ff287 100644
--- a/src/util.c
+++ b/src/util.c
@@ -67,6 +67,22 @@ wrtmessage(void *cbopaque, const char *s)

View file

@ -1 +1 @@
4.4.0-0-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc
4.5.0-0-g04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5

View file

@ -2,12 +2,12 @@
.\" Title: JEMALLOC
.\" Author: Jason Evans
.\" Generator: DocBook XSL Stylesheets v1.76.1 <http://docbook.sf.net/>
.\" Date: 12/04/2016
.\" Date: 02/28/2017
.\" Manual: User Manual
.\" Source: jemalloc 4.4.0-0-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc
.\" Source: jemalloc 4.5.0-0-g04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5
.\" Language: English
.\"
.TH "JEMALLOC" "3" "12/04/2016" "jemalloc 4.4.0-0-gf1f76357313e" "User Manual"
.TH "JEMALLOC" "3" "02/28/2017" "jemalloc 4.5.0-0-g04380e79f1e2" "User Manual"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -31,7 +31,7 @@
jemalloc \- general purpose memory allocation functions
.SH "LIBRARY"
.PP
This manual describes jemalloc 4\&.4\&.0\-0\-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc\&. More information can be found at the
This manual describes jemalloc 4\&.5\&.0\-0\-g04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5\&. More information can be found at the
\m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&.
.PP
The following configuration options are enabled in libc\*(Aqs built\-in jemalloc:
@ -779,6 +779,12 @@ config\&.tcache (\fBbool\fR) r\-
was not specified during build configuration\&.
.RE
.PP
config\&.thp (\fBbool\fR) r\-
.RS 4
\fB\-\-disable\-thp\fR
was not specified during build configuration, and the system supports transparent huge page manipulation\&.
.RE
.PP
config\&.tls (\fBbool\fR) r\-
.RS 4
\fB\-\-disable\-tls\fR
@ -965,6 +971,11 @@ option for related tuning information\&. This option is enabled by default unles
\m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, in which case it is forcefully disabled\&.
.RE
.PP
opt\&.thp (\fBbool\fR) r\- [\fB\-\-enable\-thp\fR]
.RS 4
Transparent huge page (THP) integration enabled/disabled\&. When enabled, THPs are explicitly disabled as a side effect of unused dirty page purging for chunks that back small and/or large allocations, because such chunks typically comprise active, unused dirty, and untouched clean pages\&. This option is enabled by default\&.
.RE
.PP
opt\&.lg_tcache_max (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR]
.RS 4
Maximum size class (log base 2) to cache in the thread\-specific cache (tcache)\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&.

View file

@ -506,6 +506,7 @@ static const size_t large_pad =
#endif
;
extern bool opt_thp;
extern purge_mode_t opt_purge;
extern const char *purge_mode_names[];
extern ssize_t opt_lg_dirty_mult;

View file

@ -52,8 +52,8 @@ chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena);
chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena,
const chunk_hooks_t *chunk_hooks);
bool chunk_register(tsdn_t *tsdn, const void *chunk,
const extent_node_t *node);
bool chunk_register(const void *chunk, const extent_node_t *node,
bool *gdump);
void chunk_deregister(const void *chunk, const extent_node_t *node);
void *chunk_alloc_base(size_t size);
void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena,

View file

@ -75,6 +75,11 @@ typedef rb_tree(extent_node_t) extent_tree_t;
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#ifdef JEMALLOC_JET
size_t extent_size_quantize_floor(size_t size);
#endif
size_t extent_size_quantize_ceil(size_t size);
rb_proto(, extent_tree_szsnad_, extent_tree_t, extent_node_t)
rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t)

View file

@ -96,6 +96,13 @@ static const bool config_tcache =
false
#endif
;
static const bool config_thp =
#ifdef JEMALLOC_THP
true
#else
false
#endif
;
static const bool config_tls =
#ifdef JEMALLOC_TLS
true
@ -155,7 +162,6 @@ static const bool config_cache_oblivious =
#include <mach/mach_error.h>
#include <mach/mach_init.h>
#include <mach/vm_map.h>
#include <malloc/malloc.h>
#endif
#include "jemalloc/internal/ph.h"

View file

@ -240,7 +240,6 @@
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
/* #undef JEMALLOC_ZONE */
/* #undef JEMALLOC_ZONE_VERSION */
/*
* Methods for determining whether the OS overcommits.
@ -254,6 +253,12 @@
/* Defined if madvise(2) is available. */
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
/*
* Methods for purging unused pages differ between operating systems.
*
@ -266,10 +271,7 @@
#define JEMALLOC_PURGE_MADVISE_FREE
#define JEMALLOC_PURGE_MADVISE_DONTNEED
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* Defined if transparent huge page support is enabled. */
/* #undef JEMALLOC_THP */
/* Define if operating system has alloca.h header. */

View file

@ -76,7 +76,7 @@ mb_write(void)
: "memory" /* Clobbers. */
);
}
#elif defined(__sparc64__)
#elif defined(__sparc__) && defined(__arch64__)
JEMALLOC_INLINE void
mb_write(void)
{

View file

@ -83,8 +83,8 @@ JEMALLOC_INLINE void
malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex)
{
witness_assert_not_owner(tsdn, &mutex->witness);
if (isthreaded) {
witness_assert_not_owner(tsdn, &mutex->witness);
#ifdef _WIN32
# if _WIN32_WINNT >= 0x0600
AcquireSRWLockExclusive(&mutex->lock);
@ -98,16 +98,16 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex)
#else
pthread_mutex_lock(&mutex->lock);
#endif
witness_lock(tsdn, &mutex->witness);
}
witness_lock(tsdn, &mutex->witness);
}
JEMALLOC_INLINE void
malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex)
{
witness_unlock(tsdn, &mutex->witness);
if (isthreaded) {
witness_unlock(tsdn, &mutex->witness);
#ifdef _WIN32
# if _WIN32_WINNT >= 0x0600
ReleaseSRWLockExclusive(&mutex->lock);
@ -128,16 +128,14 @@ JEMALLOC_INLINE void
malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex)
{
if (isthreaded)
witness_assert_owner(tsdn, &mutex->witness);
witness_assert_owner(tsdn, &mutex->witness);
}
JEMALLOC_INLINE void
malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex)
{
if (isthreaded)
witness_assert_not_owner(tsdn, &mutex->witness);
witness_assert_not_owner(tsdn, &mutex->witness);
}
#endif

View file

@ -223,6 +223,8 @@
#define extent_node_sn_set JEMALLOC_N(extent_node_sn_set)
#define extent_node_zeroed_get JEMALLOC_N(extent_node_zeroed_get)
#define extent_node_zeroed_set JEMALLOC_N(extent_node_zeroed_set)
#define extent_size_quantize_ceil JEMALLOC_N(extent_size_quantize_ceil)
#define extent_size_quantize_floor JEMALLOC_N(extent_size_quantize_floor)
#define extent_tree_ad_destroy JEMALLOC_N(extent_tree_ad_destroy)
#define extent_tree_ad_destroy_recurse JEMALLOC_N(extent_tree_ad_destroy_recurse)
#define extent_tree_ad_empty JEMALLOC_N(extent_tree_ad_empty)
@ -389,6 +391,7 @@
#define opt_redzone JEMALLOC_N(opt_redzone)
#define opt_stats_print JEMALLOC_N(opt_stats_print)
#define opt_tcache JEMALLOC_N(opt_tcache)
#define opt_thp JEMALLOC_N(opt_thp)
#define opt_utrace JEMALLOC_N(opt_utrace)
#define opt_xmalloc JEMALLOC_N(opt_xmalloc)
#define opt_zero JEMALLOC_N(opt_zero)
@ -528,6 +531,9 @@
#define tcache_get JEMALLOC_N(tcache_get)
#define tcache_get_hard JEMALLOC_N(tcache_get_hard)
#define tcache_maxclass JEMALLOC_N(tcache_maxclass)
#define tcache_postfork_child JEMALLOC_N(tcache_postfork_child)
#define tcache_postfork_parent JEMALLOC_N(tcache_postfork_parent)
#define tcache_prefork JEMALLOC_N(tcache_prefork)
#define tcache_salloc JEMALLOC_N(tcache_salloc)
#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge)
#define tcaches JEMALLOC_N(tcaches)
@ -611,14 +617,16 @@
#define valgrind_make_mem_defined JEMALLOC_N(valgrind_make_mem_defined)
#define valgrind_make_mem_noaccess JEMALLOC_N(valgrind_make_mem_noaccess)
#define valgrind_make_mem_undefined JEMALLOC_N(valgrind_make_mem_undefined)
#define witness_assert_depth JEMALLOC_N(witness_assert_depth)
#define witness_assert_depth_to_rank JEMALLOC_N(witness_assert_depth_to_rank)
#define witness_assert_lockless JEMALLOC_N(witness_assert_lockless)
#define witness_assert_not_owner JEMALLOC_N(witness_assert_not_owner)
#define witness_assert_owner JEMALLOC_N(witness_assert_owner)
#define witness_depth_error JEMALLOC_N(witness_depth_error)
#define witness_fork_cleanup JEMALLOC_N(witness_fork_cleanup)
#define witness_init JEMALLOC_N(witness_init)
#define witness_lock JEMALLOC_N(witness_lock)
#define witness_lock_error JEMALLOC_N(witness_lock_error)
#define witness_lockless_error JEMALLOC_N(witness_lockless_error)
#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error)
#define witness_owner JEMALLOC_N(witness_owner)
#define witness_owner_error JEMALLOC_N(witness_owner_error)

View file

@ -149,6 +149,9 @@ bool tcaches_create(tsd_t *tsd, unsigned *r_ind);
void tcaches_flush(tsd_t *tsd, unsigned ind);
void tcaches_destroy(tsd_t *tsd, unsigned ind);
bool tcache_boot(tsdn_t *tsdn);
void tcache_prefork(tsdn_t *tsdn);
void tcache_postfork_parent(tsdn_t *tsdn);
void tcache_postfork_child(tsdn_t *tsdn);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/

View file

@ -479,13 +479,14 @@ a_name##tsd_wrapper_get(bool init) \
\
if (init && unlikely(wrapper == NULL)) { \
tsd_init_block_t block; \
wrapper = tsd_init_check_recursion( \
&a_name##tsd_init_head, &block); \
wrapper = (a_name##tsd_wrapper_t *) \
tsd_init_check_recursion(&a_name##tsd_init_head, \
&block); \
if (wrapper) \
return (wrapper); \
wrapper = (a_name##tsd_wrapper_t *) \
malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
block.data = wrapper; \
block.data = (void *)wrapper; \
if (wrapper == NULL) { \
malloc_write("<jemalloc>: Error allocating" \
" TSD for "#a_name"\n"); \

View file

@ -12,21 +12,32 @@ typedef int witness_comp_t (const witness_t *, const witness_t *);
*/
#define WITNESS_RANK_OMIT 0U
#define WITNESS_RANK_MIN 1U
#define WITNESS_RANK_INIT 1U
#define WITNESS_RANK_CTL 1U
#define WITNESS_RANK_ARENAS 2U
#define WITNESS_RANK_TCACHES 2U
#define WITNESS_RANK_ARENAS 3U
#define WITNESS_RANK_PROF_DUMP 3U
#define WITNESS_RANK_PROF_BT2GCTX 4U
#define WITNESS_RANK_PROF_TDATAS 5U
#define WITNESS_RANK_PROF_TDATA 6U
#define WITNESS_RANK_PROF_GCTX 7U
#define WITNESS_RANK_PROF_DUMP 4U
#define WITNESS_RANK_PROF_BT2GCTX 5U
#define WITNESS_RANK_PROF_TDATAS 6U
#define WITNESS_RANK_PROF_TDATA 7U
#define WITNESS_RANK_PROF_GCTX 8U
#define WITNESS_RANK_ARENA 8U
#define WITNESS_RANK_ARENA_CHUNKS 9U
#define WITNESS_RANK_ARENA_NODE_CACHE 10
/*
* Used as an argument to witness_assert_depth_to_rank() in order to validate
* depth excluding non-core locks with lower ranks. Since the rank argument to
* witness_assert_depth_to_rank() is inclusive rather than exclusive, this
* definition can have the same value as the minimally ranked core lock.
*/
#define WITNESS_RANK_CORE 9U
#define WITNESS_RANK_BASE 11U
#define WITNESS_RANK_ARENA 9U
#define WITNESS_RANK_ARENA_CHUNKS 10U
#define WITNESS_RANK_ARENA_NODE_CACHE 11U
#define WITNESS_RANK_BASE 12U
#define WITNESS_RANK_LEAF 0xffffffffU
#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF
@ -91,10 +102,12 @@ extern witness_not_owner_error_t *witness_not_owner_error;
void witness_not_owner_error(const witness_t *witness);
#endif
#ifdef JEMALLOC_JET
typedef void (witness_lockless_error_t)(const witness_list_t *);
extern witness_lockless_error_t *witness_lockless_error;
typedef void (witness_depth_error_t)(const witness_list_t *,
witness_rank_t rank_inclusive, unsigned depth);
extern witness_depth_error_t *witness_depth_error;
#else
void witness_lockless_error(const witness_list_t *witnesses);
void witness_depth_error(const witness_list_t *witnesses,
witness_rank_t rank_inclusive, unsigned depth);
#endif
void witnesses_cleanup(tsd_t *tsd);
@ -111,6 +124,9 @@ void witness_postfork_child(tsd_t *tsd);
bool witness_owner(tsd_t *tsd, const witness_t *witness);
void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness);
void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness);
void witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive,
unsigned depth);
void witness_assert_depth(tsdn_t *tsdn, unsigned depth);
void witness_assert_lockless(tsdn_t *tsdn);
void witness_lock(tsdn_t *tsdn, witness_t *witness);
void witness_unlock(tsdn_t *tsdn, witness_t *witness);
@ -123,6 +139,8 @@ witness_owner(tsd_t *tsd, const witness_t *witness)
witness_list_t *witnesses;
witness_t *w;
cassert(config_debug);
witnesses = tsd_witnessesp_get(tsd);
ql_foreach(w, witnesses, link) {
if (w == witness)
@ -175,9 +193,10 @@ witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness)
}
JEMALLOC_INLINE void
witness_assert_lockless(tsdn_t *tsdn)
{
witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive,
unsigned depth) {
tsd_t *tsd;
unsigned d;
witness_list_t *witnesses;
witness_t *w;
@ -188,10 +207,29 @@ witness_assert_lockless(tsdn_t *tsdn)
return;
tsd = tsdn_tsd(tsdn);
d = 0;
witnesses = tsd_witnessesp_get(tsd);
w = ql_last(witnesses, link);
if (w != NULL)
witness_lockless_error(witnesses);
if (w != NULL) {
ql_reverse_foreach(w, witnesses, link) {
if (w->rank < rank_inclusive) {
break;
}
d++;
}
}
if (d != depth)
witness_depth_error(witnesses, rank_inclusive, depth);
}
JEMALLOC_INLINE void
witness_assert_depth(tsdn_t *tsdn, unsigned depth) {
witness_assert_depth_to_rank(tsdn, WITNESS_RANK_MIN, depth);
}
JEMALLOC_INLINE void
witness_assert_lockless(tsdn_t *tsdn) {
witness_assert_depth(tsdn, 0);
}
JEMALLOC_INLINE void

View file

@ -87,12 +87,12 @@ extern "C" {
#include <limits.h>
#include <strings.h>
#define JEMALLOC_VERSION "4.4.0-0-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc"
#define JEMALLOC_VERSION "4.5.0-0-g04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5"
#define JEMALLOC_VERSION_MAJOR 4
#define JEMALLOC_VERSION_MINOR 4
#define JEMALLOC_VERSION_MINOR 5
#define JEMALLOC_VERSION_BUGFIX 0
#define JEMALLOC_VERSION_NREV 0
#define JEMALLOC_VERSION_GID "f1f76357313e7dcad7262f17a48ff0a2e005fcdc"
#define JEMALLOC_VERSION_GID "04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5"
# define MALLOCX_LG_ALIGN(la) ((int)(la))
# if LG_SIZEOF_PTR == 2

View file

@ -4,6 +4,8 @@
/******************************************************************************/
/* Data. */
bool opt_thp = true;
static bool thp_initially_huge;
purge_mode_t opt_purge = PURGE_DEFAULT;
const char *purge_mode_names[] = {
"ratio",
@ -568,8 +570,8 @@ arena_chunk_init_spare(arena_t *arena)
}
static bool
arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
size_t sn, bool zero)
arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, size_t sn, bool zero,
bool *gdump)
{
/*
@ -580,7 +582,7 @@ arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
*/
extent_node_init(&chunk->node, arena, chunk, chunksize, sn, zero, true);
extent_node_achunk_set(&chunk->node, true);
return (chunk_register(tsdn, chunk, &chunk->node));
return (chunk_register(chunk, &chunk->node, gdump));
}
static arena_chunk_t *
@ -591,6 +593,8 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena,
size_t sn;
malloc_mutex_unlock(tsdn, &arena->lock);
/* prof_gdump() requirement. */
witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0);
chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks,
NULL, chunksize, chunksize, &sn, zero, commit);
@ -603,16 +607,20 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena,
chunk = NULL;
}
}
if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, sn,
*zero)) {
if (!*commit) {
/* Undo commit of header. */
chunk_hooks->decommit(chunk, chunksize, 0, map_bias <<
LG_PAGE, arena->ind);
if (chunk != NULL) {
bool gdump;
if (arena_chunk_register(arena, chunk, sn, *zero, &gdump)) {
if (!*commit) {
/* Undo commit of header. */
chunk_hooks->decommit(chunk, chunksize, 0,
map_bias << LG_PAGE, arena->ind);
}
chunk_dalloc_wrapper(tsdn, arena, chunk_hooks,
(void *)chunk, chunksize, sn, *zero, *commit);
chunk = NULL;
}
chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk,
chunksize, sn, *zero, *commit);
chunk = NULL;
if (config_prof && opt_prof && gdump)
prof_gdump(tsdn);
}
malloc_mutex_lock(tsdn, &arena->lock);
@ -627,14 +635,24 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero,
chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
size_t sn;
/* prof_gdump() requirement. */
witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 1);
malloc_mutex_assert_owner(tsdn, &arena->lock);
chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize,
chunksize, &sn, zero, commit, true);
if (chunk != NULL) {
if (arena_chunk_register(tsdn, arena, chunk, sn, *zero)) {
bool gdump;
if (arena_chunk_register(arena, chunk, sn, *zero, &gdump)) {
chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk,
chunksize, sn, true);
return (NULL);
}
if (config_prof && opt_prof && gdump) {
malloc_mutex_unlock(tsdn, &arena->lock);
prof_gdump(tsdn);
malloc_mutex_lock(tsdn, &arena->lock);
}
}
if (chunk == NULL) {
chunk = arena_chunk_alloc_internal_hard(tsdn, arena,
@ -664,7 +682,9 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena)
if (chunk == NULL)
return (NULL);
chunk->hugepage = true;
if (config_thp && opt_thp) {
chunk->hugepage = thp_initially_huge;
}
/*
* Initialize the map to contain one maximal free untouched run. Mark
@ -729,14 +749,17 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena)
static void
arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk)
{
size_t sn, hugepage;
size_t sn;
UNUSED bool hugepage JEMALLOC_CC_SILENCE_INIT(false);
bool committed;
chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
chunk_deregister(chunk, &chunk->node);
sn = extent_node_sn_get(&chunk->node);
hugepage = chunk->hugepage;
if (config_thp && opt_thp) {
hugepage = chunk->hugepage;
}
committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0);
if (!committed) {
/*
@ -749,13 +772,16 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk)
chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE,
arena->ind);
}
if (!hugepage) {
if (config_thp && opt_thp && hugepage != thp_initially_huge) {
/*
* Convert chunk back to the default state, so that all
* subsequent chunk allocations start out with chunks that can
* be backed by transparent huge pages.
* Convert chunk back to initial THP state, so that all
* subsequent chunk allocations start out in a consistent state.
*/
pages_huge(chunk, chunksize);
if (thp_initially_huge) {
pages_huge(chunk, chunksize);
} else {
pages_nohuge(chunk, chunksize);
}
}
chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize,
@ -1695,13 +1721,13 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
/*
* If this is the first run purged within chunk, mark
* the chunk as non-huge. This will prevent all use of
* transparent huge pages for this chunk until the chunk
* as a whole is deallocated.
* the chunk as non-THP-capable. This will prevent all
* use of THPs for this chunk until the chunk as a whole
* is deallocated.
*/
if (chunk->hugepage) {
pages_nohuge(chunk, chunksize);
chunk->hugepage = false;
if (config_thp && opt_thp && chunk->hugepage) {
chunk->hugepage = pages_nohuge(chunk,
chunksize);
}
assert(pageind + npages <= chunk_npages);
@ -2694,6 +2720,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
return (arena_malloc_small(tsdn, arena, ind, zero));
if (likely(size <= large_maxclass))
return (arena_malloc_large(tsdn, arena, ind, zero));
assert(index2size(ind) >= chunksize);
return (huge_malloc(tsdn, arena, index2size(ind), zero));
}
@ -3755,11 +3782,78 @@ bin_info_init(void)
#undef SC
}
static void
init_thp_initially_huge(void) {
int fd;
char buf[sizeof("[always] madvise never\n")];
ssize_t nread;
static const char *enabled_states[] = {
"[always] madvise never\n",
"always [madvise] never\n",
"always madvise [never]\n"
};
static const bool thp_initially_huge_states[] = {
true,
false,
false
};
unsigned i;
if (config_debug) {
for (i = 0; i < sizeof(enabled_states)/sizeof(const char *);
i++) {
assert(sizeof(buf) > strlen(enabled_states[i]));
}
}
assert(sizeof(enabled_states)/sizeof(const char *) ==
sizeof(thp_initially_huge_states)/sizeof(bool));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
fd = (int)syscall(SYS_open,
"/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
if (fd == -1) {
goto label_error;
}
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf));
#else
nread = read(fd, &buf, sizeof(buf));
#endif
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
syscall(SYS_close, fd);
#else
close(fd);
#endif
if (nread < 1) {
goto label_error;
}
for (i = 0; i < sizeof(enabled_states)/sizeof(const char *);
i++) {
if (strncmp(buf, enabled_states[i], (size_t)nread) == 0) {
thp_initially_huge = thp_initially_huge_states[i];
return;
}
}
label_error:
thp_initially_huge = false;
}
void
arena_boot(void)
{
unsigned i;
if (config_thp && opt_thp) {
init_thp_initially_huge();
}
arena_lg_dirty_mult_default_set(opt_lg_dirty_mult);
arena_decay_time_default_set(opt_decay_time);
@ -3790,15 +3884,8 @@ arena_boot(void)
arena_maxrun = chunksize - (map_bias << LG_PAGE);
assert(arena_maxrun > 0);
large_maxclass = index2size(size2index(chunksize)-1);
if (large_maxclass > arena_maxrun) {
/*
* For small chunk sizes it's possible for there to be fewer
* non-header pages available than are necessary to serve the
* size classes just below chunksize.
*/
large_maxclass = arena_maxrun;
}
assert(large_maxclass > 0);
assert(large_maxclass + large_pad <= arena_maxrun);
nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS);
nhclasses = NSIZES - nlclasses - NBINS;

View file

@ -141,7 +141,7 @@ chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena,
}
bool
chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node)
chunk_register(const void *chunk, const extent_node_t *node, bool *gdump)
{
assert(extent_node_addr_get(node) == chunk);
@ -160,8 +160,7 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node)
*/
high = atomic_read_z(&highchunks);
}
if (cur > high && prof_gdump_get_unlocked())
prof_gdump(tsdn);
*gdump = (cur > high && prof_gdump_get_unlocked());
}
return (false);
@ -189,12 +188,17 @@ chunk_deregister(const void *chunk, const extent_node_t *node)
static extent_node_t *
chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szsnad, size_t size)
{
extent_node_t *node;
size_t qsize;
extent_node_t key;
assert(size == CHUNK_CEILING(size));
extent_node_init(&key, arena, NULL, size, 0, false, false);
return (extent_tree_szsnad_nsearch(chunks_szsnad, &key));
qsize = extent_size_quantize_ceil(size);
extent_node_init(&key, arena, NULL, qsize, 0, false, false);
node = extent_tree_szsnad_nsearch(chunks_szsnad, &key);
assert(node == NULL || extent_node_size_get(node) >= size);
return node;
}
static void *

View file

@ -115,8 +115,9 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
* malloc.
*/
while (true) {
void *ret, *cpad, *max_cur, *dss_next, *dss_prev;
size_t gap_size, cpad_size;
void *ret, *max_cur, *dss_next, *dss_prev;
void *gap_addr_chunk, *gap_addr_subchunk;
size_t gap_size_chunk, gap_size_subchunk;
intptr_t incr;
max_cur = chunk_dss_max_update(new_addr);
@ -124,25 +125,32 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
goto label_oom;
/*
* Calculate how much padding is necessary to
* chunk-align the end of the DSS.
*/
gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) &
chunksize_mask;
/*
* Compute how much chunk-aligned pad space (if any) is
* Compute how much chunk-aligned gap space (if any) is
* necessary to satisfy alignment. This space can be
* recycled for later use.
*/
cpad = (void *)((uintptr_t)dss_max + gap_size);
ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max,
alignment);
cpad_size = (uintptr_t)ret - (uintptr_t)cpad;
gap_addr_chunk = (void *)(CHUNK_CEILING(
(uintptr_t)max_cur));
ret = (void *)ALIGNMENT_CEILING(
(uintptr_t)gap_addr_chunk, alignment);
gap_size_chunk = (uintptr_t)ret -
(uintptr_t)gap_addr_chunk;
/*
* Compute the address just past the end of the desired
* allocation space.
*/
dss_next = (void *)((uintptr_t)ret + size);
if ((uintptr_t)ret < (uintptr_t)dss_max ||
(uintptr_t)dss_next < (uintptr_t)dss_max)
if ((uintptr_t)ret < (uintptr_t)max_cur ||
(uintptr_t)dss_next < (uintptr_t)max_cur)
goto label_oom; /* Wrap-around. */
incr = gap_size + cpad_size + size;
/* Compute the increment, including subchunk bytes. */
gap_addr_subchunk = max_cur;
gap_size_subchunk = (uintptr_t)ret -
(uintptr_t)gap_addr_subchunk;
incr = gap_size_subchunk + size;
assert((uintptr_t)max_cur + incr == (uintptr_t)ret +
size);
/*
* Optimistically update dss_max, and roll back below if
@ -157,11 +165,12 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
dss_prev = chunk_dss_sbrk(incr);
if (dss_prev == max_cur) {
/* Success. */
if (cpad_size != 0) {
if (gap_size_chunk != 0) {
chunk_hooks_t chunk_hooks =
CHUNK_HOOKS_INITIALIZER;
chunk_dalloc_wrapper(tsdn, arena,
&chunk_hooks, cpad, cpad_size,
&chunk_hooks, gap_addr_chunk,
gap_size_chunk,
arena_extent_sn_next(arena), false,
true);
}

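The rewritten gap computation above distinguishes the chunk-aligned gap (recyclable as a chunk) from the sub-chunk gap (only counted toward the sbrk(2) increment). Below is a toy arithmetic check with made-up addresses, assuming 2 MiB chunks and a 4 MiB alignment request; the macros merely mirror the ones used in chunk_dss.c, and none of this is jemalloc code.

#include <assert.h>
#include <stdint.h>

#define	CHUNKSIZE		((uintptr_t)2 << 20)
#define	CHUNK_CEILING(a)	(((a) + (CHUNKSIZE - 1)) & ~(CHUNKSIZE - 1))
#define	ALIGNMENT_CEILING(a, alignment)					\
	(((a) + ((alignment) - 1)) & ~((uintptr_t)(alignment) - 1))

int
main(void)
{
	uintptr_t max_cur = 0x20500000;			/* Current dss_max. */
	uintptr_t alignment = (uintptr_t)4 << 20;	/* Requested alignment. */
	uintptr_t size = CHUNKSIZE;			/* Requested size. */
	uintptr_t gap_addr_chunk, ret, gap_size_chunk, incr;

	gap_addr_chunk = CHUNK_CEILING(max_cur);		/* 0x20600000 */
	ret = ALIGNMENT_CEILING(gap_addr_chunk, alignment);	/* 0x20800000 */
	gap_size_chunk = ret - gap_addr_chunk;	/* One whole chunk to recycle. */
	incr = (ret - max_cur) + size;		/* Sub-chunk gap + request. */

	assert(gap_size_chunk == CHUNKSIZE);
	assert(incr == 0x300000 + CHUNKSIZE);
	return (0);
}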
View file

@ -84,6 +84,7 @@ CTL_PROTO(config_prof_libgcc)
CTL_PROTO(config_prof_libunwind)
CTL_PROTO(config_stats)
CTL_PROTO(config_tcache)
CTL_PROTO(config_thp)
CTL_PROTO(config_tls)
CTL_PROTO(config_utrace)
CTL_PROTO(config_valgrind)
@ -104,6 +105,7 @@ CTL_PROTO(opt_utrace)
CTL_PROTO(opt_xmalloc)
CTL_PROTO(opt_tcache)
CTL_PROTO(opt_lg_tcache_max)
CTL_PROTO(opt_thp)
CTL_PROTO(opt_prof)
CTL_PROTO(opt_prof_prefix)
CTL_PROTO(opt_prof_active)
@ -258,6 +260,7 @@ static const ctl_named_node_t config_node[] = {
{NAME("prof_libunwind"), CTL(config_prof_libunwind)},
{NAME("stats"), CTL(config_stats)},
{NAME("tcache"), CTL(config_tcache)},
{NAME("thp"), CTL(config_thp)},
{NAME("tls"), CTL(config_tls)},
{NAME("utrace"), CTL(config_utrace)},
{NAME("valgrind"), CTL(config_valgrind)},
@ -281,6 +284,7 @@ static const ctl_named_node_t opt_node[] = {
{NAME("xmalloc"), CTL(opt_xmalloc)},
{NAME("tcache"), CTL(opt_tcache)},
{NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)},
{NAME("thp"), CTL(opt_thp)},
{NAME("prof"), CTL(opt_prof)},
{NAME("prof_prefix"), CTL(opt_prof_prefix)},
{NAME("prof_active"), CTL(opt_prof_active)},
@ -1268,6 +1272,7 @@ CTL_RO_CONFIG_GEN(config_prof_libgcc, bool)
CTL_RO_CONFIG_GEN(config_prof_libunwind, bool)
CTL_RO_CONFIG_GEN(config_stats, bool)
CTL_RO_CONFIG_GEN(config_tcache, bool)
CTL_RO_CONFIG_GEN(config_thp, bool)
CTL_RO_CONFIG_GEN(config_tls, bool)
CTL_RO_CONFIG_GEN(config_utrace, bool)
CTL_RO_CONFIG_GEN(config_valgrind, bool)
@ -1291,6 +1296,7 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool)
CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool)
CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t)
CTL_RO_NL_CGEN(config_thp, opt_thp, opt_thp, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *)
CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool)
@ -1476,7 +1482,6 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
if (!config_tcache)
return (ENOENT);
malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
READONLY();
if (tcaches_create(tsd, &tcache_ind)) {
ret = EFAULT;
@ -1486,8 +1491,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
ret = 0;
label_return:
malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
return (ret);
return ret;
}
static int

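A hedged usage sketch for the new CTL nodes registered above. mallctl() is jemalloc's public control interface (declared in <malloc_np.h> on FreeBSD, <jemalloc/jemalloc.h> for the standalone library); "config.thp" reports whether THP support was compiled in and "opt.thp" reports the run-time setting.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <malloc_np.h>	/* <jemalloc/jemalloc.h> for standalone jemalloc. */

int
main(void)
{
	bool config_thp = false, opt_thp = false;
	size_t sz = sizeof(bool);

	if (mallctl("config.thp", &config_thp, &sz, NULL, 0) == 0 &&
	    config_thp) {
		sz = sizeof(bool);
		(void)mallctl("opt.thp", &opt_thp, &sz, NULL, 0);
	}
	printf("THP support compiled in: %d, integration enabled: %d\n",
	    (int)config_thp, (int)opt_thp);
	return (0);
}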
View file

@ -3,13 +3,11 @@
/******************************************************************************/
/*
* Round down to the nearest chunk size that can actually be requested during
* normal huge allocation.
*/
JEMALLOC_INLINE_C size_t
extent_quantize(size_t size)
{
#ifndef JEMALLOC_JET
static
#endif
size_t
extent_size_quantize_floor(size_t size) {
size_t ret;
szind_t ind;
@ -25,11 +23,32 @@ extent_quantize(size_t size)
return (ret);
}
size_t
extent_size_quantize_ceil(size_t size) {
size_t ret;
assert(size > 0);
ret = extent_size_quantize_floor(size);
if (ret < size) {
/*
* Skip a quantization that may have an adequately large extent,
* because under-sized extents may be mixed in. This only
* happens when an unusual size is requested, i.e. for aligned
* allocation, and is just one of several places where linear
* search would potentially find sufficiently aligned available
* memory somewhere lower.
*/
ret = index2size(size2index(ret + 1));
}
return ret;
}
JEMALLOC_INLINE_C int
extent_sz_comp(const extent_node_t *a, const extent_node_t *b)
{
size_t a_qsize = extent_quantize(extent_node_size_get(a));
size_t b_qsize = extent_quantize(extent_node_size_get(b));
size_t a_qsize = extent_size_quantize_floor(extent_node_size_get(a));
size_t b_qsize = extent_size_quantize_floor(extent_node_size_get(b));
return ((a_qsize > b_qsize) - (a_qsize < b_qsize));
}

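A toy illustration of the floor/ceil split introduced above, using a hard-coded table instead of jemalloc's size2index()/index2size() so it stands alone. The class values assume the default four-size-classes-per-doubling spacing around 2 MiB chunks; they are examples, not an authoritative list.

#include <assert.h>
#include <stddef.h>

static const size_t classes[] = {	/* bytes: 2 MiB, 2.5 MiB, 3 MiB, ... */
	2097152, 2621440, 3145728, 3670016, 4194304
};
#define	NCLASSES	(sizeof(classes) / sizeof(classes[0]))

static size_t
toy_quantize_floor(size_t size)		/* Largest class <= size. */
{
	size_t ret = classes[0];
	unsigned i;

	for (i = 0; i < NCLASSES; i++) {
		if (classes[i] <= size)
			ret = classes[i];
	}
	return (ret);
}

static size_t
toy_quantize_ceil(size_t size)		/* Smallest class >= size. */
{
	unsigned i;

	for (i = 0; i < NCLASSES; i++) {
		if (classes[i] >= size)
			return (classes[i]);
	}
	return (0);			/* Beyond the toy table. */
}

int
main(void)
{
	size_t request = 2883584;	/* 2.75 MiB, not itself a class. */

	assert(toy_quantize_floor(request) == 2621440);	/* 2.5 MiB */
	assert(toy_quantize_ceil(request) == 3145728);	/* 3 MiB */
	return (0);
}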
View file

@ -15,20 +15,20 @@ huge_node_get(const void *ptr)
}
static bool
huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node)
huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node, bool *gdump)
{
assert(extent_node_addr_get(node) == ptr);
assert(!extent_node_achunk_get(node));
return (chunk_register(tsdn, ptr, node));
return (chunk_register(ptr, node, gdump));
}
static void
huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node)
huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node, bool *gdump)
{
bool err;
err = huge_node_set(tsdn, ptr, node);
err = huge_node_set(tsdn, ptr, node, gdump);
assert(!err);
}
@ -57,11 +57,13 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
arena_t *iarena;
extent_node_t *node;
size_t sn;
bool is_zeroed;
bool is_zeroed, gdump;
/* Allocate one or more contiguous chunks for this request. */
assert(!tsdn_null(tsdn) || arena != NULL);
/* prof_gdump() requirement. */
witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0);
ausize = sa2u(usize, alignment);
if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS))
@ -91,11 +93,13 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
extent_node_init(node, arena, ret, usize, sn, is_zeroed, true);
if (huge_node_set(tsdn, ret, node)) {
if (huge_node_set(tsdn, ret, node, &gdump)) {
arena_chunk_dalloc_huge(tsdn, arena, ret, usize, sn);
idalloctm(tsdn, node, NULL, true, true);
return (NULL);
}
if (config_prof && opt_prof && gdump)
prof_gdump(tsdn);
/* Insert node into huge. */
malloc_mutex_lock(tsdn, &arena->huge_mtx);
@ -144,7 +148,10 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize,
extent_node_t *node;
arena_t *arena;
chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
bool pre_zeroed, post_zeroed;
bool pre_zeroed, post_zeroed, gdump;
/* prof_gdump() requirement. */
witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0);
/* Increase usize to incorporate extra. */
for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1))
@ -178,10 +185,13 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize,
huge_node_unset(ptr, node);
assert(extent_node_size_get(node) != usize);
extent_node_size_set(node, usize);
huge_node_reset(tsdn, ptr, node);
huge_node_reset(tsdn, ptr, node, &gdump);
/* Update zeroed. */
extent_node_zeroed_set(node, post_zeroed);
malloc_mutex_unlock(tsdn, &arena->huge_mtx);
/* gdump without any locks held. */
if (config_prof && opt_prof && gdump)
prof_gdump(tsdn);
arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize);
@ -207,7 +217,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize,
arena_t *arena;
chunk_hooks_t chunk_hooks;
size_t cdiff;
bool pre_zeroed, post_zeroed;
bool pre_zeroed, post_zeroed, gdump;
node = huge_node_get(ptr);
arena = extent_node_arena_get(node);
@ -215,6 +225,8 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize,
chunk_hooks = chunk_hooks_get(tsdn, arena);
assert(oldsize > usize);
/* prof_gdump() requirement. */
witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0);
/* Split excess chunks. */
cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize);
@ -241,10 +253,13 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize,
/* Update the size of the huge allocation. */
huge_node_unset(ptr, node);
extent_node_size_set(node, usize);
huge_node_reset(tsdn, ptr, node);
huge_node_reset(tsdn, ptr, node, &gdump);
/* Update zeroed. */
extent_node_zeroed_set(node, post_zeroed);
malloc_mutex_unlock(tsdn, &arena->huge_mtx);
/* gdump without any locks held. */
if (config_prof && opt_prof && gdump)
prof_gdump(tsdn);
/* Zap the excess chunks. */
arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize,
@ -258,7 +273,7 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize,
size_t usize, bool zero) {
extent_node_t *node;
arena_t *arena;
bool is_zeroed_subchunk, is_zeroed_chunk;
bool is_zeroed_subchunk, is_zeroed_chunk, gdump;
node = huge_node_get(ptr);
arena = extent_node_arena_get(node);
@ -266,6 +281,9 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize,
is_zeroed_subchunk = extent_node_zeroed_get(node);
malloc_mutex_unlock(tsdn, &arena->huge_mtx);
/* prof_gdump() requirement. */
witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0);
/*
* Use is_zeroed_chunk to detect whether the trailing memory is zeroed,
* update extent's zeroed field, and zero as necessary.
@ -280,8 +298,11 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize,
extent_node_size_set(node, usize);
extent_node_zeroed_set(node, extent_node_zeroed_get(node) &&
is_zeroed_chunk);
huge_node_reset(tsdn, ptr, node);
huge_node_reset(tsdn, ptr, node, &gdump);
malloc_mutex_unlock(tsdn, &arena->huge_mtx);
/* gdump without any locks held. */
if (config_prof && opt_prof && gdump)
prof_gdump(tsdn);
if (zero || (config_fill && unlikely(opt_zero))) {
if (!is_zeroed_subchunk) {

View file

@ -794,18 +794,19 @@ stats_print_atexit(void)
* Begin initialization functions.
*/
#ifndef JEMALLOC_HAVE_SECURE_GETENV
static char *
secure_getenv(const char *name)
jemalloc_secure_getenv(const char *name)
{
#ifdef JEMALLOC_HAVE_SECURE_GETENV
return secure_getenv(name);
#else
# ifdef JEMALLOC_HAVE_ISSETUGID
if (issetugid() != 0)
return (NULL);
# endif
return (getenv(name));
}
#endif
}
static unsigned
malloc_ncpus(void)
@ -1022,7 +1023,7 @@ malloc_conf_init(void)
#endif
;
if ((opts = secure_getenv(envname)) != NULL) {
if ((opts = jemalloc_secure_getenv(envname)) != NULL) {
/*
* Do nothing; opts is already initialized to
* the value of the MALLOC_CONF environment
@ -1078,18 +1079,18 @@ malloc_conf_init(void)
k, klen, v, vlen); \
} else if (clip) { \
if (CONF_MIN_##check_min(um, \
(min))) \
(t)(min))) \
o = (t)(min); \
else if (CONF_MAX_##check_max( \
um, (max))) \
um, (t)(max))) \
o = (t)(max); \
else \
o = (t)um; \
} else { \
if (CONF_MIN_##check_min(um, \
(min)) || \
(t)(min)) || \
CONF_MAX_##check_max(um, \
(max))) { \
(t)(max))) { \
malloc_conf_error( \
"Out-of-range " \
"conf value", \
@ -1139,16 +1140,18 @@ malloc_conf_init(void)
CONF_HANDLE_BOOL(opt_abort, "abort", true)
/*
* Chunks always require at least one header page,
* as many as 2^(LG_SIZE_CLASS_GROUP+1) data pages, and
* possibly an additional page in the presence of
* redzones. In order to simplify options processing,
* use a conservative bound that accommodates all these
* constraints.
* Chunks always require at least one header page, as
* many as 2^(LG_SIZE_CLASS_GROUP+1) data pages (plus an
* additional page in the presence of cache-oblivious
* large), and possibly an additional page in the
* presence of redzones. In order to simplify options
* processing, use a conservative bound that
* accommodates all these constraints.
*/
CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE +
LG_SIZE_CLASS_GROUP + (config_fill ? 2 : 1),
(sizeof(size_t) << 3) - 1, yes, yes, true)
LG_SIZE_CLASS_GROUP + 1 + ((config_cache_oblivious
|| config_fill) ? 1 : 0), (sizeof(size_t) << 3) - 1,
yes, yes, true)
if (strncmp("dss", k, klen) == 0) {
int i;
bool match = false;
@ -1273,6 +1276,9 @@ malloc_conf_init(void)
"lg_tcache_max", -1,
(sizeof(size_t) << 3) - 1)
}
if (config_thp) {
CONF_HANDLE_BOOL(opt_thp, "thp", true)
}
if (config_prof) {
CONF_HANDLE_BOOL(opt_prof, "prof", true)
CONF_HANDLE_CHAR_P(opt_prof_prefix,
@ -2932,6 +2938,7 @@ _malloc_prefork(void)
witness_prefork(tsd);
/* Acquire all mutexes in a safe order. */
ctl_prefork(tsd_tsdn(tsd));
tcache_prefork(tsd_tsdn(tsd));
malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock);
prof_prefork0(tsd_tsdn(tsd));
for (i = 0; i < 3; i++) {
@ -2991,6 +2998,7 @@ _malloc_postfork(void)
}
prof_postfork_parent(tsd_tsdn(tsd));
malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock);
tcache_postfork_parent(tsd_tsdn(tsd));
ctl_postfork_parent(tsd_tsdn(tsd));
}
@ -3015,6 +3023,7 @@ jemalloc_postfork_child(void)
}
prof_postfork_child(tsd_tsdn(tsd));
malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock);
tcache_postfork_child(tsd_tsdn(tsd));
ctl_postfork_child(tsd_tsdn(tsd));
}

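The fork-hook hunks above slot tcache_prefork()/tcache_postfork_*() into jemalloc's existing prefork/postfork sequence. A minimal sketch of the general pattern, with nothing jemalloc-specific (app_lock and the hook names are made up): acquire the lock before fork(2) and release it in both parent and child, registered via pthread_atfork(3).

#include <pthread.h>

static pthread_mutex_t app_lock = PTHREAD_MUTEX_INITIALIZER;

static void
app_prefork(void)
{

	pthread_mutex_lock(&app_lock);
}

static void
app_postfork_parent(void)
{

	pthread_mutex_unlock(&app_lock);
}

static void
app_postfork_child(void)
{

	pthread_mutex_unlock(&app_lock);
}

int
main(void)
{

	return (pthread_atfork(app_prefork, app_postfork_parent,
	    app_postfork_child));
}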
View file

@ -199,7 +199,7 @@ pages_huge(void *addr, size_t size)
assert(PAGE_ADDR2BASE(addr) == addr);
assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_THP
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
return (false);
@ -213,7 +213,7 @@ pages_nohuge(void *addr, size_t size)
assert(PAGE_ADDR2BASE(addr) == addr);
assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_THP
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
return (false);

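With the rename above, JEMALLOC_THP is the build-time toggle while JEMALLOC_HAVE_MADVISE_HUGE records the OS capability, so pages_huge()/pages_nohuge() gate on the madvise(2) flags alone. A standalone sketch of what the no-huge path amounts to on Linux; MADV_NOHUGEPAGE is Linux-specific, hence the guard, and this is not jemalloc code.

#include <sys/mman.h>
#include <stddef.h>

static int
opt_out_of_thp(void *addr, size_t size)
{

#ifdef MADV_NOHUGEPAGE
	return (madvise(addr, size, MADV_NOHUGEPAGE));
#else
	(void)addr; (void)size;
	return (0);	/* No THP madvise flags; nothing to do. */
#endif
}

int
main(void)
{
	size_t len = (size_t)4 << 20;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);

	if (p == MAP_FAILED)
		return (1);
	(void)opt_out_of_thp(p, len);
	munmap(p, len);
	return (0);
}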
View file

@ -39,7 +39,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
bool json, bool large, bool huge, unsigned i)
{
size_t page;
bool config_tcache, in_gap, in_gap_prev;
bool in_gap, in_gap_prev;
unsigned nbins, j;
CTL_GET("arenas.page", &page, size_t);
@ -49,7 +49,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
malloc_cprintf(write_cb, cbopaque,
"\t\t\t\t\"bins\": [\n");
} else {
CTL_GET("config.tcache", &config_tcache, bool);
if (config_tcache) {
malloc_cprintf(write_cb, cbopaque,
"bins: size ind allocated nmalloc"
@ -137,8 +136,16 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
availregs = nregs * curruns;
milli = (availregs != 0) ? (1000 * curregs) / availregs
: 1000;
assert(milli <= 1000);
if (milli < 10) {
if (milli > 1000) {
/*
* Race detected: the counters were read in
* separate mallctl calls and concurrent
* operations happened in between. In this case
* no meaningful utilization can be computed.
*/
malloc_snprintf(util, sizeof(util), " race");
} else if (milli < 10) {
malloc_snprintf(util, sizeof(util),
"0.00%zu", milli);
} else if (milli < 100) {
@ -147,8 +154,10 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
} else if (milli < 1000) {
malloc_snprintf(util, sizeof(util), "0.%zu",
milli);
} else
} else {
assert(milli == 1000);
malloc_snprintf(util, sizeof(util), "1");
}
if (config_tcache) {
malloc_cprintf(write_cb, cbopaque,
@ -536,7 +545,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
"\t\t\t\t\t\"allocated\": %zu\n", metadata_allocated);
malloc_cprintf(write_cb, cbopaque,
"\t\t\t\t},\n");
"\t\t\t\t}%s\n", (bins || large || huge) ? "," : "");
} else {
malloc_cprintf(write_cb, cbopaque,
"metadata: mapped: %zu, allocated: %zu\n",
@ -555,7 +564,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
static void
stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
bool json, bool merged, bool unmerged)
bool json, bool more)
{
const char *cpv;
bool bv;
@ -741,6 +750,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
OPT_WRITE_BOOL(xmalloc, ",")
OPT_WRITE_BOOL(tcache, ",")
OPT_WRITE_SSIZE_T(lg_tcache_max, ",")
OPT_WRITE_BOOL(thp, ",")
OPT_WRITE_BOOL(prof, ",")
OPT_WRITE_CHAR_P(prof_prefix, ",")
OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",")
@ -838,9 +848,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
malloc_cprintf(write_cb, cbopaque,
"\t\t\t\"nbins\": %u,\n", nbins);
CTL_GET("arenas.nhbins", &uv, unsigned);
malloc_cprintf(write_cb, cbopaque,
"\t\t\t\"nhbins\": %u,\n", uv);
if (config_tcache) {
CTL_GET("arenas.nhbins", &uv, unsigned);
malloc_cprintf(write_cb, cbopaque,
"\t\t\t\"nhbins\": %u,\n", uv);
}
malloc_cprintf(write_cb, cbopaque,
"\t\t\t\"bin\": [\n");
@ -907,11 +919,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
"\t\t\t]\n");
malloc_cprintf(write_cb, cbopaque,
"\t\t},\n");
"\t\t}%s\n", (config_prof || more) ? "," : "");
}
/* prof. */
if (json) {
if (config_prof && json) {
malloc_cprintf(write_cb, cbopaque,
"\t\t\"prof\": {\n");
@ -937,8 +949,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
"\t\t\t\"lg_sample\": %zd\n", ssv);
malloc_cprintf(write_cb, cbopaque,
"\t\t}%s\n", (config_stats || merged || unmerged) ? "," :
"");
"\t\t}%s\n", more ? "," : "");
}
}
@ -1023,31 +1034,37 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque,
narenas, bins, large, huge);
if (json) {
malloc_cprintf(write_cb, cbopaque,
"\t\t\t}%s\n", (ninitialized > 1) ?
"," : "");
"\t\t\t}%s\n", unmerged ? "," :
"");
}
}
/* Unmerged stats. */
for (i = j = 0; i < narenas; i++) {
if (initialized[i]) {
if (json) {
j++;
malloc_cprintf(write_cb,
cbopaque,
"\t\t\t\"%u\": {\n", i);
} else {
malloc_cprintf(write_cb,
cbopaque, "\narenas[%u]:\n",
i);
}
stats_arena_print(write_cb, cbopaque,
json, i, bins, large, huge);
if (json) {
malloc_cprintf(write_cb,
cbopaque,
"\t\t\t}%s\n", (j <
ninitialized) ? "," : "");
if (unmerged) {
for (i = j = 0; i < narenas; i++) {
if (initialized[i]) {
if (json) {
j++;
malloc_cprintf(write_cb,
cbopaque,
"\t\t\t\"%u\": {\n",
i);
} else {
malloc_cprintf(write_cb,
cbopaque,
"\narenas[%u]:\n",
i);
}
stats_arena_print(write_cb,
cbopaque, json, i, bins,
large, huge);
if (json) {
malloc_cprintf(write_cb,
cbopaque,
"\t\t\t}%s\n", (j <
ninitialized) ? ","
: "");
}
}
}
}
@ -1069,8 +1086,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
size_t u64sz;
bool json = false;
bool general = true;
bool merged = true;
bool unmerged = true;
bool merged = config_stats;
bool unmerged = config_stats;
bool bins = true;
bool large = true;
bool huge = true;
@ -1137,8 +1154,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
"___ Begin jemalloc statistics ___\n");
}
if (general)
stats_general_print(write_cb, cbopaque, json, merged, unmerged);
if (general) {
bool more = (merged || unmerged);
stats_general_print(write_cb, cbopaque, json, more);
}
if (config_stats) {
stats_print_helper(write_cb, cbopaque, json, merged, unmerged,
bins, large, huge);

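The ChangeLog item about JSON-formatted malloc_stats_print() bugs lands in the hunks above. A hedged usage sketch: malloc_stats_print() is public API; "J" in the options string selects JSON output, and characters such as "g" (general) or "a" (per-arena) omit sections.

#include <stddef.h>
#include <malloc_np.h>	/* <jemalloc/jemalloc.h> for standalone jemalloc. */

int
main(void)
{

	/* Emit JSON-formatted statistics via the default write callback. */
	malloc_stats_print(NULL, NULL, "J");
	return (0);
}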
View file

@ -21,6 +21,9 @@ static unsigned tcaches_past;
/* Head of singly linked list tracking available tcaches elements. */
static tcaches_t *tcaches_avail;
/* Protects tcaches{,_past,_avail}. */
static malloc_mutex_t tcaches_mtx;
/******************************************************************************/
size_t
@ -444,29 +447,56 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
}
}
bool
tcaches_create(tsd_t *tsd, unsigned *r_ind)
{
arena_t *arena;
tcache_t *tcache;
tcaches_t *elm;
static bool
tcaches_create_prep(tsd_t *tsd) {
bool err;
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
if (tcaches == NULL) {
tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) *
(MALLOCX_TCACHE_MAX+1));
if (tcaches == NULL)
return (true);
if (tcaches == NULL) {
err = true;
goto label_return;
}
}
if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX)
return (true);
arena = arena_ichoose(tsd, NULL);
if (unlikely(arena == NULL))
return (true);
tcache = tcache_create(tsd_tsdn(tsd), arena);
if (tcache == NULL)
return (true);
if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) {
err = true;
goto label_return;
}
err = false;
label_return:
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
return err;
}
bool
tcaches_create(tsd_t *tsd, unsigned *r_ind) {
bool err;
arena_t *arena;
tcache_t *tcache;
tcaches_t *elm;
if (tcaches_create_prep(tsd)) {
err = true;
goto label_return;
}
arena = arena_ichoose(tsd, NULL);
if (unlikely(arena == NULL)) {
err = true;
goto label_return;
}
tcache = tcache_create(tsd_tsdn(tsd), arena);
if (tcache == NULL) {
err = true;
goto label_return;
}
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
if (tcaches_avail != NULL) {
elm = tcaches_avail;
tcaches_avail = tcaches_avail->next;
@ -478,41 +508,50 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind)
*r_ind = tcaches_past;
tcaches_past++;
}
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
return (false);
err = false;
label_return:
malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &tcaches_mtx);
return err;
}
static void
tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm)
{
tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) {
malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
if (elm->tcache == NULL)
if (elm->tcache == NULL) {
return;
}
tcache_destroy(tsd, elm->tcache);
elm->tcache = NULL;
}
void
tcaches_flush(tsd_t *tsd, unsigned ind)
{
tcaches_flush(tsd_t *tsd, unsigned ind) {
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
tcaches_elm_flush(tsd, &tcaches[ind]);
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
}
void
tcaches_destroy(tsd_t *tsd, unsigned ind)
{
tcaches_t *elm = &tcaches[ind];
tcaches_destroy(tsd_t *tsd, unsigned ind) {
tcaches_t *elm;
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
elm = &tcaches[ind];
tcaches_elm_flush(tsd, elm);
elm->next = tcaches_avail;
tcaches_avail = elm;
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
}
bool
tcache_boot(tsdn_t *tsdn)
{
tcache_boot(tsdn_t *tsdn) {
unsigned i;
cassert(config_tcache);
/*
* If necessary, clamp opt_lg_tcache_max, now that large_maxclass is
* known.
@ -524,6 +563,10 @@ tcache_boot(tsdn_t *tsdn)
else
tcache_maxclass = (ZU(1) << opt_lg_tcache_max);
if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES)) {
return true;
}
nhbins = size2index(tcache_maxclass) + 1;
/* Initialize tcache_bin_info. */
@ -553,3 +596,24 @@ tcache_boot(tsdn_t *tsdn)
return (false);
}
void
tcache_prefork(tsdn_t *tsdn) {
if (!config_prof && opt_tcache) {
malloc_mutex_prefork(tsdn, &tcaches_mtx);
}
}
void
tcache_postfork_parent(tsdn_t *tsdn) {
if (!config_prof && opt_tcache) {
malloc_mutex_postfork_parent(tsdn, &tcaches_mtx);
}
}
void
tcache_postfork_child(tsdn_t *tsdn) {
if (!config_prof && opt_tcache) {
malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
}
}

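The tcaches_mtx introduced above protects the explicit-tcache machinery reachable through public mallctl names. A hedged sketch of that interface; "tcache.create"/"tcache.destroy" and MALLOCX_TCACHE() are documented jemalloc API, and error handling here is deliberately minimal.

#include <stddef.h>
#include <malloc_np.h>	/* <jemalloc/jemalloc.h> for standalone jemalloc. */

int
main(void)
{
	unsigned tci;
	size_t sz = sizeof(tci);
	void *p;

	if (mallctl("tcache.create", &tci, &sz, NULL, 0) != 0)
		return (1);
	p = mallocx(4096, MALLOCX_TCACHE(tci));
	if (p != NULL)
		dallocx(p, MALLOCX_TCACHE(tci));
	(void)mallctl("tcache.destroy", NULL, NULL, &tci, sizeof(tci));
	return (0);
}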
View file

@ -71,15 +71,16 @@ witness_not_owner_error_t *witness_not_owner_error =
#endif
#ifdef JEMALLOC_JET
#undef witness_lockless_error
#define witness_lockless_error JEMALLOC_N(n_witness_lockless_error)
#undef witness_depth_error
#define witness_depth_error JEMALLOC_N(n_witness_depth_error)
#endif
void
witness_lockless_error(const witness_list_t *witnesses)
{
witness_depth_error(const witness_list_t *witnesses,
witness_rank_t rank_inclusive, unsigned depth) {
witness_t *w;
malloc_printf("<jemalloc>: Should not own any locks:");
malloc_printf("<jemalloc>: Should own %u lock%s of rank >= %u:", depth,
(depth != 1) ? "s" : "", rank_inclusive);
ql_foreach(w, witnesses, link) {
malloc_printf(" %s(%u)", w->name, w->rank);
}
@ -87,10 +88,9 @@ witness_lockless_error(const witness_list_t *witnesses)
abort();
}
#ifdef JEMALLOC_JET
#undef witness_lockless_error
#define witness_lockless_error JEMALLOC_N(witness_lockless_error)
witness_lockless_error_t *witness_lockless_error =
JEMALLOC_N(n_witness_lockless_error);
#undef witness_depth_error
#define witness_depth_error JEMALLOC_N(witness_depth_error)
witness_depth_error_t *witness_depth_error = JEMALLOC_N(n_witness_depth_error);
#endif
void