diff --git a/contrib/jemalloc/COPYING b/contrib/jemalloc/COPYING index 019e8132275d..bdda0feb9e5d 100644 --- a/contrib/jemalloc/COPYING +++ b/contrib/jemalloc/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2013 Jason Evans . +Copyright (C) 2002-2014 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2014 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/contrib/jemalloc/ChangeLog b/contrib/jemalloc/ChangeLog index 0efc7426d72c..c5e4198da375 100644 --- a/contrib/jemalloc/ChangeLog +++ b/contrib/jemalloc/ChangeLog @@ -6,6 +6,59 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.5.0 (January 22, 2014) + + This version focuses on refactoring and automated testing, though it also + includes some non-trivial heap profiling optimizations not mentioned below. + + New features: + - Add the *allocx() API, which is a successor to the experimental *allocm() + API. The *allocx() functions are slightly simpler to use because they have + fewer parameters, they directly return the results of primary interest, and + mallocx()/rallocx() avoid the strict aliasing pitfall that + allocm()/rallocm() share with posix_memalign(). Note that *allocm() is + slated for removal in the next non-bugfix release. + - Add support for LinuxThreads. + + Bug fixes: + - Unless heap profiling is enabled, disable floating point code and don't link + with libm. This, in combination with e.g. 
EXTRA_CFLAGS=-mno-sse on x64 + systems, makes it possible to completely disable floating point register + use. Some versions of glibc neglect to save/restore caller-saved floating + point registers during dynamic lazy symbol loading, and the symbol loading + code uses whatever malloc the application happens to have linked/loaded + with, the result being potential floating point register corruption. + - Report ENOMEM rather than EINVAL if an OOM occurs during heap profiling + backtrace creation in imemalign(). This bug impacted posix_memalign() and + aligned_alloc(). + - Fix a file descriptor leak in a prof_dump_maps() error path. + - Fix prof_dump() to close the dump file descriptor for all relevant error + paths. + - Fix rallocm() to use the arena specified by the ALLOCM_ARENA(s) flag for + allocation, not just deallocation. + - Fix a data race for large allocation stats counters. + - Fix a potential infinite loop during thread exit. This bug occurred on + Solaris, and could affect other platforms with similar pthreads TSD + implementations. + - Don't junk-fill reallocations unless usable size changes. This fixes a + violation of the *allocx()/*allocm() semantics. + - Fix growing large reallocation to junk fill new space. + - Fix huge deallocation to junk fill when munmap is disabled. + - Change the default private namespace prefix from empty to je_, and change + --with-private-namespace-prefix so that it prepends an additional prefix + rather than replacing je_. This reduces the likelihood of applications + which statically link jemalloc experiencing symbol name collisions. + - Add missing private namespace mangling (relevant when + --with-private-namespace is specified). + - Add and use JEMALLOC_INLINE_C so that static inline functions are marked as + static even for debug builds. + - Add a missing mutex unlock in a malloc_init_hard() error path. In practice + this error path is never executed. 
+ - Fix numerous bugs in malloc_strtoumax() error handling/reporting. These + bugs had no impact except for malformed inputs. + - Fix numerous bugs in malloc_snprintf(). These bugs were not exercised by + existing calls, so they had no impact. + * 3.4.1 (October 20, 2013) Bug fixes: diff --git a/contrib/jemalloc/FREEBSD-Xlist b/contrib/jemalloc/FREEBSD-Xlist index cd0f8add7b60..63fc0607803e 100644 --- a/contrib/jemalloc/FREEBSD-Xlist +++ b/contrib/jemalloc/FREEBSD-Xlist @@ -10,14 +10,35 @@ autom4te.cache/ bin/ config.* configure* +coverage.sh doc/*.in doc/*.xml doc/*.xsl doc/*.html include/jemalloc/internal/jemalloc_internal.h.in +include/jemalloc/internal/jemalloc_internal_defs.h.in +include/jemalloc/internal/private_namespace.sh +include/jemalloc/internal/private_symbols.txt +include/jemalloc/internal/private_unnamespace.h +include/jemalloc/internal/private_unnamespace.sh +include/jemalloc/internal/public_namespace.sh +include/jemalloc/internal/public_symbols.txt +include/jemalloc/internal/public_unnamespace.h +include/jemalloc/internal/public_unnamespace.sh include/jemalloc/internal/size_classes.sh include/jemalloc/jemalloc.h.in +include/jemalloc/jemalloc.sh include/jemalloc/jemalloc_defs.h.in +include/jemalloc/jemalloc_macros.h +include/jemalloc/jemalloc_macros.h.in +include/jemalloc/jemalloc_mangle_jet.h +include/jemalloc/jemalloc_mangle.sh +include/jemalloc/jemalloc_mangle.h +include/jemalloc/jemalloc_protos_jet.h +include/jemalloc/jemalloc_protos.h +include/jemalloc/jemalloc_protos.h.in +include/jemalloc/jemalloc_rename.h +include/jemalloc/jemalloc_rename.sh include/msvc_compat/ install-sh src/zone.c diff --git a/contrib/jemalloc/FREEBSD-diffs b/contrib/jemalloc/FREEBSD-diffs index ac22cf54e0ac..e621af750ed0 100644 --- a/contrib/jemalloc/FREEBSD-diffs +++ b/contrib/jemalloc/FREEBSD-diffs @@ -1,8 +1,8 @@ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in -index abd5e6f..1d7491a 100644 +index c7e2e87..2bd59f0 100644 --- a/doc/jemalloc.xml.in +++ 
b/doc/jemalloc.xml.in -@@ -51,12 +51,23 @@ +@@ -57,12 +57,23 @@ This manual describes jemalloc @jemalloc_version@. More information can be found at the jemalloc website. @@ -27,7 +27,7 @@ index abd5e6f..1d7491a 100644 Standard API -@@ -2180,4 +2191,16 @@ malloc_conf = "lg_chunk:24";]]> +@@ -2338,4 +2349,19 @@ malloc_conf = "lg_chunk:24";]]> The posix_memalign function conforms to IEEE Std 1003.1-2001 (“POSIX.1”). @@ -42,32 +42,35 @@ index abd5e6f..1d7491a 100644 + mallctl*, and + *allocm functions first appeared in + FreeBSD 10.0. ++ ++ The *allocx functions first appeared ++ in FreeBSD 11.0. + diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in -index 53c135c..c547339 100644 +index d24a1fe..d101c3d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,5 +1,8 @@ #ifndef JEMALLOC_INTERNAL_H - #define JEMALLOC_INTERNAL_H + #define JEMALLOC_INTERNAL_H +#include "libc_private.h" +#include "namespace.h" + #include #ifdef _WIN32 # include -@@ -54,6 +57,9 @@ typedef intptr_t ssize_t; +@@ -65,6 +68,9 @@ typedef intptr_t ssize_t; + #include #endif - #include +#include "un-namespace.h" +#include "libc_private.h" + #define JEMALLOC_NO_DEMANGLE - #include "../jemalloc@install_suffix@.h" - -@@ -95,13 +101,7 @@ static const bool config_fill = + #ifdef JEMALLOC_JET + # define JEMALLOC_N(n) jet_##n +@@ -99,13 +105,7 @@ static const bool config_fill = false #endif ; @@ -96,36 +99,24 @@ index de44e14..564d604 100644 #endif bool malloc_mutex_init(malloc_mutex_t *mutex); -diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h -index cdb0b0e..2a98d1f 100644 ---- a/include/jemalloc/internal/private_namespace.h -+++ b/include/jemalloc/internal/private_namespace.h -@@ -218,7 +218,6 @@ - #define iralloc JEMALLOC_N(iralloc) - #define irallocx JEMALLOC_N(irallocx) - #define isalloc JEMALLOC_N(isalloc) --#define 
isthreaded JEMALLOC_N(isthreaded) - #define ivsalloc JEMALLOC_N(ivsalloc) - #define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) - #define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) -diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in -index 31b1304..c3ef2f5 100644 ---- a/include/jemalloc/jemalloc.h.in -+++ b/include/jemalloc/jemalloc.h.in -@@ -15,6 +15,7 @@ extern "C" { - #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" - - #include "jemalloc_defs@install_suffix@.h" -+#include "jemalloc_FreeBSD.h" - - #ifdef JEMALLOC_EXPERIMENTAL - #define ALLOCM_LG_ALIGN(la) (la) +diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt +index 1e64ed5..29ddba3 100644 +--- a/include/jemalloc/internal/private_symbols.txt ++++ b/include/jemalloc/internal/private_symbols.txt +@@ -225,7 +225,6 @@ iralloc + iralloct + iralloct_realign + isalloc +-isthreaded + ivsalloc + ixalloc + jemalloc_postfork_child diff --git a/include/jemalloc/jemalloc_FreeBSD.h b/include/jemalloc/jemalloc_FreeBSD.h new file mode 100644 -index 0000000..e6c8407 +index 0000000..94554bc --- /dev/null +++ b/include/jemalloc/jemalloc_FreeBSD.h -@@ -0,0 +1,117 @@ +@@ -0,0 +1,134 @@ +/* + * Override settings that were generated in jemalloc_defs.h as necessary. 
+ */ @@ -202,6 +193,12 @@ index 0000000..e6c8407 +#undef je_free +#undef je_posix_memalign +#undef je_malloc_usable_size ++#undef je_mallocx ++#undef je_rallocx ++#undef je_xallocx ++#undef je_sallocx ++#undef je_dallocx ++#undef je_nallocx +#undef je_allocm +#undef je_rallocm +#undef je_sallocm @@ -213,6 +210,12 @@ index 0000000..e6c8407 +#define je_free __free +#define je_posix_memalign __posix_memalign +#define je_malloc_usable_size __malloc_usable_size ++#define je_mallocx __mallocx ++#define je_rallocx __rallocx ++#define je_xallocx __xallocx ++#define je_sallocx __sallocx ++#define je_dallocx __dallocx ++#define je_nallocx __nallocx +#define je_allocm __allocm +#define je_rallocm __rallocm +#define je_sallocm __sallocm @@ -236,15 +239,31 @@ index 0000000..e6c8407 +__weak_reference(__free, free); +__weak_reference(__posix_memalign, posix_memalign); +__weak_reference(__malloc_usable_size, malloc_usable_size); ++__weak_reference(__mallocx, mallocx); ++__weak_reference(__rallocx, rallocx); ++__weak_reference(__xallocx, xallocx); ++__weak_reference(__sallocx, sallocx); ++__weak_reference(__dallocx, dallocx); ++__weak_reference(__nallocx, nallocx); +__weak_reference(__allocm, allocm); +__weak_reference(__rallocm, rallocm); +__weak_reference(__sallocm, sallocm); +__weak_reference(__dallocm, dallocm); +__weak_reference(__nallocm, nallocm); +#endif +diff --git a/include/jemalloc/jemalloc_rename.sh b/include/jemalloc/jemalloc_rename.sh +index f943891..47d032c 100755 +--- a/include/jemalloc/jemalloc_rename.sh ++++ b/include/jemalloc/jemalloc_rename.sh +@@ -19,4 +19,6 @@ done + + cat < -.\" Date: 10/20/2013 +.\" Date: 01/22/2014 .\" Manual: User Manual -.\" Source: jemalloc 3.4.1-0-g0135fb806e4137dc9cdf152541926a2bc95e33f0 +.\" Source: jemalloc 3.5.0-0-gcc47dde16203a6ae7eb685b53e1ae501f3869bc6 .\" Language: English .\" -.TH "JEMALLOC" "3" "10/20/2013" "jemalloc 3.4.1-0-g0135fb806e41" "User Manual" +.TH "JEMALLOC" "3" "01/22/2014" "jemalloc 3.5.0-0-gcc47dde16203" "User 
Manual" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- @@ -31,7 +31,7 @@ jemalloc \- general purpose memory allocation functions .SH "LIBRARY" .PP -This manual describes jemalloc 3\&.4\&.1\-0\-g0135fb806e4137dc9cdf152541926a2bc95e33f0\&. More information can be found at the +This manual describes jemalloc 3\&.5\&.0\-0\-gcc47dde16203a6ae7eb685b53e1ae501f3869bc6\&. More information can be found at the \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. .PP The following configuration options are enabled in libc\*(Aqs built\-in jemalloc: @@ -71,16 +71,28 @@ make variable)\&. .HP \w'void\ free('u .BI "void free(void\ *" "ptr" ");" .SS "Non\-standard API" -.HP \w'size_t\ malloc_usable_size('u -.BI "size_t malloc_usable_size(const\ void\ *" "ptr" ");" -.HP \w'void\ malloc_stats_print('u -.BI "void malloc_stats_print(void\ " "(*write_cb)" "\ (void\ *,\ const\ char\ *), void\ *" "cbopaque" ", const\ char\ *" "opts" ");" +.HP \w'void\ *mallocx('u +.BI "void *mallocx(size_t\ " "size" ", int\ " "flags" ");" +.HP \w'void\ *rallocx('u +.BI "void *rallocx(void\ *" "ptr" ", size_t\ " "size" ", int\ " "flags" ");" +.HP \w'size_t\ xallocx('u +.BI "size_t xallocx(void\ *" "ptr" ", size_t\ " "size" ", size_t\ " "extra" ", int\ " "flags" ");" +.HP \w'size_t\ sallocx('u +.BI "size_t sallocx(void\ *" "ptr" ", int\ " "flags" ");" +.HP \w'void\ dallocx('u +.BI "void dallocx(void\ *" "ptr" ", int\ " "flags" ");" +.HP \w'size_t\ nallocx('u +.BI "size_t nallocx(size_t\ " "size" ", int\ " "flags" ");" .HP \w'int\ mallctl('u .BI "int mallctl(const\ char\ *" "name" ", void\ *" "oldp" ", size_t\ *" "oldlenp" ", void\ *" "newp" ", size_t\ " "newlen" ");" .HP \w'int\ mallctlnametomib('u .BI "int mallctlnametomib(const\ char\ *" "name" ", size_t\ *" "mibp" ", size_t\ *" "miblenp" ");" .HP \w'int\ mallctlbymib('u .BI "int mallctlbymib(const\ size_t\ *" "mib" ", size_t\ 
" "miblen" ", void\ *" "oldp" ", size_t\ *" "oldlenp" ", void\ *" "newp" ", size_t\ " "newlen" ");" +.HP \w'void\ malloc_stats_print('u +.BI "void malloc_stats_print(void\ " "(*write_cb)" "\ (void\ *,\ const\ char\ *), void\ *" "cbopaque" ", const\ char\ *" "opts" ");" +.HP \w'size_t\ malloc_usable_size('u +.BI "size_t malloc_usable_size(const\ void\ *" "ptr" ");" .HP \w'void\ (*malloc_message)('u .BI "void (*malloc_message)(void\ *" "cbopaque" ", const\ char\ *" "s" ");" .PP @@ -172,36 +184,105 @@ is .SS "Non\-standard API" .PP The -\fBmalloc_usable_size\fR\fB\fR -function returns the usable size of the allocation pointed to by -\fIptr\fR\&. The return value may be larger than the size that was requested during allocation\&. The -\fBmalloc_usable_size\fR\fB\fR -function is not a mechanism for in\-place -\fBrealloc\fR\fB\fR; rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by -\fBmalloc_usable_size\fR\fB\fR -should not be depended on, since such behavior is entirely implementation\-dependent\&. +\fBmallocx\fR\fB\fR, +\fBrallocx\fR\fB\fR, +\fBxallocx\fR\fB\fR, +\fBsallocx\fR\fB\fR, +\fBdallocx\fR\fB\fR, and +\fBnallocx\fR\fB\fR +functions all have a +\fIflags\fR +argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following: +.PP +\fBMALLOCX_LG_ALIGN(\fR\fB\fIla\fR\fR\fB) \fR +.RS 4 +Align the memory allocation to start at an address that is a multiple of +(1 << \fIla\fR)\&. This macro does not validate that +\fIla\fR +is within the valid range\&. +.RE +.PP +\fBMALLOCX_ALIGN(\fR\fB\fIa\fR\fR\fB) \fR +.RS 4 +Align the memory allocation to start at an address that is a multiple of +\fIa\fR, where +\fIa\fR +is a power of two\&. This macro does not validate that +\fIa\fR +is a power of 2\&. 
+.RE +.PP +\fBMALLOCX_ZERO\fR +.RS 4 +Initialize newly allocated memory to contain zero bytes\&. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes\&. If this macro is absent, newly allocated memory is uninitialized\&. +.RE +.PP +\fBMALLOCX_ARENA(\fR\fB\fIa\fR\fR\fB) \fR +.RS 4 +Use the arena specified by the index +\fIa\fR +(and by necessity bypass the thread cache)\&. This macro has no effect for huge regions, nor for regions that were allocated via an arena other than the one specified\&. This macro does not validate that +\fIa\fR +specifies an arena index in the valid range\&. +.RE .PP The -\fBmalloc_stats_print\fR\fB\fR -function writes human\-readable summary statistics via the -\fIwrite_cb\fR -callback function pointer and -\fIcbopaque\fR -data passed to -\fIwrite_cb\fR, or -\fBmalloc_message\fR\fB\fR -if -\fIwrite_cb\fR +\fBmallocx\fR\fB\fR +function allocates at least +\fIsize\fR +bytes of memory, and returns a pointer to the base address of the allocation\&. Behavior is undefined if +\fIsize\fR is -\fBNULL\fR\&. This function can be called repeatedly\&. General information that never changes during execution can be omitted by specifying "g" as a character within the -\fIopts\fR -string\&. Note that -\fBmalloc_message\fR\fB\fR -uses the -\fBmallctl*\fR\fB\fR -functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously\&. If -\fB\-\-enable\-stats\fR -is specified during configuration, \(lqm\(rq and \(lqa\(rq can be specified to omit merged arena and per arena statistics, respectively; \(lqb\(rq and \(lql\(rq can be specified to omit per size class statistics for bins and large objects, respectively\&. Unrecognized characters are silently ignored\&. 
Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&. +\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. +.PP +The +\fBrallocx\fR\fB\fR +function resizes the allocation at +\fIptr\fR +to be at least +\fIsize\fR +bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location\&. Behavior is undefined if +\fIsize\fR +is +\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. +.PP +The +\fBxallocx\fR\fB\fR +function resizes the allocation at +\fIptr\fR +in place to be at least +\fIsize\fR +bytes, and returns the real size of the allocation\&. If +\fIextra\fR +is non\-zero, an attempt is made to resize the allocation to be at least +(\fIsize\fR + \fIextra\fR) +bytes, though inability to allocate the extra byte(s) will not by itself result in failure to resize\&. Behavior is undefined if +\fIsize\fR +is +\fB0\fR, or if +(\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&. +.PP +The +\fBsallocx\fR\fB\fR +function returns the real size of the allocation at +\fIptr\fR\&. +.PP +The +\fBdallocx\fR\fB\fR +function causes the memory referenced by +\fIptr\fR +to be made available for future allocations\&. +.PP +The +\fBnallocx\fR\fB\fR +function allocates no memory, but it performs the same size computation as the +\fBmallocx\fR\fB\fR +function, and returns the real size of the allocation that would result from the equivalent +\fBmallocx\fR\fB\fR +function call\&. Behavior is undefined if +\fIsize\fR +is +\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. 
.PP The \fBmallctl\fR\fB\fR @@ -245,15 +326,14 @@ that is smaller than the number of period\-separated name components, which resu .\} .nf unsigned nbins, i; - -int mib[4]; +size_t mib[4]; size_t len, miblen; len = sizeof(nbins); mallctl("arenas\&.nbins", &nbins, &len, NULL, 0); miblen = 4; -mallnametomib("arenas\&.bin\&.0\&.size", mib, &miblen); +mallctlnametomib("arenas\&.bin\&.0\&.size", mib, &miblen); for (i = 0; i < nbins; i++) { size_t bin_size; @@ -266,6 +346,38 @@ for (i = 0; i < nbins; i++) { .if n \{\ .RE .\} +.PP +The +\fBmalloc_stats_print\fR\fB\fR +function writes human\-readable summary statistics via the +\fIwrite_cb\fR +callback function pointer and +\fIcbopaque\fR +data passed to +\fIwrite_cb\fR, or +\fBmalloc_message\fR\fB\fR +if +\fIwrite_cb\fR +is +\fBNULL\fR\&. This function can be called repeatedly\&. General information that never changes during execution can be omitted by specifying "g" as a character within the +\fIopts\fR +string\&. Note that +\fBmalloc_message\fR\fB\fR +uses the +\fBmallctl*\fR\fB\fR +functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously\&. If +\fB\-\-enable\-stats\fR +is specified during configuration, \(lqm\(rq and \(lqa\(rq can be specified to omit merged arena and per arena statistics, respectively; \(lqb\(rq and \(lql\(rq can be specified to omit per size class statistics for bins and large objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&. +.PP +The +\fBmalloc_usable_size\fR\fB\fR +function returns the usable size of the allocation pointed to by +\fIptr\fR\&. The return value may be larger than the size that was requested during allocation\&. 
The +\fBmalloc_usable_size\fR\fB\fR +function is not a mechanism for in\-place +\fBrealloc\fR\fB\fR; rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by +\fBmalloc_usable_size\fR\fB\fR +should not be depended on, since such behavior is entirely implementation\-dependent\&. .SS "Experimental API" .PP The experimental API is subject to change or removal without regard for backward compatibility\&. If @@ -302,7 +414,7 @@ is a power of 2\&. .PP \fBALLOCM_ZERO\fR .RS 4 -Initialize newly allocated memory to contain zero bytes\&. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes\&. If this option is absent, newly allocated memory is uninitialized\&. +Initialize newly allocated memory to contain zero bytes\&. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes\&. If this macro is absent, newly allocated memory is uninitialized\&. .RE .PP \fBALLOCM_NO_MOVE\fR @@ -313,9 +425,10 @@ For reallocation, fail rather than moving the object\&. This constraint can appl \fBALLOCM_ARENA(\fR\fB\fIa\fR\fR\fB) \fR .RS 4 Use the arena specified by the index -\fIa\fR\&. This macro does not validate that \fIa\fR -specifies an arena in the valid range\&. +(and by necessity bypass the thread cache)\&. This macro has no effect for huge regions, nor for regions that were allocated via an arena other than the one specified\&. This macro does not validate that +\fIa\fR +specifies an arena index in the valid range\&. .RE .PP The @@ -332,7 +445,7 @@ is not \fBNULL\fR\&. Behavior is undefined if \fIsize\fR is -\fB0\fR\&. +\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. 
.PP The \fBrallocm\fR\fB\fR @@ -350,11 +463,11 @@ is not \fBNULL\fR\&. If \fIextra\fR is non\-zero, an attempt is made to resize the allocation to be at least -\fIsize\fR + \fIextra\fR) +(\fIsize\fR + \fIextra\fR) bytes, though inability to allocate the extra byte(s) will not by itself result in failure\&. Behavior is undefined if \fIsize\fR is -\fB0\fR, or if +\fB0\fR, if request size overflows due to size class and/or alignment constraints, or if (\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&. .PP The @@ -384,7 +497,7 @@ to the real size of the allocation that would result from the equivalent function call\&. Behavior is undefined if \fIsize\fR is -\fB0\fR\&. +\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&. .SH "TUNING" .PP Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various options that can be specified at compile\- or run\-time\&. @@ -662,16 +775,18 @@ in these cases\&. This option is disabled by default unless is specified during configuration, in which case it is enabled by default\&. .RE .PP -"opt\&.lg_chunk" (\fBsize_t\fR) r\- -.RS 4 -Virtual memory chunk size (log base 2)\&. If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size\&. The default chunk size is 4 MiB (2^22)\&. -.RE -.PP "opt\&.dss" (\fBconst char *\fR) r\- .RS 4 dss (\fBsbrk\fR(2)) allocation precedence as related to \fBmmap\fR(2) -allocation\&. The following settings are supported: \(lqdisabled\(rq, \(lqprimary\(rq, and \(lqsecondary\(rq (default)\&. +allocation\&. The following settings are supported: \(lqdisabled\(rq, \(lqprimary\(rq, and \(lqsecondary\(rq\&. The default is \(lqsecondary\(rq if +"config\&.dss" +is true, \(lqdisabled\(rq otherwise\&. +.RE +.PP +"opt\&.lg_chunk" (\fBsize_t\fR) r\- +.RS 4 +Virtual memory chunk size (log base 2)\&. 
If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size\&. The default chunk size is 4 MiB (2^22)\&. .RE .PP "opt\&.narenas" (\fBsize_t\fR) r\- @@ -726,7 +841,8 @@ option is enabled, the redzones are checked for corruption during deallocation\& "opt\&.zero" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] .RS 4 Zero filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to 0\&. Note that this initialization only happens once for each byte, so -\fBrealloc\fR\fB\fR +\fBrealloc\fR\fB\fR, +\fBrallocx\fR\fB\fR and \fBrallocm\fR\fB\fR calls do not zero memory that was previously allocated\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default\&. @@ -804,7 +920,7 @@ Filename prefix for profile dumps\&. If the prefix is set to the empty string, n jeprof\&. .RE .PP -"opt\&.prof_active" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +"opt\&.prof_active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Profiling activated/deactivated\&. This is a secondary control mechanism that makes it possible to start the application with profiling enabled (see the "opt\&.prof" @@ -1119,7 +1235,7 @@ Number of or similar calls made to purge dirty pages\&. .RE .PP -"stats\&.arenas\&.\&.npurged" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +"stats\&.arenas\&.\&.purged" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Number of pages purged\&. .RE @@ -1342,11 +1458,32 @@ function returns no value\&. .SS "Non\-standard API" .PP The -\fBmalloc_usable_size\fR\fB\fR -function returns the usable size of the allocation pointed to by +\fBmallocx\fR\fB\fR +and +\fBrallocx\fR\fB\fR +functions return a pointer to the allocated memory if successful; otherwise a +\fBNULL\fR +pointer is returned to indicate insufficient contiguous memory was available to service the allocation request\&. 
+.PP +The +\fBxallocx\fR\fB\fR +function returns the real size of the resulting resized allocation pointed to by +\fIptr\fR, which is a value less than +\fIsize\fR +if the allocation could not be adequately grown in place\&. +.PP +The +\fBsallocx\fR\fB\fR +function returns the real size of the allocation pointed to by \fIptr\fR\&. .PP The +\fBnallocx\fR\fB\fR +returns the real size that would result from a successful equivalent +\fBmallocx\fR\fB\fR +function call, or zero if insufficient memory is available to perform the size computation\&. +.PP +The \fBmallctl\fR\fB\fR, \fBmallctlnametomib\fR\fB\fR, and \fBmallctlbymib\fR\fB\fR @@ -1363,12 +1500,6 @@ is too large or too small\&. Alternatively, is too large or too small; in this case as much data as possible are read despite the error\&. .RE .PP -ENOMEM -.RS 4 -\fI*oldlenp\fR -is too short to hold the requested value\&. -.RE -.PP ENOENT .RS 4 \fIname\fR @@ -1393,6 +1524,11 @@ An interface with side effects failed in some way not directly related to \fBmallctl*\fR\fB\fR read/write processing\&. .RE +.PP +The +\fBmalloc_usable_size\fR\fB\fR +function returns the usable size of the allocation pointed to by +\fIptr\fR\&. .SS "Experimental API" .PP The @@ -1501,6 +1637,10 @@ The \fBmallctl*\fR\fB\fR, and \fB*allocm\fR\fB\fR functions first appeared in FreeBSD 10\&.0\&. +.PP +The +\fB*allocx\fR\fB\fR +functions first appeared in FreeBSD 11\&.0\&. .SH "AUTHOR" .PP \fBJason Evans\fR diff --git a/contrib/jemalloc/include/jemalloc/internal/arena.h b/contrib/jemalloc/include/jemalloc/internal/arena.h index bbcfedacead9..9d000c03decb 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena.h @@ -158,6 +158,7 @@ struct arena_chunk_map_s { }; typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; +typedef ql_head(arena_chunk_map_t) arena_chunk_mapelms_t; /* Arena chunk header. 
*/ struct arena_chunk_s { @@ -174,11 +175,12 @@ struct arena_chunk_s { size_t nruns_avail; /* - * Number of available run adjacencies. Clean and dirty available runs - * are not coalesced, which causes virtual memory fragmentation. The - * ratio of (nruns_avail-nruns_adjac):nruns_adjac is used for tracking - * this fragmentation. - * */ + * Number of available run adjacencies that purging could coalesce. + * Clean and dirty available runs are not coalesced, which causes + * virtual memory fragmentation. The ratio of + * (nruns_avail-nruns_adjac):nruns_adjac is used for tracking this + * fragmentation. + */ size_t nruns_adjac; /* @@ -404,7 +406,16 @@ void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); +#ifdef JEMALLOC_JET +typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t, + uint8_t); +extern arena_redzone_corruption_t *arena_redzone_corruption; +typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *); +extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; +#else void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); +#endif +void arena_quarantine_junk_small(void *ptr, size_t usize); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); @@ -415,10 +426,18 @@ void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_t *mapelm); void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind); +#ifdef JEMALLOC_JET +typedef void (arena_dalloc_junk_large_t)(void *, size_t); +extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; +#endif void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); void arena_dalloc_large(arena_t *arena, 
arena_chunk_t *chunk, void *ptr); -void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, +#ifdef JEMALLOC_JET +typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); +extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; +#endif +bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, @@ -473,7 +492,7 @@ size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_ctx_t *arena_prof_ctx_get(const void *ptr); -void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, @@ -885,10 +904,10 @@ arena_prof_ctx_get(const void *ptr) } JEMALLOC_INLINE void -arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) { arena_chunk_t *chunk; - size_t pageind, mapbits; + size_t pageind; cassert(config_prof); assert(ptr != NULL); @@ -896,10 +915,17 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + + if (usize > SMALL_MAXCLASS || (prof_promote && + ((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk, + pageind) != 0))) { + assert(arena_mapbits_large_get(chunk, pageind) != 0); + arena_mapp_get(chunk, pageind)->prof_ctx = ctx; + } else { + 
assert(arena_mapbits_large_get(chunk, pageind) == 0); if (prof_promote == false) { + size_t mapbits = arena_mapbits_get(chunk, pageind); arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); @@ -911,12 +937,11 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); - *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset - + (regind * sizeof(prof_ctx_t *)))) = ctx; - } else - assert((uintptr_t)ctx == (uintptr_t)1U); - } else - arena_mapp_get(chunk, pageind)->prof_ctx = ctx; + *((prof_ctx_t **)((uintptr_t)run + + bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t + *)))) = ctx; + } + } } JEMALLOC_ALWAYS_INLINE void * diff --git a/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h b/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h index 6585f071bbec..4535ce09c09a 100644 --- a/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h +++ b/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h @@ -7,7 +7,7 @@ typedef enum { dss_prec_secondary = 2, dss_prec_limit = 3 -} dss_prec_t ; +} dss_prec_t; #define DSS_PREC_DEFAULT dss_prec_secondary #define DSS_DEFAULT "secondary" diff --git a/contrib/jemalloc/include/jemalloc/internal/ckh.h b/contrib/jemalloc/include/jemalloc/internal/ckh.h index 50c39ed95819..58712a6a763e 100644 --- a/contrib/jemalloc/include/jemalloc/internal/ckh.h +++ b/contrib/jemalloc/include/jemalloc/internal/ckh.h @@ -17,7 +17,7 @@ typedef bool ckh_keycomp_t (const void *, const void *); * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit * one bucket per L1 cache line. 
*/ -#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/hash.h b/contrib/jemalloc/include/jemalloc/internal/hash.h index 56ecc793b365..09b69df515be 100644 --- a/contrib/jemalloc/include/jemalloc/internal/hash.h +++ b/contrib/jemalloc/include/jemalloc/internal/hash.h @@ -19,6 +19,11 @@ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +uint32_t hash_x86_32(const void *key, int len, uint32_t seed); +void hash_x86_128(const void *key, const int len, uint32_t seed, + uint64_t r_out[2]); +void hash_x64_128(const void *key, const int len, const uint32_t seed, + uint64_t r_out[2]); void hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]); #endif @@ -43,14 +48,14 @@ JEMALLOC_INLINE uint32_t hash_get_block_32(const uint32_t *p, int i) { - return p[i]; + return (p[i]); } JEMALLOC_INLINE uint64_t hash_get_block_64(const uint64_t *p, int i) { - return p[i]; + return (p[i]); } JEMALLOC_INLINE uint32_t @@ -63,7 +68,7 @@ hash_fmix_32(uint32_t h) h *= 0xc2b2ae35; h ^= h >> 16; - return h; + return (h); } JEMALLOC_INLINE uint64_t @@ -76,7 +81,7 @@ hash_fmix_64(uint64_t k) k *= QU(0xc4ceb9fe1a85ec53LLU); k ^= k >> 33; - return k; + return (k); } JEMALLOC_INLINE uint32_t @@ -127,12 +132,12 @@ hash_x86_32(const void *key, int len, uint32_t seed) h1 = hash_fmix_32(h1); - return h1; + return (h1); } UNUSED JEMALLOC_INLINE void hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]) + uint64_t r_out[2]) { const uint8_t * data = (const uint8_t *) key; const int nblocks = len / 16; @@ -234,7 +239,7 @@ hash_x86_128(const void *key, const int len, uint32_t seed, UNUSED JEMALLOC_INLINE void hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]) + uint64_t r_out[2]) { 
const uint8_t *data = (const uint8_t *) key; const int nblocks = len / 16; @@ -310,7 +315,6 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, r_out[1] = h2; } - /******************************************************************************/ /* API. */ JEMALLOC_INLINE void diff --git a/contrib/jemalloc/include/jemalloc/internal/huge.h b/contrib/jemalloc/include/jemalloc/internal/huge.h index d987d370767a..ddf13138ad73 100644 --- a/contrib/jemalloc/include/jemalloc/internal/huge.h +++ b/contrib/jemalloc/include/jemalloc/internal/huge.h @@ -19,10 +19,14 @@ extern malloc_mutex_t huge_mtx; void *huge_malloc(size_t size, bool zero); void *huge_palloc(size_t size, size_t alignment, bool zero); -void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, +bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra); void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc); +#ifdef JEMALLOC_JET +typedef void (huge_dalloc_junk_t)(void *, size_t); +extern huge_dalloc_junk_t *huge_dalloc_junk; +#endif void huge_dalloc(void *ptr, bool unmap); size_t huge_salloc(const void *ptr); prof_ctx_t *huge_prof_ctx_get(const void *ptr); diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h index 95b7c6953a3d..aad7b6c2ef76 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -1,5 +1,5 @@ #ifndef JEMALLOC_INTERNAL_H -#define JEMALLOC_INTERNAL_H +#define JEMALLOC_INTERNAL_H #include "libc_private.h" #include "namespace.h" @@ -57,11 +57,7 @@ typedef intptr_t ssize_t; #endif #include -#include "un-namespace.h" -#include "libc_private.h" - -#define JEMALLOC_NO_DEMANGLE -#include "../jemalloc.h" +#include "jemalloc_internal_defs.h" #ifdef JEMALLOC_UTRACE #include @@ -72,13 +68,21 @@ typedef intptr_t 
ssize_t; #include #endif -#include "jemalloc/internal/private_namespace.h" +#include "un-namespace.h" +#include "libc_private.h" -#ifdef JEMALLOC_CC_SILENCE -#define UNUSED JEMALLOC_ATTR(unused) +#define JEMALLOC_NO_DEMANGLE +#ifdef JEMALLOC_JET +# define JEMALLOC_N(n) jet_##n +# include "jemalloc/internal/public_namespace.h" +# define JEMALLOC_NO_RENAME +# include "../jemalloc.h" +# undef JEMALLOC_NO_RENAME #else -#define UNUSED +# define JEMALLOC_N(n) __je_##n +# include "../jemalloc.h" #endif +#include "jemalloc/internal/private_namespace.h" static const bool config_debug = #ifdef JEMALLOC_DEBUG @@ -221,48 +225,13 @@ static const bool config_ivsalloc = * JEMALLOC_H_INLINES : Inline functions. */ /******************************************************************************/ -#define JEMALLOC_H_TYPES +#define JEMALLOC_H_TYPES +#include "jemalloc/internal/jemalloc_internal_macros.h" + +#define MALLOCX_LG_ALIGN_MASK ((int)0x3f) #define ALLOCM_LG_ALIGN_MASK ((int)0x3f) -#define ZU(z) ((size_t)z) -#define QU(q) ((uint64_t)q) - -#ifndef __DECONST -# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) -#endif - -/* - * JEMALLOC_ALWAYS_INLINE is used within header files for functions that are - * static inline functions if inlining is enabled, and single-definition - * library-private functions if inlining is disabled. - * - * JEMALLOC_ALWAYS_INLINE_C is for use in .c files, in which case the denoted - * functions are always static, regardless of whether inlining is enabled. - */ -#ifdef JEMALLOC_DEBUG - /* Disable inlining to make debugging easier. 
*/ -# define JEMALLOC_ALWAYS_INLINE -# define JEMALLOC_ALWAYS_INLINE_C static -# define JEMALLOC_INLINE -# define inline -#else -# define JEMALLOC_ENABLE_INLINE -# ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ALWAYS_INLINE \ - static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) -# define JEMALLOC_ALWAYS_INLINE_C \ - static inline JEMALLOC_ATTR(always_inline) -# else -# define JEMALLOC_ALWAYS_INLINE static inline -# define JEMALLOC_ALWAYS_INLINE_C static inline -# endif -# define JEMALLOC_INLINE static inline -# ifdef _MSC_VER -# define inline _inline -# endif -#endif - /* Smallest size class to support. */ #define LG_TINY_MIN 3 #define TINY_MIN (1U << LG_TINY_MIN) @@ -493,7 +462,7 @@ static const bool config_ivsalloc = #undef JEMALLOC_H_TYPES /******************************************************************************/ -#define JEMALLOC_H_STRUCTS +#define JEMALLOC_H_STRUCTS #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -522,14 +491,14 @@ typedef struct { uint64_t deallocated; } thread_allocated_t; /* - * The JEMALLOC_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro + * The JEMALLOC_ARG_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro * argument. 
*/ -#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_CONCAT({0, 0}) +#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_ARG_CONCAT({0, 0}) #undef JEMALLOC_H_STRUCTS /******************************************************************************/ -#define JEMALLOC_H_EXTERNS +#define JEMALLOC_H_EXTERNS extern bool opt_abort; extern bool opt_junk; @@ -589,7 +558,7 @@ void jemalloc_postfork_child(void); #undef JEMALLOC_H_EXTERNS /******************************************************************************/ -#define JEMALLOC_H_INLINES +#define JEMALLOC_H_INLINES #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -764,32 +733,36 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE -void *imallocx(size_t size, bool try_tcache, arena_t *arena); +void *imalloct(size_t size, bool try_tcache, arena_t *arena); void *imalloc(size_t size); -void *icallocx(size_t size, bool try_tcache, arena_t *arena); +void *icalloct(size_t size, bool try_tcache, arena_t *arena); void *icalloc(size_t size); -void *ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, +void *ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena); void *ipalloc(size_t usize, size_t alignment, bool zero); size_t isalloc(const void *ptr, bool demote); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); -void idallocx(void *ptr, bool try_tcache); +void idalloct(void *ptr, bool try_tcache); void idalloc(void *ptr); -void iqallocx(void *ptr, bool try_tcache); +void iqalloct(void *ptr, bool try_tcache); void iqalloc(void *ptr); -void *irallocx(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero, bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, +void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); +void 
*iralloct(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero, bool no_move); + bool zero); +bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero); malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE void * -imallocx(size_t size, bool try_tcache, arena_t *arena) +imalloct(size_t size, bool try_tcache, arena_t *arena) { assert(size != 0); @@ -804,11 +777,11 @@ JEMALLOC_ALWAYS_INLINE void * imalloc(size_t size) { - return (imallocx(size, true, NULL)); + return (imalloct(size, true, NULL)); } JEMALLOC_ALWAYS_INLINE void * -icallocx(size_t size, bool try_tcache, arena_t *arena) +icalloct(size_t size, bool try_tcache, arena_t *arena) { if (size <= arena_maxclass) @@ -821,11 +794,11 @@ JEMALLOC_ALWAYS_INLINE void * icalloc(size_t size) { - return (icallocx(size, true, NULL)); + return (icalloct(size, true, NULL)); } JEMALLOC_ALWAYS_INLINE void * -ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, +ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) { void *ret; @@ -853,7 +826,7 @@ JEMALLOC_ALWAYS_INLINE void * ipalloc(size_t usize, size_t alignment, bool zero) { - return (ipallocx(usize, alignment, zero, true, NULL)); + return (ipalloct(usize, alignment, zero, true, NULL)); } /* @@ -885,7 +858,7 @@ ivsalloc(const void *ptr, bool demote) { /* Return 0 if ptr is not within a chunk managed by jemalloc. 
*/ - if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) + if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0) return (0); return (isalloc(ptr, demote)); @@ -914,7 +887,7 @@ p2rz(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -idallocx(void *ptr, bool try_tcache) +idalloct(void *ptr, bool try_tcache) { arena_chunk_t *chunk; @@ -931,31 +904,63 @@ JEMALLOC_ALWAYS_INLINE void idalloc(void *ptr) { - idallocx(ptr, true); + idalloct(ptr, true); } JEMALLOC_ALWAYS_INLINE void -iqallocx(void *ptr, bool try_tcache) +iqalloct(void *ptr, bool try_tcache) { if (config_fill && opt_quarantine) quarantine(ptr); else - idallocx(ptr, try_tcache); + idalloct(ptr, try_tcache); } JEMALLOC_ALWAYS_INLINE void iqalloc(void *ptr) { - iqallocx(ptr, true); + iqalloct(ptr, true); } JEMALLOC_ALWAYS_INLINE void * -irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, - bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) +iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, + arena_t *arena) +{ + void *p; + size_t usize, copysize; + + usize = sa2u(size + extra, alignment); + if (usize == 0) + return (NULL); + p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + if (p == NULL) { + if (extra == 0) + return (NULL); + /* Try again, without extra this time. */ + usize = sa2u(size, alignment); + if (usize == 0) + return (NULL); + p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + if (p == NULL) + return (NULL); + } + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? 
size : oldsize; + memcpy(p, ptr, copysize); + iqalloct(ptr, try_tcache_dalloc); + return (p); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, + bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) { - void *ret; size_t oldsize; assert(ptr != NULL); @@ -965,68 +970,50 @@ irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { - size_t usize, copysize; - /* * Existing object alignment is inadequate; allocate new space * and copy. */ - if (no_move) - return (NULL); - usize = sa2u(size + extra, alignment); - if (usize == 0) - return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, arena); - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, without extra this time. */ - usize = sa2u(size, alignment); - if (usize == 0) - return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, - arena); - if (ret == NULL) - return (NULL); - } - /* - * Copy at most size bytes (not size+extra), since the caller - * has no expectation that the extra bytes will be reliably - * preserved. - */ - copysize = (size < oldsize) ? 
size : oldsize; - memcpy(ret, ptr, copysize); - iqallocx(ptr, try_tcache_dalloc); - return (ret); + return (iralloct_realign(ptr, oldsize, size, extra, alignment, + zero, try_tcache_alloc, try_tcache_dalloc, arena)); } - if (no_move) { - if (size <= arena_maxclass) { - return (arena_ralloc_no_move(ptr, oldsize, size, - extra, zero)); - } else { - return (huge_ralloc_no_move(ptr, oldsize, size, - extra)); - } + if (size + extra <= arena_maxclass) { + return (arena_ralloc(arena, ptr, oldsize, size, extra, + alignment, zero, try_tcache_alloc, + try_tcache_dalloc)); } else { - if (size + extra <= arena_maxclass) { - return (arena_ralloc(arena, ptr, oldsize, size, extra, - alignment, zero, try_tcache_alloc, - try_tcache_dalloc)); - } else { - return (huge_ralloc(ptr, oldsize, size, extra, - alignment, zero, try_tcache_dalloc)); - } + return (huge_ralloc(ptr, oldsize, size, extra, + alignment, zero, try_tcache_dalloc)); } } JEMALLOC_ALWAYS_INLINE void * -iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, - bool no_move) +iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) { - return (irallocx(ptr, size, extra, alignment, zero, no_move, true, true, - NULL)); + return (iralloct(ptr, size, extra, alignment, zero, true, true, NULL)); +} + +JEMALLOC_ALWAYS_INLINE bool +ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) +{ + size_t oldsize; + + assert(ptr != NULL); + assert(size != 0); + + oldsize = isalloc(ptr, config_prof); + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* Existing object alignment is inadequate. 
*/ + return (true); + } + + if (size <= arena_maxclass) + return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); + else + return (huge_ralloc_no_move(ptr, oldsize, size, extra)); } malloc_tsd_externs(thread_allocated, thread_allocated_t) diff --git a/contrib/jemalloc/include/jemalloc/jemalloc_defs.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h similarity index 70% rename from contrib/jemalloc/include/jemalloc/jemalloc_defs.h rename to contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h index 8e361d3db690..405a1f10d3e1 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc_defs.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h @@ -1,4 +1,6 @@ -/* include/jemalloc/jemalloc_defs.h. Generated from jemalloc_defs.h.in by configure. */ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use @@ -7,40 +9,13 @@ /* #undef JEMALLOC_PREFIX */ /* #undef JEMALLOC_CPREFIX */ -/* - * Name mangling for public symbols is controlled by --with-mangling and - * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by - * these macro definitions. 
- */ -#define je_malloc_conf malloc_conf -#define je_malloc_message malloc_message -#define je_malloc malloc -#define je_calloc calloc -#define je_posix_memalign posix_memalign -#define je_aligned_alloc aligned_alloc -#define je_realloc realloc -#define je_free free -#define je_malloc_usable_size malloc_usable_size -#define je_malloc_stats_print malloc_stats_print -#define je_mallctl mallctl -#define je_mallctlnametomib mallctlnametomib -#define je_mallctlbymib mallctlbymib -/* #undef je_memalign */ -#define je_valloc valloc -#define je_allocm allocm -#define je_rallocm rallocm -#define je_sallocm sallocm -#define je_dallocm dallocm -#define je_nallocm nallocm - /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. * For shared libraries, symbol visibility mechanisms prevent these symbols * from being exported, but for static libraries, naming collisions are a real * possibility. */ -#define JEMALLOC_PRIVATE_NAMESPACE "__jemalloc_" -#define JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix) __jemalloc_##string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix +#define JEMALLOC_PRIVATE_NAMESPACE __je_ /* * Hyper-threaded CPUs may need a special instruction inside spin loops in @@ -102,32 +77,6 @@ */ #define JEMALLOC_MUTEX_INIT_CB 1 -/* Defined if __attribute__((...)) syntax is supported. 
*/ -#define JEMALLOC_HAVE_ATTR -#ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -#elif _MSC_VER -# define JEMALLOC_ATTR(s) -# ifdef DLLEXPORT -# define JEMALLOC_EXPORT __declspec(dllexport) -# else -# define JEMALLOC_EXPORT __declspec(dllimport) -# endif -# define JEMALLOC_ALIGNED(s) __declspec(align(s)) -# define JEMALLOC_SECTION(s) __declspec(allocate(s)) -# define JEMALLOC_NOINLINE __declspec(noinline) -#else -# define JEMALLOC_ATTR(s) -# define JEMALLOC_EXPORT -# define JEMALLOC_ALIGNED(s) -# define JEMALLOC_SECTION(s) -# define JEMALLOC_NOINLINE -#endif - /* Defined if sbrk() is supported. */ #define JEMALLOC_HAVE_SBRK @@ -137,6 +86,9 @@ /* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ #define JEMALLOC_CC_SILENCE +/* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */ +/* #undef JEMALLOC_CODE_COVERAGE */ + /* * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables * inline functions. @@ -174,9 +126,6 @@ /* Support memory filling (junk/zero/quarantine/redzone). */ #define JEMALLOC_FILL -/* Support the experimental API. */ -#define JEMALLOC_EXPERIMENTAL - /* Support utrace(2)-based tracing. */ #define JEMALLOC_UTRACE @@ -215,22 +164,6 @@ */ /* #undef JEMALLOC_IVSALLOC */ -/* - * Define overrides for non-standard allocator-related functions if they - * are present on the system. - */ -/* #undef JEMALLOC_OVERRIDE_MEMALIGN */ -#define JEMALLOC_OVERRIDE_VALLOC - -/* - * At least Linux omits the "const" in: - * - * size_t malloc_usable_size(const void *ptr); - * - * Match the operating system's prototype. - */ -#define JEMALLOC_USABLE_SIZE_CONST const - /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. 
*/ @@ -255,9 +188,6 @@ */ /* #undef JEMALLOC_HAS_ALLOCA_H */ -/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#define LG_SIZEOF_PTR 3 - /* sizeof(int) == 2^LG_SIZEOF_INT. */ #define LG_SIZEOF_INT 2 @@ -266,3 +196,5 @@ /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ #define LG_SIZEOF_INTMAX_T 3 + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h new file mode 100644 index 000000000000..70602ee8f53e --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h @@ -0,0 +1,47 @@ +/* + * JEMALLOC_ALWAYS_INLINE and JEMALLOC_INLINE are used within header files for + * functions that are static inline functions if inlining is enabled, and + * single-definition library-private functions if inlining is disabled. + * + * JEMALLOC_ALWAYS_INLINE_C and JEMALLOC_INLINE_C are for use in .c files, in + * which case the denoted functions are always static, regardless of whether + * inlining is enabled. + */ +#if defined(JEMALLOC_DEBUG) || defined(JEMALLOC_CODE_COVERAGE) + /* Disable inlining to make debugging/profiling easier. 
*/ +# define JEMALLOC_ALWAYS_INLINE +# define JEMALLOC_ALWAYS_INLINE_C static +# define JEMALLOC_INLINE +# define JEMALLOC_INLINE_C static +# define inline +#else +# define JEMALLOC_ENABLE_INLINE +# ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ALWAYS_INLINE \ + static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) +# define JEMALLOC_ALWAYS_INLINE_C \ + static inline JEMALLOC_ATTR(always_inline) +# else +# define JEMALLOC_ALWAYS_INLINE static inline +# define JEMALLOC_ALWAYS_INLINE_C static inline +# endif +# define JEMALLOC_INLINE static inline +# define JEMALLOC_INLINE_C static inline +# ifdef _MSC_VER +# define inline _inline +# endif +#endif + +#ifdef JEMALLOC_CC_SILENCE +# define UNUSED JEMALLOC_ATTR(unused) +#else +# define UNUSED +#endif + +#define ZU(z) ((size_t)z) +#define QU(q) ((uint64_t)q) +#define QI(q) ((int64_t)q) + +#ifndef __DECONST +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +#endif diff --git a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h index 2a98d1fe5b92..6a0fa0fb7bfd 100644 --- a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h +++ b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h @@ -8,6 +8,7 @@ #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) #define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked) +#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) #define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) #define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) #define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) @@ -50,8 +51,11 @@ #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) #define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) #define arena_purge_all JEMALLOC_N(arena_purge_all) +#define arena_quarantine_junk_small 
JEMALLOC_N(arena_quarantine_junk_small) #define arena_ralloc JEMALLOC_N(arena_ralloc) +#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) +#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) #define arena_run_regind JEMALLOC_N(arena_run_regind) #define arena_salloc JEMALLOC_N(arena_salloc) #define arena_stats_merge JEMALLOC_N(arena_stats_merge) @@ -68,6 +72,7 @@ #define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) #define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) #define arenas_tsd_get_wrapper JEMALLOC_N(arenas_tsd_get_wrapper) +#define arenas_tsd_init_head JEMALLOC_N(arenas_tsd_init_head) #define arenas_tsd_set JEMALLOC_N(arenas_tsd_set) #define atomic_add_u JEMALLOC_N(atomic_add_u) #define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) @@ -191,6 +196,7 @@ #define huge_allocated JEMALLOC_N(huge_allocated) #define huge_boot JEMALLOC_N(huge_boot) #define huge_dalloc JEMALLOC_N(huge_dalloc) +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) #define huge_malloc JEMALLOC_N(huge_malloc) #define huge_mtx JEMALLOC_N(huge_mtx) #define huge_ndalloc JEMALLOC_N(huge_ndalloc) @@ -206,19 +212,21 @@ #define huge_salloc JEMALLOC_N(huge_salloc) #define iallocm JEMALLOC_N(iallocm) #define icalloc JEMALLOC_N(icalloc) -#define icallocx JEMALLOC_N(icallocx) +#define icalloct JEMALLOC_N(icalloct) #define idalloc JEMALLOC_N(idalloc) -#define idallocx JEMALLOC_N(idallocx) +#define idalloct JEMALLOC_N(idalloct) #define imalloc JEMALLOC_N(imalloc) -#define imallocx JEMALLOC_N(imallocx) +#define imalloct JEMALLOC_N(imalloct) #define ipalloc JEMALLOC_N(ipalloc) -#define ipallocx JEMALLOC_N(ipallocx) +#define ipalloct JEMALLOC_N(ipalloct) #define iqalloc JEMALLOC_N(iqalloc) -#define iqallocx JEMALLOC_N(iqallocx) +#define iqalloct JEMALLOC_N(iqalloct) #define iralloc JEMALLOC_N(iralloc) -#define irallocx JEMALLOC_N(irallocx) +#define iralloct JEMALLOC_N(iralloct) +#define 
iralloct_realign JEMALLOC_N(iralloct_realign) #define isalloc JEMALLOC_N(isalloc) #define ivsalloc JEMALLOC_N(ivsalloc) +#define ixalloc JEMALLOC_N(ixalloc) #define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) #define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) #define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) @@ -249,6 +257,7 @@ #define ncpus JEMALLOC_N(ncpus) #define nhbins JEMALLOC_N(nhbins) #define opt_abort JEMALLOC_N(opt_abort) +#define opt_dss JEMALLOC_N(opt_dss) #define opt_junk JEMALLOC_N(opt_junk) #define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) #define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult) @@ -278,8 +287,10 @@ #define prof_boot0 JEMALLOC_N(prof_boot0) #define prof_boot1 JEMALLOC_N(prof_boot1) #define prof_boot2 JEMALLOC_N(prof_boot2) +#define prof_bt_count JEMALLOC_N(prof_bt_count) #define prof_ctx_get JEMALLOC_N(prof_ctx_get) #define prof_ctx_set JEMALLOC_N(prof_ctx_set) +#define prof_dump_open JEMALLOC_N(prof_dump_open) #define prof_free JEMALLOC_N(prof_free) #define prof_gdump JEMALLOC_N(prof_gdump) #define prof_idump JEMALLOC_N(prof_idump) @@ -305,6 +316,7 @@ #define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) #define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) #define prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper) +#define prof_tdata_tsd_init_head JEMALLOC_N(prof_tdata_tsd_init_head) #define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) #define quarantine JEMALLOC_N(quarantine) #define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook) @@ -318,8 +330,10 @@ #define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) #define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) #define quarantine_tsd_get_wrapper JEMALLOC_N(quarantine_tsd_get_wrapper) +#define quarantine_tsd_init_head JEMALLOC_N(quarantine_tsd_init_head) #define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set) #define register_zone JEMALLOC_N(register_zone) 
+#define rtree_delete JEMALLOC_N(rtree_delete) #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) #define rtree_new JEMALLOC_N(rtree_new) @@ -330,6 +344,7 @@ #define s2u JEMALLOC_N(s2u) #define sa2u JEMALLOC_N(sa2u) #define set_errno JEMALLOC_N(set_errno) +#define small_size2bin JEMALLOC_N(small_size2bin) #define stats_cactive JEMALLOC_N(stats_cactive) #define stats_cactive_add JEMALLOC_N(stats_cactive_add) #define stats_cactive_get JEMALLOC_N(stats_cactive_get) @@ -362,6 +377,7 @@ #define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) #define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) #define tcache_enabled_tsd_get_wrapper JEMALLOC_N(tcache_enabled_tsd_get_wrapper) +#define tcache_enabled_tsd_init_head JEMALLOC_N(tcache_enabled_tsd_init_head) #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) #define tcache_event_hard JEMALLOC_N(tcache_event_hard) @@ -378,6 +394,7 @@ #define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) #define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) #define tcache_tsd_get_wrapper JEMALLOC_N(tcache_tsd_get_wrapper) +#define tcache_tsd_init_head JEMALLOC_N(tcache_tsd_init_head) #define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) #define thread_allocated_booted JEMALLOC_N(thread_allocated_booted) #define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized) @@ -387,5 +404,8 @@ #define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) #define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) #define thread_allocated_tsd_get_wrapper JEMALLOC_N(thread_allocated_tsd_get_wrapper) +#define thread_allocated_tsd_init_head JEMALLOC_N(thread_allocated_tsd_init_head) #define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set) +#define tsd_init_check_recursion JEMALLOC_N(tsd_init_check_recursion) +#define 
tsd_init_finish JEMALLOC_N(tsd_init_finish) #define u2rz JEMALLOC_N(u2rz) diff --git a/contrib/jemalloc/include/jemalloc/internal/prng.h b/contrib/jemalloc/include/jemalloc/internal/prng.h index 83a5462b4dd0..7b2b06512ffc 100644 --- a/contrib/jemalloc/include/jemalloc/internal/prng.h +++ b/contrib/jemalloc/include/jemalloc/internal/prng.h @@ -25,7 +25,7 @@ * uint32_t state : Seed value. * const uint32_t a, c : See above discussion. */ -#define prng32(r, lg_range, state, a, c) do { \ +#define prng32(r, lg_range, state, a, c) do { \ assert(lg_range > 0); \ assert(lg_range <= 32); \ \ @@ -35,7 +35,7 @@ } while (false) /* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ -#define prng64(r, lg_range, state, a, c) do { \ +#define prng64(r, lg_range, state, a, c) do { \ assert(lg_range > 0); \ assert(lg_range <= 64); \ \ diff --git a/contrib/jemalloc/include/jemalloc/internal/prof.h b/contrib/jemalloc/include/jemalloc/internal/prof.h index 119a5b1bcb7b..db056fc4f266 100644 --- a/contrib/jemalloc/include/jemalloc/internal/prof.h +++ b/contrib/jemalloc/include/jemalloc/internal/prof.h @@ -129,6 +129,7 @@ struct prof_ctx_s { * limbo due to one of: * - Initializing per thread counters associated with this ctx. * - Preparing to destroy this ctx. + * - Dumping a heap profile that includes this ctx. * nlimbo must be 1 (single destroyer) in order to safely destroy the * ctx. */ @@ -145,7 +146,11 @@ struct prof_ctx_s { * this context. */ ql_head(prof_thr_cnt_t) cnts_ql; + + /* Linkage for list of contexts to be dumped. */ + ql_elm(prof_ctx_t) dump_link; }; +typedef ql_head(prof_ctx_t) prof_ctx_list_t; struct prof_tdata_s { /* @@ -195,7 +200,12 @@ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_final; /* Final profile dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. */ extern bool opt_prof_accum; /* Report cumulative bytes. 
*/ -extern char opt_prof_prefix[PATH_MAX + 1]; +extern char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; /* * Profile dump interval, measured in bytes allocated. Each arena triggers a @@ -215,6 +225,11 @@ extern bool prof_promote; void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt, unsigned nignore); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); +#ifdef JEMALLOC_JET +size_t prof_bt_count(void); +typedef int (prof_dump_open_t)(bool, const char *); +extern prof_dump_open_t *prof_dump_open; +#endif void prof_idump(void); bool prof_mdump(const char *filename); void prof_gdump(void); @@ -289,11 +304,11 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) prof_tdata_t *prof_tdata_get(bool create); void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); bool prof_sample_accum_update(size_t size); -void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, - size_t old_size, prof_ctx_t *old_ctx); +void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); +void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, + size_t old_usize, prof_ctx_t *old_ctx); void prof_free(const void *ptr, size_t size); #endif @@ -320,6 +335,20 @@ prof_tdata_get(bool create) JEMALLOC_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata) { + /* + * The body of this function is compiled out unless heap profiling is + * enabled, so that it is possible to compile jemalloc with floating + * point support completely disabled. 
Avoiding floating point code is + * important on memory-constrained systems, but it also enables a + * workaround for versions of glibc that don't properly save/restore + * floating point registers during dynamic lazy symbol loading (which + * internally calls into whatever malloc implementation happens to be + * integrated into the application). Note that some compilers (e.g. + * gcc 4.8) may use floating point registers for fast memory moves, so + * jemalloc must be compiled with such optimizations disabled (e.g. + * -mno-sse) in order for the workaround to be complete. + */ +#ifdef JEMALLOC_PROF uint64_t r; double u; @@ -341,7 +370,7 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) * Luc Devroye * Springer-Verlag, New York, 1986 * pp 500 - * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) + * (http://luc.devroye.org/rnbookindex.html) */ prng64(r, 53, prof_tdata->prng_state, UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); @@ -349,6 +378,7 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) prof_tdata->threshold = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + (uint64_t)1U; +#endif } JEMALLOC_INLINE prof_ctx_t * @@ -371,7 +401,7 @@ prof_ctx_get(const void *ptr) } JEMALLOC_INLINE void -prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) { arena_chunk_t *chunk; @@ -381,7 +411,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. 
*/ - arena_prof_ctx_set(ptr, ctx); + arena_prof_ctx_set(ptr, usize, ctx); } else huge_prof_ctx_set(ptr, ctx); } @@ -416,20 +446,20 @@ prof_sample_accum_update(size_t size) } JEMALLOC_INLINE void -prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) +prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) { cassert(config_prof); assert(ptr != NULL); - assert(size == isalloc(ptr, true)); + assert(usize == isalloc(ptr, true)); if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(size)) { + if (prof_sample_accum_update(usize)) { /* * Don't sample. For malloc()-like allocation, it is * always possible to tell in advance how large an * object's usable size will be, so there should never - * be a difference between the size passed to + * be a difference between the usize passed to * PROF_ALLOC_PREP() and prof_malloc(). */ assert((uintptr_t)cnt == (uintptr_t)1U); @@ -437,17 +467,17 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) } if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); + prof_ctx_set(ptr, usize, cnt->ctx); cnt->epoch++; /*********/ mb_write(); /*********/ cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; + cnt->cnts.curbytes += usize; if (opt_prof_accum) { cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; + cnt->cnts.accumbytes += usize; } /*********/ mb_write(); @@ -457,12 +487,12 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) mb_write(); /*********/ } else - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); + prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); } JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, - size_t old_size, prof_ctx_t *old_ctx) +prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, + size_t old_usize, prof_ctx_t *old_ctx) { prof_thr_cnt_t *told_cnt; @@ -470,15 +500,15 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); if (ptr != 
NULL) { - assert(size == isalloc(ptr, true)); + assert(usize == isalloc(ptr, true)); if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(size)) { + if (prof_sample_accum_update(usize)) { /* - * Don't sample. The size passed to + * Don't sample. The usize passed to * PROF_ALLOC_PREP() was larger than what * actually got allocated, so a backtrace was * captured for this allocation, even though - * its actual size was insufficient to cross + * its actual usize was insufficient to cross * the sample threshold. */ cnt = (prof_thr_cnt_t *)(uintptr_t)1U; @@ -495,7 +525,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, */ malloc_mutex_lock(old_ctx->lock); old_ctx->cnt_merged.curobjs--; - old_ctx->cnt_merged.curbytes -= old_size; + old_ctx->cnt_merged.curbytes -= old_usize; malloc_mutex_unlock(old_ctx->lock); told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; } @@ -505,23 +535,23 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, if ((uintptr_t)told_cnt > (uintptr_t)1U) told_cnt->epoch++; if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); + prof_ctx_set(ptr, usize, cnt->ctx); cnt->epoch++; } else if (ptr != NULL) - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); + prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); /*********/ mb_write(); /*********/ if ((uintptr_t)told_cnt > (uintptr_t)1U) { told_cnt->cnts.curobjs--; - told_cnt->cnts.curbytes -= old_size; + told_cnt->cnts.curbytes -= old_usize; } if ((uintptr_t)cnt > (uintptr_t)1U) { cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; + cnt->cnts.curbytes += usize; if (opt_prof_accum) { cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; + cnt->cnts.accumbytes += usize; } } /*********/ diff --git a/contrib/jemalloc/include/jemalloc/internal/public_namespace.h b/contrib/jemalloc/include/jemalloc/internal/public_namespace.h new file mode 100644 index 000000000000..32c72b60e43b --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/public_namespace.h @@ -0,0 
+1,25 @@ +#define je_malloc_conf JEMALLOC_N(malloc_conf) +#define je_malloc_message JEMALLOC_N(malloc_message) +#define je_malloc JEMALLOC_N(malloc) +#define je_calloc JEMALLOC_N(calloc) +#define je_posix_memalign JEMALLOC_N(posix_memalign) +#define je_aligned_alloc JEMALLOC_N(aligned_alloc) +#define je_realloc JEMALLOC_N(realloc) +#define je_free JEMALLOC_N(free) +#define je_mallocx JEMALLOC_N(mallocx) +#define je_rallocx JEMALLOC_N(rallocx) +#define je_xallocx JEMALLOC_N(xallocx) +#define je_sallocx JEMALLOC_N(sallocx) +#define je_dallocx JEMALLOC_N(dallocx) +#define je_nallocx JEMALLOC_N(nallocx) +#define je_mallctl JEMALLOC_N(mallctl) +#define je_mallctlnametomib JEMALLOC_N(mallctlnametomib) +#define je_mallctlbymib JEMALLOC_N(mallctlbymib) +#define je_malloc_stats_print JEMALLOC_N(malloc_stats_print) +#define je_malloc_usable_size JEMALLOC_N(malloc_usable_size) +#define je_valloc JEMALLOC_N(valloc) +#define je_allocm JEMALLOC_N(allocm) +#define je_dallocm JEMALLOC_N(dallocm) +#define je_nallocm JEMALLOC_N(nallocm) +#define je_rallocm JEMALLOC_N(rallocm) +#define je_sallocm JEMALLOC_N(sallocm) diff --git a/contrib/jemalloc/include/jemalloc/internal/ql.h b/contrib/jemalloc/include/jemalloc/internal/ql.h index a9ed2393f0c2..f70c5f6f3919 100644 --- a/contrib/jemalloc/include/jemalloc/internal/ql.h +++ b/contrib/jemalloc/include/jemalloc/internal/ql.h @@ -1,61 +1,61 @@ /* * List definitions. */ -#define ql_head(a_type) \ +#define ql_head(a_type) \ struct { \ a_type *qlh_first; \ } -#define ql_head_initializer(a_head) {NULL} +#define ql_head_initializer(a_head) {NULL} -#define ql_elm(a_type) qr(a_type) +#define ql_elm(a_type) qr(a_type) /* List functions. 
*/ -#define ql_new(a_head) do { \ +#define ql_new(a_head) do { \ (a_head)->qlh_first = NULL; \ } while (0) -#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) +#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) -#define ql_first(a_head) ((a_head)->qlh_first) +#define ql_first(a_head) ((a_head)->qlh_first) -#define ql_last(a_head, a_field) \ +#define ql_last(a_head, a_field) \ ((ql_first(a_head) != NULL) \ ? qr_prev(ql_first(a_head), a_field) : NULL) -#define ql_next(a_head, a_elm, a_field) \ +#define ql_next(a_head, a_elm, a_field) \ ((ql_last(a_head, a_field) != (a_elm)) \ ? qr_next((a_elm), a_field) : NULL) -#define ql_prev(a_head, a_elm, a_field) \ +#define ql_prev(a_head, a_elm, a_field) \ ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \ : NULL) -#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ qr_before_insert((a_qlelm), (a_elm), a_field); \ if (ql_first(a_head) == (a_qlelm)) { \ ql_first(a_head) = (a_elm); \ } \ } while (0) -#define ql_after_insert(a_qlelm, a_elm, a_field) \ +#define ql_after_insert(a_qlelm, a_elm, a_field) \ qr_after_insert((a_qlelm), (a_elm), a_field) -#define ql_head_insert(a_head, a_elm, a_field) do { \ +#define ql_head_insert(a_head, a_elm, a_field) do { \ if (ql_first(a_head) != NULL) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = (a_elm); \ } while (0) -#define ql_tail_insert(a_head, a_elm, a_field) do { \ +#define ql_tail_insert(a_head, a_elm, a_field) do { \ if (ql_first(a_head) != NULL) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) -#define ql_remove(a_head, a_elm, a_field) do { \ +#define ql_remove(a_head, a_elm, a_field) do { \ if (ql_first(a_head) == (a_elm)) { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ } \ @@ -66,18 +66,18 @@ struct { \ } \ } while (0) -#define ql_head_remove(a_head, a_type, 
a_field) do { \ +#define ql_head_remove(a_head, a_type, a_field) do { \ a_type *t = ql_first(a_head); \ ql_remove((a_head), t, a_field); \ } while (0) -#define ql_tail_remove(a_head, a_type, a_field) do { \ +#define ql_tail_remove(a_head, a_type, a_field) do { \ a_type *t = ql_last(a_head, a_field); \ ql_remove((a_head), t, a_field); \ } while (0) -#define ql_foreach(a_var, a_head, a_field) \ +#define ql_foreach(a_var, a_head, a_field) \ qr_foreach((a_var), ql_first(a_head), a_field) -#define ql_reverse_foreach(a_var, a_head, a_field) \ +#define ql_reverse_foreach(a_var, a_head, a_field) \ qr_reverse_foreach((a_var), ql_first(a_head), a_field) diff --git a/contrib/jemalloc/include/jemalloc/internal/qr.h b/contrib/jemalloc/include/jemalloc/internal/qr.h index fe22352feddc..602944b9b4fa 100644 --- a/contrib/jemalloc/include/jemalloc/internal/qr.h +++ b/contrib/jemalloc/include/jemalloc/internal/qr.h @@ -1,28 +1,28 @@ /* Ring definitions. */ -#define qr(a_type) \ +#define qr(a_type) \ struct { \ a_type *qre_next; \ a_type *qre_prev; \ } /* Ring functions. 
*/ -#define qr_new(a_qr, a_field) do { \ +#define qr_new(a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qr); \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) +#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) -#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) +#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) -#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ +#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ (a_qr)->a_field.qre_next = (a_qrelm); \ (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ (a_qrelm)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_after_insert(a_qrelm, a_qr, a_field) \ +#define qr_after_insert(a_qrelm, a_qr, a_field) \ do \ { \ (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ @@ -31,7 +31,7 @@ struct { \ (a_qrelm)->a_field.qre_next = (a_qr); \ } while (0) -#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ +#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ void *t; \ (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ @@ -42,10 +42,10 @@ struct { \ /* qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. */ -#define qr_split(a_qr_a, a_qr_b, a_field) \ +#define qr_split(a_qr_a, a_qr_b, a_field) \ qr_meld((a_qr_a), (a_qr_b), a_field) -#define qr_remove(a_qr, a_field) do { \ +#define qr_remove(a_qr, a_field) do { \ (a_qr)->a_field.qre_prev->a_field.qre_next \ = (a_qr)->a_field.qre_next; \ (a_qr)->a_field.qre_next->a_field.qre_prev \ @@ -54,13 +54,13 @@ struct { \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_foreach(var, a_qr, a_field) \ +#define qr_foreach(var, a_qr, a_field) \ for ((var) = (a_qr); \ (var) != NULL; \ (var) = (((var)->a_field.qre_next != (a_qr)) \ ? 
(var)->a_field.qre_next : NULL)) -#define qr_reverse_foreach(var, a_qr, a_field) \ +#define qr_reverse_foreach(var, a_qr, a_field) \ for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ (var) != NULL; \ (var) = (((var) != (a_qr)) \ diff --git a/contrib/jemalloc/include/jemalloc/internal/rb.h b/contrib/jemalloc/include/jemalloc/internal/rb.h index 4d89ce522016..423802eb2dce 100644 --- a/contrib/jemalloc/include/jemalloc/internal/rb.h +++ b/contrib/jemalloc/include/jemalloc/internal/rb.h @@ -22,10 +22,6 @@ #ifndef RB_H_ #define RB_H_ -#if 0 -__FBSDID("$FreeBSD$"); -#endif - #ifdef RB_COMPACT /* Node structure. */ #define rb_node(a_type) \ diff --git a/contrib/jemalloc/include/jemalloc/internal/rtree.h b/contrib/jemalloc/include/jemalloc/internal/rtree.h index 9bd98548cfed..bc74769f50ed 100644 --- a/contrib/jemalloc/include/jemalloc/internal/rtree.h +++ b/contrib/jemalloc/include/jemalloc/internal/rtree.h @@ -14,17 +14,18 @@ typedef struct rtree_s rtree_t; * Size of each radix tree node (must be a power of 2). This impacts tree * depth. 
*/ -#if (LG_SIZEOF_PTR == 2) -# define RTREE_NODESIZE (1U << 14) -#else -# define RTREE_NODESIZE CACHELINE -#endif +#define RTREE_NODESIZE (1U << 16) + +typedef void *(rtree_alloc_t)(size_t); +typedef void (rtree_dalloc_t)(void *); #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS struct rtree_s { + rtree_alloc_t *alloc; + rtree_dalloc_t *dalloc; malloc_mutex_t mutex; void **root; unsigned height; @@ -35,7 +36,8 @@ struct rtree_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -rtree_t *rtree_new(unsigned bits); +rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc); +void rtree_delete(rtree_t *rtree); void rtree_prefork(rtree_t *rtree); void rtree_postfork_parent(rtree_t *rtree); void rtree_postfork_child(rtree_t *rtree); @@ -45,20 +47,20 @@ void rtree_postfork_child(rtree_t *rtree); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -#ifndef JEMALLOC_DEBUG -void *rtree_get_locked(rtree_t *rtree, uintptr_t key); +#ifdef JEMALLOC_DEBUG +uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key); #endif -void *rtree_get(rtree_t *rtree, uintptr_t key); -bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); +uint8_t rtree_get(rtree_t *rtree, uintptr_t key); +bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) #define RTREE_GET_GENERATE(f) \ /* The least significant bits of the key are ignored. 
*/ \ -JEMALLOC_INLINE void * \ +JEMALLOC_INLINE uint8_t \ f(rtree_t *rtree, uintptr_t key) \ { \ - void *ret; \ + uint8_t ret; \ uintptr_t subkey; \ unsigned i, lshift, height, bits; \ void **node, **child; \ @@ -68,12 +70,12 @@ f(rtree_t *rtree, uintptr_t key) \ i < height - 1; \ i++, lshift += bits, node = child) { \ bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ 3)) - bits); \ child = (void**)node[subkey]; \ if (child == NULL) { \ RTREE_UNLOCK(&rtree->mutex); \ - return (NULL); \ + return (0); \ } \ } \ \ @@ -84,7 +86,10 @@ f(rtree_t *rtree, uintptr_t key) \ bits = rtree->level2bits[i]; \ subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ bits); \ - ret = node[subkey]; \ + { \ + uint8_t *leaf = (uint8_t *)node; \ + ret = leaf[subkey]; \ + } \ RTREE_UNLOCK(&rtree->mutex); \ \ RTREE_GET_VALIDATE \ @@ -123,7 +128,7 @@ RTREE_GET_GENERATE(rtree_get) #undef RTREE_GET_VALIDATE JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, void *val) +rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val) { uintptr_t subkey; unsigned i, lshift, height, bits; @@ -138,14 +143,14 @@ rtree_set(rtree_t *rtree, uintptr_t key, void *val) bits); child = (void**)node[subkey]; if (child == NULL) { - child = (void**)base_alloc(sizeof(void *) << - rtree->level2bits[i+1]); + size_t size = ((i + 1 < height - 1) ? sizeof(void *) + : (sizeof(uint8_t))) << rtree->level2bits[i+1]; + child = (void**)rtree->alloc(size); if (child == NULL) { malloc_mutex_unlock(&rtree->mutex); return (true); } - memset(child, 0, sizeof(void *) << - rtree->level2bits[i+1]); + memset(child, 0, size); node[subkey] = child; } } @@ -153,7 +158,10 @@ rtree_set(rtree_t *rtree, uintptr_t key, void *val) /* node is a leaf, so it contains values rather than node pointers. 
*/ bits = rtree->level2bits[i]; subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); - node[subkey] = val; + { + uint8_t *leaf = (uint8_t *)node; + leaf[subkey] = val; + } malloc_mutex_unlock(&rtree->mutex); return (false); diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache.h b/contrib/jemalloc/include/jemalloc/internal/tcache.h index d4eecdee0dc7..c3d4b58d4dc5 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tcache.h +++ b/contrib/jemalloc/include/jemalloc/internal/tcache.h @@ -297,6 +297,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) binind = SMALL_SIZE2BIN(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; + size = arena_bin_info[binind].reg_size; ret = tcache_alloc_easy(tbin); if (ret == NULL) { ret = tcache_alloc_small_hard(tcache, tbin, binind); diff --git a/contrib/jemalloc/include/jemalloc/internal/tsd.h b/contrib/jemalloc/include/jemalloc/internal/tsd.h index 0037cf35e703..9fb4a23ec6bf 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tsd.h +++ b/contrib/jemalloc/include/jemalloc/internal/tsd.h @@ -6,6 +6,12 @@ typedef bool (*malloc_tsd_cleanup_t)(void); +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +typedef struct tsd_init_block_s tsd_init_block_t; +typedef struct tsd_init_head_s tsd_init_head_t; +#endif + /* * TLS/TSD-agnostic macro-based implementation of thread-specific data. 
There * are four macros that support (at least) three use cases: file-private, @@ -75,12 +81,13 @@ extern __thread a_type a_name##_tls; \ extern pthread_key_t a_name##_tsd; \ extern bool a_name##_booted; #elif (defined(_WIN32)) -#define malloc_tsd_externs(a_name, a_type) \ +#define malloc_tsd_externs(a_name, a_type) \ extern DWORD a_name##_tsd; \ extern bool a_name##_booted; #else #define malloc_tsd_externs(a_name, a_type) \ extern pthread_key_t a_name##_tsd; \ +extern tsd_init_head_t a_name##_tsd_init_head; \ extern bool a_name##_booted; #endif @@ -105,6 +112,10 @@ a_attr bool a_name##_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr pthread_key_t a_name##_tsd; \ +a_attr tsd_init_head_t a_name##_tsd_init_head = { \ + ql_head_initializer(blocks), \ + MALLOC_MUTEX_INITIALIZER \ +}; \ a_attr bool a_name##_booted = false; #endif @@ -333,8 +344,14 @@ a_name##_tsd_get_wrapper(void) \ pthread_getspecific(a_name##_tsd); \ \ if (wrapper == NULL) { \ + tsd_init_block_t block; \ + wrapper = tsd_init_check_recursion( \ + &a_name##_tsd_init_head, &block); \ + if (wrapper) \ + return (wrapper); \ wrapper = (a_name##_tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + block.data = wrapper; \ if (wrapper == NULL) { \ malloc_write(": Error allocating" \ " TSD for "#a_name"\n"); \ @@ -350,6 +367,7 @@ a_name##_tsd_get_wrapper(void) \ " TSD for "#a_name"\n"); \ abort(); \ } \ + tsd_init_finish(&a_name##_tsd_init_head, &block); \ } \ return (wrapper); \ } \ @@ -379,6 +397,19 @@ a_name##_tsd_set(a_type *val) \ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +struct tsd_init_block_s { + ql_elm(tsd_init_block_t) link; + pthread_t thread; + void *data; +}; +struct tsd_init_head_s { + ql_head(tsd_init_block_t) blocks; + malloc_mutex_t lock; +}; +#endif + #endif /* 
JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -388,6 +419,12 @@ void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_no_cleanup(void *); void malloc_tsd_cleanup_register(bool (*f)(void)); void malloc_tsd_boot(void); +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +void *tsd_init_check_recursion(tsd_init_head_t *head, + tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); +#endif #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/util.h b/contrib/jemalloc/include/jemalloc/internal/util.h index 8479693631ab..6b938f746889 100644 --- a/contrib/jemalloc/include/jemalloc/internal/util.h +++ b/contrib/jemalloc/include/jemalloc/internal/util.h @@ -14,7 +14,7 @@ * Wrap a cpp argument that contains commas such that it isn't broken up into * multiple arguments. */ -#define JEMALLOC_CONCAT(...) __VA_ARGS__ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ /* * Silence compiler warnings due to uninitialized values. This is used @@ -42,12 +42,6 @@ } while (0) #endif -/* Use to assert a particular configuration, e.g., cassert(config_debug). */ -#define cassert(c) do { \ - if ((c) == false) \ - assert(false); \ -} while (0) - #ifndef not_reached #define not_reached() do { \ if (config_debug) { \ @@ -69,10 +63,18 @@ } while (0) #endif +#ifndef assert_not_implemented #define assert_not_implemented(e) do { \ if (config_debug && !(e)) \ not_implemented(); \ } while (0) +#endif + +/* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ +#define cassert(c) do { \ + if ((c) == false) \ + not_reached(); \ +} while (0) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -82,8 +84,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -int buferror(char *buf, size_t buflen); -uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); +int buferror(int err, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *restrict nptr, + char **restrict endptr, int base); void malloc_write(const char *s); /* @@ -107,7 +110,6 @@ void malloc_printf(const char *format, ...) #ifndef JEMALLOC_ENABLE_INLINE size_t pow2_ceil(size_t x); -void malloc_write(const char *s); void set_errno(int errnum); int get_errno(void); #endif diff --git a/contrib/jemalloc/include/jemalloc/jemalloc.h b/contrib/jemalloc/include/jemalloc/jemalloc.h index 8d7133bf1770..8b498677641d 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc.h @@ -4,40 +4,132 @@ extern "C" { #endif +/* Defined if __attribute__((...)) syntax is supported. */ +#define JEMALLOC_HAVE_ATTR + +/* Support the experimental API. */ +#define JEMALLOC_EXPERIMENTAL + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +/* #undef JEMALLOC_OVERRIDE_MEMALIGN */ +#define JEMALLOC_OVERRIDE_VALLOC + +/* + * At least Linux omits the "const" in: + * + * size_t malloc_usable_size(const void *ptr); + * + * Match the operating system's prototype. + */ +#define JEMALLOC_USABLE_SIZE_CONST const + +/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ +#define LG_SIZEOF_PTR 3 + +/* + * Name mangling for public symbols is controlled by --with-mangling and + * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by + * these macro definitions. 
+ */ +#ifndef JEMALLOC_NO_RENAME +# define je_malloc_conf malloc_conf +# define je_malloc_message malloc_message +# define je_malloc malloc +# define je_calloc calloc +# define je_posix_memalign posix_memalign +# define je_aligned_alloc aligned_alloc +# define je_realloc realloc +# define je_free free +# define je_mallocx mallocx +# define je_rallocx rallocx +# define je_xallocx xallocx +# define je_sallocx sallocx +# define je_dallocx dallocx +# define je_nallocx nallocx +# define je_mallctl mallctl +# define je_mallctlnametomib mallctlnametomib +# define je_mallctlbymib mallctlbymib +# define je_malloc_stats_print malloc_stats_print +# define je_malloc_usable_size malloc_usable_size +# define je_valloc valloc +# define je_allocm allocm +# define je_dallocm dallocm +# define je_nallocm nallocm +# define je_rallocm rallocm +# define je_sallocm sallocm +#endif + +#include "jemalloc_FreeBSD.h" + #include <limits.h> #include <strings.h> -#define JEMALLOC_VERSION "3.4.1-0-g0135fb806e4137dc9cdf152541926a2bc95e33f0" +#define JEMALLOC_VERSION "3.5.0-0-gcc47dde16203a6ae7eb685b53e1ae501f3869bc6" #define JEMALLOC_VERSION_MAJOR 3 -#define JEMALLOC_VERSION_MINOR 4 -#define JEMALLOC_VERSION_BUGFIX 1 +#define JEMALLOC_VERSION_MINOR 5 +#define JEMALLOC_VERSION_BUGFIX 0 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "0135fb806e4137dc9cdf152541926a2bc95e33f0" +#define JEMALLOC_VERSION_GID "cc47dde16203a6ae7eb685b53e1ae501f3869bc6" -#include "jemalloc_defs.h" -#include "jemalloc_FreeBSD.h" +# define MALLOCX_LG_ALIGN(la) (la) +# if LG_SIZEOF_PTR == 2 +# define MALLOCX_ALIGN(a) (ffs(a)-1) +# else +# define MALLOCX_ALIGN(a) \ + ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +# endif +# define MALLOCX_ZERO ((int)0x40) +/* Bias arena index bits so that 0 encodes "MALLOCX_ARENA() unspecified".
*/ +# define MALLOCX_ARENA(a) ((int)(((a)+1) << 8)) #ifdef JEMALLOC_EXPERIMENTAL -#define ALLOCM_LG_ALIGN(la) (la) -#if LG_SIZEOF_PTR == 2 -#define ALLOCM_ALIGN(a) (ffs(a)-1) -#else -#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) -#endif -#define ALLOCM_ZERO ((int)0x40) -#define ALLOCM_NO_MOVE ((int)0x80) +# define ALLOCM_LG_ALIGN(la) (la) +# if LG_SIZEOF_PTR == 2 +# define ALLOCM_ALIGN(a) (ffs(a)-1) +# else +# define ALLOCM_ALIGN(a) \ + ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +# endif +# define ALLOCM_ZERO ((int)0x40) +# define ALLOCM_NO_MOVE ((int)0x80) /* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". */ -#define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) +# define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) +# define ALLOCM_SUCCESS 0 +# define ALLOCM_ERR_OOM 1 +# define ALLOCM_ERR_NOT_MOVED 2 +#endif -#define ALLOCM_SUCCESS 0 -#define ALLOCM_ERR_OOM 1 -#define ALLOCM_ERR_NOT_MOVED 2 +#ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +#elif _MSC_VER +# define JEMALLOC_ATTR(s) +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_SECTION(s) __declspec(allocate(s)) +# define JEMALLOC_NOINLINE __declspec(noinline) +#else +# define JEMALLOC_ATTR(s) +# define JEMALLOC_EXPORT +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_SECTION(s) +# define JEMALLOC_NOINLINE #endif /* - * The je_ prefix on the following public symbol declarations is an artifact of - * namespace management, and should be omitted in application code unless - * JEMALLOC_NO_DEMANGLE is defined (see below). 
+ * The je_ prefix on the following public symbol declarations is an artifact + * of namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle.h). */ extern JEMALLOC_EXPORT const char *je_malloc_conf; extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, @@ -53,6 +145,25 @@ JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); JEMALLOC_EXPORT void je_free(void *ptr); +JEMALLOC_EXPORT void *je_mallocx(size_t size, int flags); +JEMALLOC_EXPORT void *je_rallocx(void *ptr, size_t size, int flags); +JEMALLOC_EXPORT size_t je_xallocx(void *ptr, size_t size, size_t extra, + int flags); +JEMALLOC_EXPORT size_t je_sallocx(const void *ptr, int flags); +JEMALLOC_EXPORT void je_dallocx(void *ptr, int flags); +JEMALLOC_EXPORT size_t je_nallocx(size_t size, int flags); + +JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, + size_t *miblenp); +JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, + const char *), void *je_cbopaque, const char *opts); +JEMALLOC_EXPORT size_t je_malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void *ptr); + #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) JEMALLOC_ATTR(malloc); @@ -62,17 +173,6 @@ JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); #endif -JEMALLOC_EXPORT size_t je_malloc_usable_size( - JEMALLOC_USABLE_SIZE_CONST void *ptr); -JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, - const char *), void *je_cbopaque, const char *opts); -JEMALLOC_EXPORT int 
je_mallctl(const char *name, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, - size_t *miblenp); -JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen); - #ifdef JEMALLOC_EXPERIMENTAL JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_ATTR(nonnull(1)); @@ -93,63 +193,69 @@ JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); * --with-mangling and/or --with-jemalloc-prefix configuration settings. */ #ifdef JEMALLOC_MANGLE -#ifndef JEMALLOC_NO_DEMANGLE -#define JEMALLOC_NO_DEMANGLE -#endif -#define malloc_conf je_malloc_conf -#define malloc_message je_malloc_message -#define malloc je_malloc -#define calloc je_calloc -#define posix_memalign je_posix_memalign -#define aligned_alloc je_aligned_alloc -#define realloc je_realloc -#define free je_free -#define malloc_usable_size je_malloc_usable_size -#define malloc_stats_print je_malloc_stats_print -#define mallctl je_mallctl -#define mallctlnametomib je_mallctlnametomib -#define mallctlbymib je_mallctlbymib -#define memalign je_memalign -#define valloc je_valloc -#ifdef JEMALLOC_EXPERIMENTAL -#define allocm je_allocm -#define rallocm je_rallocm -#define sallocm je_sallocm -#define dallocm je_dallocm -#define nallocm je_nallocm -#endif +# ifndef JEMALLOC_NO_DEMANGLE +# define JEMALLOC_NO_DEMANGLE +# endif +# define malloc_conf je_malloc_conf +# define malloc_message je_malloc_message +# define malloc je_malloc +# define calloc je_calloc +# define posix_memalign je_posix_memalign +# define aligned_alloc je_aligned_alloc +# define realloc je_realloc +# define free je_free +# define mallocx je_mallocx +# define rallocx je_rallocx +# define xallocx je_xallocx +# define sallocx je_sallocx +# define dallocx je_dallocx +# define nallocx je_nallocx +# define mallctl je_mallctl +# define mallctlnametomib 
je_mallctlnametomib +# define mallctlbymib je_mallctlbymib +# define malloc_stats_print je_malloc_stats_print +# define malloc_usable_size je_malloc_usable_size +# define valloc je_valloc +# define allocm je_allocm +# define dallocm je_dallocm +# define nallocm je_nallocm +# define rallocm je_rallocm +# define sallocm je_sallocm #endif /* - * The je_* macros can be used as stable alternative names for the public - * jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily meant - * for use in jemalloc itself, but it can be used by application code to + * The je_* macros can be used as stable alternative names for the + * public jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily + * meant for use in jemalloc itself, but it can be used by application code to * provide isolation from the name mangling specified via --with-mangling * and/or --with-jemalloc-prefix. */ #ifndef JEMALLOC_NO_DEMANGLE -#undef je_malloc_conf -#undef je_malloc_message -#undef je_malloc -#undef je_calloc -#undef je_posix_memalign -#undef je_aligned_alloc -#undef je_realloc -#undef je_free -#undef je_malloc_usable_size -#undef je_malloc_stats_print -#undef je_mallctl -#undef je_mallctlnametomib -#undef je_mallctlbymib -#undef je_memalign -#undef je_valloc -#ifdef JEMALLOC_EXPERIMENTAL -#undef je_allocm -#undef je_rallocm -#undef je_sallocm -#undef je_dallocm -#undef je_nallocm -#endif +# undef je_malloc_conf +# undef je_malloc_message +# undef je_malloc +# undef je_calloc +# undef je_posix_memalign +# undef je_aligned_alloc +# undef je_realloc +# undef je_free +# undef je_mallocx +# undef je_rallocx +# undef je_xallocx +# undef je_sallocx +# undef je_dallocx +# undef je_nallocx +# undef je_mallctl +# undef je_mallctlnametomib +# undef je_mallctlbymib +# undef je_malloc_stats_print +# undef je_malloc_usable_size +# undef je_valloc +# undef je_allocm +# undef je_dallocm +# undef je_nallocm +# undef je_rallocm +# undef je_sallocm #endif #ifdef __cplusplus diff --git 
a/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h b/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h index e6c84075aa16..94554bcc5da3 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h @@ -74,6 +74,12 @@ extern int __isthreaded; #undef je_free #undef je_posix_memalign #undef je_malloc_usable_size +#undef je_mallocx +#undef je_rallocx +#undef je_xallocx +#undef je_sallocx +#undef je_dallocx +#undef je_nallocx #undef je_allocm #undef je_rallocm #undef je_sallocm @@ -85,6 +91,12 @@ extern int __isthreaded; #define je_free __free #define je_posix_memalign __posix_memalign #define je_malloc_usable_size __malloc_usable_size +#define je_mallocx __mallocx +#define je_rallocx __rallocx +#define je_xallocx __xallocx +#define je_sallocx __sallocx +#define je_dallocx __dallocx +#define je_nallocx __nallocx #define je_allocm __allocm #define je_rallocm __rallocm #define je_sallocm __sallocm @@ -108,10 +120,15 @@ __weak_reference(__realloc, realloc); __weak_reference(__free, free); __weak_reference(__posix_memalign, posix_memalign); __weak_reference(__malloc_usable_size, malloc_usable_size); +__weak_reference(__mallocx, mallocx); +__weak_reference(__rallocx, rallocx); +__weak_reference(__xallocx, xallocx); +__weak_reference(__sallocx, sallocx); +__weak_reference(__dallocx, dallocx); +__weak_reference(__nallocx, nallocx); __weak_reference(__allocm, allocm); __weak_reference(__rallocm, rallocm); __weak_reference(__sallocm, sallocm); __weak_reference(__dallocm, dallocm); __weak_reference(__nallocm, nallocm); #endif - diff --git a/contrib/jemalloc/src/arena.c b/contrib/jemalloc/src/arena.c index d28b629a1e14..4da6d50cbf02 100644 --- a/contrib/jemalloc/src/arena.c +++ b/contrib/jemalloc/src/arena.c @@ -38,52 +38,18 @@ const uint8_t small_size2bin[] = { }; /******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. + */ -static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, - size_t pageind, size_t npages, bool maybe_adjac_pred, - bool maybe_adjac_succ); -static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, - size_t pageind, size_t npages, bool maybe_adjac_pred, - bool maybe_adjac_succ); -static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, - bool large, size_t binind, bool zero); -static arena_chunk_t *arena_chunk_alloc(arena_t *arena); -static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); -static arena_run_t *arena_run_alloc_helper(arena_t *arena, size_t size, - bool large, size_t binind, bool zero); -static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, - size_t binind, bool zero); -static arena_chunk_t *chunks_dirty_iter_cb(arena_chunk_tree_t *tree, - arena_chunk_t *chunk, void *arg); static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned); -static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize); -static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); -static arena_run_t *arena_bin_runs_first(arena_bin_t *bin); -static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run); -static void arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run); -static arena_run_t *arena_bin_nonfull_run_tryget(arena_bin_t *bin); -static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); -static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); -static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, 
arena_bin_t *bin); static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); -static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t size); -static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); -static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, - size_t min_run_size); -static void bin_info_init(void); /******************************************************************************/ @@ -388,50 +354,44 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) } static void -arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, - size_t binind, bool zero) +arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages) { - arena_chunk_t *chunk; - size_t run_ind, total_pages, need_pages, rem_pages, i; - size_t flag_dirty; - assert((large && binind == BININD_INVALID) || (large == false && binind - != BININD_INVALID)); + if (config_stats) { + ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + + add_pages) << LG_PAGE) - CHUNK_CEILING((arena->nactive - + sub_pages) << LG_PAGE); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); + } +} + +static void +arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, + size_t flag_dirty, size_t need_pages) +{ + size_t total_pages, rem_pages; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> LG_PAGE; assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == flag_dirty); - need_pages = (size >> LG_PAGE); - assert(need_pages > 0); assert(need_pages <= 
total_pages); rem_pages = total_pages - need_pages; arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); - if (config_stats) { - /* - * Update stats_cactive if nactive is crossing a chunk - * multiple. - */ - size_t cactive_diff = CHUNK_CEILING((arena->nactive + - need_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << - LG_PAGE); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); - } + arena_cactive_update(arena, need_pages, 0); arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ if (rem_pages > 0) { if (flag_dirty != 0) { - arena_mapbits_unallocated_set(chunk, run_ind+need_pages, - (rem_pages << LG_PAGE), CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, + run_ind+need_pages, (rem_pages << LG_PAGE), + flag_dirty); arena_mapbits_unallocated_set(chunk, run_ind+total_pages-1, (rem_pages << LG_PAGE), - CHUNK_MAP_DIRTY); + flag_dirty); } else { arena_mapbits_unallocated_set(chunk, run_ind+need_pages, (rem_pages << LG_PAGE), @@ -445,166 +405,217 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages, false, true); } +} - /* - * Update the page map separately for large vs. small runs, since it is - * possible to avoid iteration for large mallocs. - */ - if (large) { - if (zero) { - if (flag_dirty == 0) { - /* - * The run is clean, so some pages may be - * zeroed (i.e. never before touched). 
- */ - for (i = 0; i < need_pages; i++) { - if (arena_mapbits_unzeroed_get(chunk, - run_ind+i) != 0) { - arena_run_zero(chunk, run_ind+i, - 1); - } else if (config_debug) { - arena_run_page_validate_zeroed( - chunk, run_ind+i); - } else { - arena_run_page_mark_zeroed( - chunk, run_ind+i); - } +static void +arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, + bool remove, bool zero) +{ + arena_chunk_t *chunk; + size_t flag_dirty, run_ind, need_pages, i; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); + need_pages = (size >> LG_PAGE); + assert(need_pages > 0); + + if (remove) { + arena_run_split_remove(arena, chunk, run_ind, flag_dirty, + need_pages); + } + + if (zero) { + if (flag_dirty == 0) { + /* + * The run is clean, so some pages may be zeroed (i.e. + * never before touched). + */ + for (i = 0; i < need_pages; i++) { + if (arena_mapbits_unzeroed_get(chunk, run_ind+i) + != 0) + arena_run_zero(chunk, run_ind+i, 1); + else if (config_debug) { + arena_run_page_validate_zeroed(chunk, + run_ind+i); + } else { + arena_run_page_mark_zeroed(chunk, + run_ind+i); } - } else { - /* - * The run is dirty, so all pages must be - * zeroed. - */ - arena_run_zero(chunk, run_ind, need_pages); } } else { - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + - (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); + /* The run is dirty, so all pages must be zeroed. */ + arena_run_zero(chunk, run_ind, need_pages); } - - /* - * Set the last element first, in case the run only contains one - * page (i.e. both statements set the same element). 
- */ - arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, - flag_dirty); - arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); } else { - assert(zero == false); - /* - * Propagate the dirty and unzeroed flags to the allocated - * small run, so that arena_dalloc_bin_run() has the ability to - * conditionally trim clean pages. - */ - arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); - /* - * The first page will always be dirtied during small run - * initialization, so a validation failure here would not - * actually cause an observable failure. - */ - if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind) == 0) - arena_run_page_validate_zeroed(chunk, run_ind); - for (i = 1; i < need_pages - 1; i++) { - arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); - if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) { - arena_run_page_validate_zeroed(chunk, - run_ind+i); - } - } - arena_mapbits_small_set(chunk, run_ind+need_pages-1, - need_pages-1, binind, flag_dirty); - if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == - 0) { - arena_run_page_validate_zeroed(chunk, - run_ind+need_pages-1); - } VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); } + + /* + * Set the last element first, in case the run only contains one page + * (i.e. both statements set the same element). 
+ */ + arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty); + arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); +} + +static void +arena_run_split_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) +{ + + arena_run_split_large_helper(arena, run, size, true, zero); +} + +static void +arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) +{ + + arena_run_split_large_helper(arena, run, size, false, zero); +} + +static void +arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, + size_t binind) +{ + arena_chunk_t *chunk; + size_t flag_dirty, run_ind, need_pages, i; + + assert(binind != BININD_INVALID); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); + need_pages = (size >> LG_PAGE); + assert(need_pages > 0); + + arena_run_split_remove(arena, chunk, run_ind, flag_dirty, need_pages); + + /* + * Propagate the dirty and unzeroed flags to the allocated small run, + * so that arena_dalloc_bin_run() has the ability to conditionally trim + * clean pages. + */ + arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); + /* + * The first page will always be dirtied during small run + * initialization, so a validation failure here would not actually + * cause an observable failure. 
+ */ + if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, + run_ind) == 0) + arena_run_page_validate_zeroed(chunk, run_ind); + for (i = 1; i < need_pages - 1; i++) { + arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); + if (config_debug && flag_dirty == 0 && + arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) + arena_run_page_validate_zeroed(chunk, run_ind+i); + } + arena_mapbits_small_set(chunk, run_ind+need_pages-1, need_pages-1, + binind, flag_dirty); + if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages-1) == 0) + arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1); + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); +} + +static arena_chunk_t * +arena_chunk_init_spare(arena_t *arena) +{ + arena_chunk_t *chunk; + + assert(arena->spare != NULL); + + chunk = arena->spare; + arena->spare = NULL; + + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); + assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == + arena_maxclass); + assert(arena_mapbits_dirty_get(chunk, map_bias) == + arena_mapbits_dirty_get(chunk, chunk_npages-1)); + + return (chunk); +} + +static arena_chunk_t * +arena_chunk_init_hard(arena_t *arena) +{ + arena_chunk_t *chunk; + bool zero; + size_t unzeroed, i; + + assert(arena->spare == NULL); + + zero = false; + malloc_mutex_unlock(&arena->lock); + chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, false, + &zero, arena->dss_prec); + malloc_mutex_lock(&arena->lock); + if (chunk == NULL) + return (NULL); + if (config_stats) + arena->stats.mapped += chunksize; + + chunk->arena = arena; + + /* + * Claim that no pages are in use, since the header is merely overhead. 
+ */ + chunk->ndirty = 0; + + chunk->nruns_avail = 0; + chunk->nruns_adjac = 0; + + /* + * Initialize the map to contain one maximal free untouched run. Mark + * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. + */ + unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, + unzeroed); + /* + * There is no need to initialize the internal page map entries unless + * the chunk is not zeroed. + */ + if (zero == false) { + VALGRIND_MAKE_MEM_UNDEFINED((void *)arena_mapp_get(chunk, + map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, + chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, + map_bias+1))); + for (i = map_bias+1; i < chunk_npages-1; i++) + arena_mapbits_unzeroed_set(chunk, i, unzeroed); + } else { + VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk, + map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, + chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, + map_bias+1))); + if (config_debug) { + for (i = map_bias+1; i < chunk_npages-1; i++) { + assert(arena_mapbits_unzeroed_get(chunk, i) == + unzeroed); + } + } + } + arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, + unzeroed); + + return (chunk); } static arena_chunk_t * arena_chunk_alloc(arena_t *arena) { arena_chunk_t *chunk; - size_t i; - if (arena->spare != NULL) { - chunk = arena->spare; - arena->spare = NULL; - - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); - assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); - assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); - assert(arena_mapbits_unallocated_size_get(chunk, - chunk_npages-1) == arena_maxclass); - assert(arena_mapbits_dirty_get(chunk, map_bias) == - arena_mapbits_dirty_get(chunk, chunk_npages-1)); - } else { - bool zero; - size_t unzeroed; - - zero = false; - malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, - false, &zero, arena->dss_prec); - 
malloc_mutex_lock(&arena->lock); - if (chunk == NULL) - return (NULL); - if (config_stats) - arena->stats.mapped += chunksize; - - chunk->arena = arena; - - /* - * Claim that no pages are in use, since the header is merely - * overhead. - */ - chunk->ndirty = 0; - - chunk->nruns_avail = 0; - chunk->nruns_adjac = 0; - - /* - * Initialize the map to contain one maximal free untouched run. - * Mark the pages as zeroed iff chunk_alloc() returned a zeroed - * chunk. - */ - unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, - unzeroed); - /* - * There is no need to initialize the internal page map entries - * unless the chunk is not zeroed. - */ - if (zero == false) { - VALGRIND_MAKE_MEM_UNDEFINED( - (void *)arena_mapp_get(chunk, map_bias+1), - (size_t)((uintptr_t) arena_mapp_get(chunk, - chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, - map_bias+1))); - for (i = map_bias+1; i < chunk_npages-1; i++) - arena_mapbits_unzeroed_set(chunk, i, unzeroed); - } else { - VALGRIND_MAKE_MEM_DEFINED( - (void *)arena_mapp_get(chunk, map_bias+1), - (size_t)((uintptr_t) arena_mapp_get(chunk, - chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, - map_bias+1))); - if (config_debug) { - for (i = map_bias+1; i < chunk_npages-1; i++) { - assert(arena_mapbits_unzeroed_get(chunk, - i) == unzeroed); - } - } - } - arena_mapbits_unallocated_set(chunk, chunk_npages-1, - arena_maxclass, unzeroed); - } + if (arena->spare != NULL) + chunk = arena_chunk_init_spare(arena); + else + chunk = arena_chunk_init_hard(arena); /* Insert the run into the runs_avail tree. 
*/ arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, @@ -646,8 +657,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) } static arena_run_t * -arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, - bool zero) +arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { arena_run_t *run; arena_chunk_map_t *mapelm, key; @@ -662,7 +672,7 @@ arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); + arena_run_split_large(arena, run, size, zero); return (run); } @@ -670,19 +680,16 @@ arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, } static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, - bool zero) +arena_run_alloc_large(arena_t *arena, size_t size, bool zero) { arena_chunk_t *chunk; arena_run_t *run; assert(size <= arena_maxclass); assert((size & PAGE_MASK) == 0); - assert((large && binind == BININD_INVALID) || (large == false && binind - != BININD_INVALID)); /* Search the arena's chunks for the lowest best fit. */ - run = arena_run_alloc_helper(arena, size, large, binind, zero); + run = arena_run_alloc_large_helper(arena, size, zero); if (run != NULL) return (run); @@ -692,7 +699,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, chunk = arena_chunk_alloc(arena); if (chunk != NULL) { run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); + arena_run_split_large(arena, run, size, zero); return (run); } @@ -701,7 +708,63 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, * sufficient memory available while this one dropped arena->lock in * arena_chunk_alloc(), so search one more time. 
*/ - return (arena_run_alloc_helper(arena, size, large, binind, zero)); + return (arena_run_alloc_large_helper(arena, size, zero)); +} + +static arena_run_t * +arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) +{ + arena_run_t *run; + arena_chunk_map_t *mapelm, key; + + key.bits = size | CHUNK_MAP_KEY; + mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + LG_PAGE)); + arena_run_split_small(arena, run, size, binind); + return (run); + } + + return (NULL); +} + +static arena_run_t * +arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) +{ + arena_chunk_t *chunk; + arena_run_t *run; + + assert(size <= arena_maxclass); + assert((size & PAGE_MASK) == 0); + assert(binind != BININD_INVALID); + + /* Search the arena's chunks for the lowest best fit. */ + run = arena_run_alloc_small_helper(arena, size, binind); + if (run != NULL) + return (run); + + /* + * No usable runs. Create a new chunk from which to allocate the run. + */ + chunk = arena_chunk_alloc(arena); + if (chunk != NULL) { + run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); + arena_run_split_small(arena, run, size, binind); + return (run); + } + + /* + * arena_chunk_alloc() failed, but another thread may have made + * sufficient memory available while this one dropped arena->lock in + * arena_chunk_alloc(), so search one more time. 
+ */ + return (arena_run_alloc_small_helper(arena, size, binind)); } static inline void @@ -727,14 +790,169 @@ arena_maybe_purge(arena_t *arena) arena_purge(arena, false); } +static arena_chunk_t * +chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) +{ + size_t *ndirty = (size_t *)arg; + + assert(chunk->ndirty != 0); + *ndirty += chunk->ndirty; + return (NULL); +} + +static size_t +arena_compute_npurgatory(arena_t *arena, bool all) +{ + size_t npurgatory, npurgeable; + + /* + * Compute the minimum number of pages that this thread should try to + * purge. + */ + npurgeable = arena->ndirty - arena->npurgatory; + + if (all == false) { + size_t threshold = (arena->nactive >> opt_lg_dirty_mult); + + npurgatory = npurgeable - threshold; + } else + npurgatory = npurgeable; + + return (npurgatory); +} + +static void +arena_chunk_stash_dirty(arena_t *arena, arena_chunk_t *chunk, bool all, + arena_chunk_mapelms_t *mapelms) +{ + size_t pageind, npages; + + /* + * Temporarily allocate free dirty runs within chunk. If all is false, + * only operate on dirty runs that are fragments; otherwise operate on + * all dirty runs. + */ + for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + if (arena_mapbits_allocated_get(chunk, pageind) == 0) { + size_t run_size = + arena_mapbits_unallocated_size_get(chunk, pageind); + + npages = run_size >> LG_PAGE; + assert(pageind + npages <= chunk_npages); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+npages-1)); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0 && + (all || arena_avail_adjac(chunk, pageind, + npages))) { + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << LG_PAGE)); + + arena_run_split_large(arena, run, run_size, + false); + /* Append to list for later processing. 
*/ + ql_elm_new(mapelm, u.ql_link); + ql_tail_insert(mapelms, mapelm, u.ql_link); + } + } else { + /* Skip run. */ + if (arena_mapbits_large_get(chunk, pageind) != 0) { + npages = arena_mapbits_large_size_get(chunk, + pageind) >> LG_PAGE; + } else { + size_t binind; + arena_bin_info_t *bin_info; + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << LG_PAGE)); + + assert(arena_mapbits_small_runind_get(chunk, + pageind) == 0); + binind = arena_bin_index(arena, run->bin); + bin_info = &arena_bin_info[binind]; + npages = bin_info->run_size >> LG_PAGE; + } + } + } + assert(pageind == chunk_npages); + assert(chunk->ndirty == 0 || all == false); + assert(chunk->nruns_adjac == 0); +} + +static size_t +arena_chunk_purge_stashed(arena_t *arena, arena_chunk_t *chunk, + arena_chunk_mapelms_t *mapelms) +{ + size_t npurged, pageind, npages, nmadvise; + arena_chunk_map_t *mapelm; + + malloc_mutex_unlock(&arena->lock); + if (config_stats) + nmadvise = 0; + npurged = 0; + ql_foreach(mapelm, mapelms, u.ql_link) { + bool unzeroed; + size_t flag_unzeroed, i; + + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + npages = arena_mapbits_large_size_get(chunk, pageind) >> + LG_PAGE; + assert(pageind + npages <= chunk_npages); + unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << + LG_PAGE)), (npages << LG_PAGE)); + flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; + /* + * Set the unzeroed flag for all pages, now that pages_purge() + * has returned whether the pages were zeroed as a side effect + * of purging. This chunk map modification is safe even though + * the arena mutex isn't currently owned by this thread, + * because the run is marked as allocated, thus protecting it + * from being modified by any other thread. As long as these + * writes don't perturb the first and last elements' + * CHUNK_MAP_ALLOCATED bits, behavior is well defined. 
+ */ + for (i = 0; i < npages; i++) { + arena_mapbits_unzeroed_set(chunk, pageind+i, + flag_unzeroed); + } + npurged += npages; + if (config_stats) + nmadvise++; + } + malloc_mutex_lock(&arena->lock); + if (config_stats) + arena->stats.nmadvise += nmadvise; + + return (npurged); +} + +static void +arena_chunk_unstash_purged(arena_t *arena, arena_chunk_t *chunk, + arena_chunk_mapelms_t *mapelms) +{ + arena_chunk_map_t *mapelm; + size_t pageind; + + /* Deallocate runs. */ + for (mapelm = ql_first(mapelms); mapelm != NULL; + mapelm = ql_first(mapelms)) { + arena_run_t *run; + + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << + LG_PAGE)); + ql_remove(mapelms, mapelm, u.ql_link); + arena_run_dalloc(arena, run, false, true); + } +} + static inline size_t arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) { size_t npurged; - ql_head(arena_chunk_map_t) mapelms; - arena_chunk_map_t *mapelm; - size_t pageind, npages; - size_t nmadvise; + arena_chunk_mapelms_t mapelms; ql_new(&mapelms); @@ -770,121 +988,13 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) if (chunk->nruns_adjac == 0) all = true; - /* - * Temporarily allocate free dirty runs within chunk. If all is false, - * only operate on dirty runs that are fragments; otherwise operate on - * all dirty runs. 
- */ - for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { - mapelm = arena_mapp_get(chunk, pageind); - if (arena_mapbits_allocated_get(chunk, pageind) == 0) { - size_t run_size = - arena_mapbits_unallocated_size_get(chunk, pageind); - - npages = run_size >> LG_PAGE; - assert(pageind + npages <= chunk_npages); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, pageind+npages-1)); - - if (arena_mapbits_dirty_get(chunk, pageind) != 0 && - (all || arena_avail_adjac(chunk, pageind, - npages))) { - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << LG_PAGE)); - - arena_run_split(arena, run, run_size, true, - BININD_INVALID, false); - /* Append to list for later processing. */ - ql_elm_new(mapelm, u.ql_link); - ql_tail_insert(&mapelms, mapelm, u.ql_link); - } - } else { - /* Skip run. */ - if (arena_mapbits_large_get(chunk, pageind) != 0) { - npages = arena_mapbits_large_size_get(chunk, - pageind) >> LG_PAGE; - } else { - size_t binind; - arena_bin_info_t *bin_info; - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << LG_PAGE)); - - assert(arena_mapbits_small_runind_get(chunk, - pageind) == 0); - binind = arena_bin_index(arena, run->bin); - bin_info = &arena_bin_info[binind]; - npages = bin_info->run_size >> LG_PAGE; - } - } - } - assert(pageind == chunk_npages); - assert(chunk->ndirty == 0 || all == false); - assert(chunk->nruns_adjac == 0); - - malloc_mutex_unlock(&arena->lock); - if (config_stats) - nmadvise = 0; - npurged = 0; - ql_foreach(mapelm, &mapelms, u.ql_link) { - bool unzeroed; - size_t flag_unzeroed, i; - - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - npages = arena_mapbits_large_size_get(chunk, pageind) >> - LG_PAGE; - assert(pageind + npages <= chunk_npages); - unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << - LG_PAGE)), (npages << LG_PAGE)); - flag_unzeroed = unzeroed ? 
CHUNK_MAP_UNZEROED : 0; - /* - * Set the unzeroed flag for all pages, now that pages_purge() - * has returned whether the pages were zeroed as a side effect - * of purging. This chunk map modification is safe even though - * the arena mutex isn't currently owned by this thread, - * because the run is marked as allocated, thus protecting it - * from being modified by any other thread. As long as these - * writes don't perturb the first and last elements' - * CHUNK_MAP_ALLOCATED bits, behavior is well defined. - */ - for (i = 0; i < npages; i++) { - arena_mapbits_unzeroed_set(chunk, pageind+i, - flag_unzeroed); - } - npurged += npages; - if (config_stats) - nmadvise++; - } - malloc_mutex_lock(&arena->lock); - if (config_stats) - arena->stats.nmadvise += nmadvise; - - /* Deallocate runs. */ - for (mapelm = ql_first(&mapelms); mapelm != NULL; - mapelm = ql_first(&mapelms)) { - arena_run_t *run; - - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << - LG_PAGE)); - ql_remove(&mapelms, mapelm, u.ql_link); - arena_run_dalloc(arena, run, false, true); - } + arena_chunk_stash_dirty(arena, chunk, all, &mapelms); + npurged = arena_chunk_purge_stashed(arena, chunk, &mapelms); + arena_chunk_unstash_purged(arena, chunk, &mapelms); return (npurged); } -static arena_chunk_t * -chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) -{ - size_t *ndirty = (size_t *)arg; - - assert(chunk->ndirty != 0); - *ndirty += chunk->ndirty; - return (NULL); -} - static void arena_purge(arena_t *arena, bool all) { @@ -905,21 +1015,11 @@ arena_purge(arena_t *arena, bool all) arena->stats.npurge++; /* - * Compute the minimum number of pages that this thread should try to - * purge, and add the result to arena->npurgatory. This will keep - * multiple threads from racing to reduce ndirty below the threshold. 
+ * Add the minimum number of pages this thread should try to purge to + * arena->npurgatory. This will keep multiple threads from racing to + * reduce ndirty below the threshold. */ - { - size_t npurgeable = arena->ndirty - arena->npurgatory; - - if (all == false) { - size_t threshold = (arena->nactive >> - opt_lg_dirty_mult); - - npurgatory = npurgeable - threshold; - } else - npurgatory = npurgeable; - } + npurgatory = arena_compute_npurgatory(arena, all); arena->npurgatory += npurgatory; while (npurgatory > 0) { @@ -986,61 +1086,12 @@ arena_purge_all(arena_t *arena) } static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) +arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, + size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty) { - arena_chunk_t *chunk; - size_t size, run_ind, run_pages, flag_dirty; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - assert(run_ind >= map_bias); - assert(run_ind < chunk_npages); - if (arena_mapbits_large_get(chunk, run_ind) != 0) { - size = arena_mapbits_large_size_get(chunk, run_ind); - assert(size == PAGE || - arena_mapbits_large_size_get(chunk, - run_ind+(size>>LG_PAGE)-1) == 0); - } else { - size_t binind = arena_bin_index(arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size = bin_info->run_size; - } - run_pages = (size >> LG_PAGE); - if (config_stats) { - /* - * Update stats_cactive if nactive is crossing a chunk - * multiple. - */ - size_t cactive_diff = CHUNK_CEILING(arena->nactive << LG_PAGE) - - CHUNK_CEILING((arena->nactive - run_pages) << LG_PAGE); - if (cactive_diff != 0) - stats_cactive_sub(cactive_diff); - } - arena->nactive -= run_pages; - - /* - * The run is dirty if the caller claims to have dirtied it, as well as - * if it was already dirty before being allocated and the caller - * doesn't claim to have cleaned it. 
- */ - assert(arena_mapbits_dirty_get(chunk, run_ind) == - arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) - dirty = true; - flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; - - /* Mark pages as unallocated in the chunk map. */ - if (dirty) { - arena_mapbits_unallocated_set(chunk, run_ind, size, - CHUNK_MAP_DIRTY); - arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, - CHUNK_MAP_DIRTY); - } else { - arena_mapbits_unallocated_set(chunk, run_ind, size, - arena_mapbits_unzeroed_get(chunk, run_ind)); - arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, - arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); - } + size_t size = *p_size; + size_t run_ind = *p_run_ind; + size_t run_pages = *p_run_pages; /* Try to coalesce forward. */ if (run_ind + run_pages < chunk_npages && @@ -1070,8 +1121,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) } /* Try to coalesce backward. 
*/ - if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, run_ind-1) - == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == flag_dirty) { + if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, + run_ind-1) == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == + flag_dirty) { size_t prun_size = arena_mapbits_unallocated_size_get(chunk, run_ind-1); size_t prun_pages = prun_size >> LG_PAGE; @@ -1096,6 +1148,62 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) size); } + *p_size = size; + *p_run_ind = run_ind; + *p_run_pages = run_pages; +} + +static void +arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) +{ + arena_chunk_t *chunk; + size_t size, run_ind, run_pages, flag_dirty; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + assert(run_ind >= map_bias); + assert(run_ind < chunk_npages); + if (arena_mapbits_large_get(chunk, run_ind) != 0) { + size = arena_mapbits_large_size_get(chunk, run_ind); + assert(size == PAGE || + arena_mapbits_large_size_get(chunk, + run_ind+(size>>LG_PAGE)-1) == 0); + } else { + size_t binind = arena_bin_index(arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + size = bin_info->run_size; + } + run_pages = (size >> LG_PAGE); + arena_cactive_update(arena, 0, run_pages); + arena->nactive -= run_pages; + + /* + * The run is dirty if the caller claims to have dirtied it, as well as + * if it was already dirty before being allocated and the caller + * doesn't claim to have cleaned it. + */ + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); + if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) + dirty = true; + flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; + + /* Mark pages as unallocated in the chunk map. 
*/ + if (dirty) { + arena_mapbits_unallocated_set(chunk, run_ind, size, + CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + CHUNK_MAP_DIRTY); + } else { + arena_mapbits_unallocated_set(chunk, run_ind, size, + arena_mapbits_unzeroed_get(chunk, run_ind)); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); + } + + arena_run_coalesce(arena, chunk, &size, &run_ind, &run_pages, + flag_dirty); + /* Insert into runs_avail, now that coalescing is complete. */ assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); @@ -1263,7 +1371,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, binind, false); + run = arena_run_alloc_small(arena, bin_info->run_size, binind); if (run != NULL) { bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + (uintptr_t)bin_info->bitmap_offset); @@ -1286,7 +1394,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) } /* - * arena_run_alloc() failed, but another thread may have made + * arena_run_alloc_small() failed, but another thread may have made * sufficient memory available while this one dropped bin->lock above, * so search one more time. */ @@ -1321,12 +1429,12 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) arena_chunk_t *chunk; /* - * arena_run_alloc() may have allocated run, or it may - * have pulled run from the bin's run tree. Therefore - * it is unsafe to make any assumptions about how run - * has previously been used, and arena_bin_lower_run() - * must be called, as if a region were just deallocated - * from the run. + * arena_run_alloc_small() may have allocated run, or + * it may have pulled run from the bin's run tree. 
+ * Therefore it is unsafe to make any assumptions about + * how run has previously been used, and + * arena_bin_lower_run() must be called, as if a region + * were just deallocated from the run. */ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); if (run->nfree == bin_info->nregs) @@ -1404,8 +1512,28 @@ arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) } } -void -arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) +#ifdef JEMALLOC_JET +#undef arena_redzone_corruption +#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl) +#endif +static void +arena_redzone_corruption(void *ptr, size_t usize, bool after, + size_t offset, uint8_t byte) +{ + + malloc_printf(": Corrupt redzone %zu byte%s %s %p " + "(size %zu), byte=%#x\n", offset, (offset == 1) ? "" : "s", + after ? "after" : "before", ptr, usize, byte); +} +#ifdef JEMALLOC_JET +#undef arena_redzone_corruption +#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) +arena_redzone_corruption_t *arena_redzone_corruption = + JEMALLOC_N(arena_redzone_corruption_impl); +#endif + +static void +arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) { size_t size = bin_info->reg_size; size_t redzone_size = bin_info->redzone_size; @@ -1413,29 +1541,61 @@ arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) bool error = false; for (i = 1; i <= redzone_size; i++) { - unsigned byte; - if ((byte = *(uint8_t *)((uintptr_t)ptr - i)) != 0xa5) { + uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); + if (*byte != 0xa5) { error = true; - malloc_printf(": Corrupt redzone " - "%zu byte%s before %p (size %zu), byte=%#x\n", i, - (i == 1) ? 
"" : "s", ptr, size, byte); + arena_redzone_corruption(ptr, size, false, i, *byte); + if (reset) + *byte = 0xa5; } } for (i = 0; i < redzone_size; i++) { - unsigned byte; - if ((byte = *(uint8_t *)((uintptr_t)ptr + size + i)) != 0xa5) { + uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); + if (*byte != 0xa5) { error = true; - malloc_printf(": Corrupt redzone " - "%zu byte%s after end of %p (size %zu), byte=%#x\n", - i, (i == 1) ? "" : "s", ptr, size, byte); + arena_redzone_corruption(ptr, size, true, i, *byte); + if (reset) + *byte = 0xa5; } } if (opt_abort && error) abort(); +} +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_small +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl) +#endif +void +arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) +{ + size_t redzone_size = bin_info->redzone_size; + + arena_redzones_validate(ptr, bin_info, false); memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, bin_info->reg_interval); } +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_small +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) +arena_dalloc_junk_small_t *arena_dalloc_junk_small = + JEMALLOC_N(arena_dalloc_junk_small_impl); +#endif + +void +arena_quarantine_junk_small(void *ptr, size_t usize) +{ + size_t binind; + arena_bin_info_t *bin_info; + cassert(config_fill); + assert(opt_junk); + assert(opt_quarantine); + assert(usize <= SMALL_MAXCLASS); + + binind = SMALL_SIZE2BIN(usize); + bin_info = &arena_bin_info[binind]; + arena_redzones_validate(ptr, bin_info, true); +} void * arena_malloc_small(arena_t *arena, size_t size, bool zero) @@ -1500,7 +1660,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) /* Large allocation. 
*/ size = PAGE_CEILING(size); malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, size, true, BININD_INVALID, zero); + ret = (void *)arena_run_alloc_large(arena, size, zero); if (ret == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1546,7 +1706,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) alloc_size = size + alignment - PAGE; malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, zero); + run = arena_run_alloc_large(arena, alloc_size, false); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1566,6 +1726,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, false); } + arena_run_init_large(arena, (arena_run_t *)ret, size, zero); if (config_stats) { arena->stats.nmalloc_large++; @@ -1769,21 +1930,38 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); } +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_large +#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl) +#endif +static void +arena_dalloc_junk_large(void *ptr, size_t usize) +{ + + if (config_fill && opt_junk) + memset(ptr, 0x5a, usize); +} +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_large +#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) +arena_dalloc_junk_large_t *arena_dalloc_junk_large = + JEMALLOC_N(arena_dalloc_junk_large_impl); +#endif + void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) { if (config_fill || config_stats) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t size = arena_mapbits_large_size_get(chunk, pageind); + size_t usize = arena_mapbits_large_size_get(chunk, pageind); - if (config_fill && config_stats && opt_junk) - memset(ptr, 0x5a, size); + arena_dalloc_junk_large(ptr, usize); if (config_stats) { 
arena->stats.ndalloc_large++; - arena->stats.allocated_large -= size; - arena->stats.lstats[(size >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns--; + arena->stats.allocated_large -= usize; + arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--; } } @@ -1854,9 +2032,8 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t flag_dirty; size_t splitsize = (oldsize + followsize <= size + extra) ? followsize : size + extra - oldsize; - arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << LG_PAGE)), splitsize, true, - BININD_INVALID, zero); + arena_run_split_large(arena, (arena_run_t *)((uintptr_t)chunk + + ((pageind+npages) << LG_PAGE)), splitsize, zero); size = oldsize + splitsize; npages = size >> LG_PAGE; @@ -1895,6 +2072,26 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, return (true); } +#ifdef JEMALLOC_JET +#undef arena_ralloc_junk_large +#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl) +#endif +static void +arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) +{ + + if (config_fill && opt_junk) { + memset((void *)((uintptr_t)ptr + usize), 0x5a, + old_usize - usize); + } +} +#ifdef JEMALLOC_JET +#undef arena_ralloc_junk_large +#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) +arena_ralloc_junk_large_t *arena_ralloc_junk_large = + JEMALLOC_N(arena_ralloc_junk_large_impl); +#endif + /* * Try to resize a large allocation, in order to avoid copying. This will * always fail if growing an object, and the following run is already in use. @@ -1908,10 +2105,6 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, psize = PAGE_CEILING(size + extra); if (psize == oldsize) { /* Same size class. 
*/ - if (config_fill && opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - - size); - } return (false); } else { arena_chunk_t *chunk; @@ -1922,10 +2115,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, if (psize < oldsize) { /* Fill before shrinking in order avoid a race. */ - if (config_fill && opt_junk) { - memset((void *)((uintptr_t)ptr + size), 0x5a, - oldsize - size); - } + arena_ralloc_junk_large(ptr, oldsize, psize); arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, psize); return (false); @@ -1933,17 +2123,23 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, PAGE_CEILING(size), psize - PAGE_CEILING(size), zero); - if (config_fill && ret == false && zero == false && - opt_zero) { - memset((void *)((uintptr_t)ptr + oldsize), 0, - size - oldsize); + if (config_fill && ret == false && zero == false) { + if (opt_junk) { + memset((void *)((uintptr_t)ptr + + oldsize), 0xa5, isalloc(ptr, + config_prof) - oldsize); + } else if (opt_zero) { + memset((void *)((uintptr_t)ptr + + oldsize), 0, isalloc(ptr, + config_prof) - oldsize); + } } return (ret); } } } -void * +bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { @@ -1958,25 +2154,20 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, if ((size + extra <= SMALL_MAXCLASS && SMALL_SIZE2BIN(size + extra) == SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) { - if (config_fill && opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), - 0x5a, oldsize - size); - } - return (ptr); - } + size + extra >= oldsize)) + return (false); } else { assert(size <= arena_maxclass); if (size + extra > SMALL_MAXCLASS) { if (arena_ralloc_large(ptr, oldsize, size, extra, zero) == false) - return (ptr); + return (false); } } } /* Reallocation would require a move. 
*/ - return (NULL); + return (true); } void * @@ -1988,9 +2179,8 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t copysize; /* Try to avoid moving the allocation. */ - ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero); - if (ret != NULL) - return (ret); + if (arena_ralloc_no_move(ptr, oldsize, size, extra, zero) == false) + return (ptr); /* * size and oldsize are different enough that we need to move the @@ -2001,7 +2191,7 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, arena); + ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); } else ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc); @@ -2013,7 +2203,7 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, + ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); } else ret = arena_malloc(arena, size, zero, try_tcache_alloc); @@ -2031,7 +2221,7 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (size < oldsize) ? 
size : oldsize; VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - iqallocx(ptr, try_tcache_dalloc); + iqalloct(ptr, try_tcache_dalloc); return (ret); } diff --git a/contrib/jemalloc/src/bitmap.c b/contrib/jemalloc/src/bitmap.c index b47e2629093f..e2bd907d558d 100644 --- a/contrib/jemalloc/src/bitmap.c +++ b/contrib/jemalloc/src/bitmap.c @@ -1,4 +1,4 @@ -#define JEMALLOC_BITMAP_C_ +#define JEMALLOC_BITMAP_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ diff --git a/contrib/jemalloc/src/chunk.c b/contrib/jemalloc/src/chunk.c index b17f43f0939d..90ab116ae5fa 100644 --- a/contrib/jemalloc/src/chunk.c +++ b/contrib/jemalloc/src/chunk.c @@ -180,7 +180,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, label_return: if (ret != NULL) { if (config_ivsalloc && base == false) { - if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { + if (rtree_set(chunks_rtree, (uintptr_t)ret, 1)) { chunk_dealloc(ret, size, true); return (NULL); } @@ -321,7 +321,7 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) assert((size & chunksize_mask) == 0); if (config_ivsalloc) - rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); + rtree_set(chunks_rtree, (uintptr_t)chunk, 0); if (config_stats || config_prof) { malloc_mutex_lock(&chunks_mtx); assert(stats_chunks.curchunks >= (size / chunksize)); @@ -356,7 +356,7 @@ chunk_boot(void) extent_tree_ad_new(&chunks_ad_dss); if (config_ivsalloc) { chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk); + opt_lg_chunk, base_alloc, NULL); if (chunks_rtree == NULL) return (true); } @@ -368,7 +368,7 @@ void chunk_prefork(void) { - malloc_mutex_lock(&chunks_mtx); + malloc_mutex_prefork(&chunks_mtx); if (config_ivsalloc) rtree_prefork(chunks_rtree); chunk_dss_prefork(); diff --git a/contrib/jemalloc/src/chunk_dss.c b/contrib/jemalloc/src/chunk_dss.c index 24781cc52dca..510bb8bee859 100644 --- 
a/contrib/jemalloc/src/chunk_dss.c +++ b/contrib/jemalloc/src/chunk_dss.c @@ -28,16 +28,17 @@ static void *dss_max; /******************************************************************************/ -#ifndef JEMALLOC_HAVE_SBRK static void * -sbrk(intptr_t increment) +chunk_dss_sbrk(intptr_t increment) { +#ifdef JEMALLOC_HAVE_SBRK + return (sbrk(increment)); +#else not_implemented(); - return (NULL); -} #endif +} dss_prec_t chunk_dss_prec_get(void) @@ -93,7 +94,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) */ do { /* Get the current end of the DSS. */ - dss_max = sbrk(0); + dss_max = chunk_dss_sbrk(0); /* * Calculate how much padding is necessary to * chunk-align the end of the DSS. @@ -117,7 +118,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) return (NULL); } incr = gap_size + cpad_size + size; - dss_prev = sbrk(incr); + dss_prev = chunk_dss_sbrk(incr); if (dss_prev == dss_max) { /* Success. */ dss_max = dss_next; @@ -163,7 +164,7 @@ chunk_dss_boot(void) if (malloc_mutex_init(&dss_mtx)) return (true); - dss_base = sbrk(0); + dss_base = chunk_dss_sbrk(0); dss_prev = dss_base; dss_max = dss_base; diff --git a/contrib/jemalloc/src/chunk_mmap.c b/contrib/jemalloc/src/chunk_mmap.c index 8a42e75915f8..2056d793f053 100644 --- a/contrib/jemalloc/src/chunk_mmap.c +++ b/contrib/jemalloc/src/chunk_mmap.c @@ -43,7 +43,7 @@ pages_map(void *addr, size_t size) if (munmap(ret, size) == -1) { char buf[BUFERROR_BUF]; - buferror(buf, sizeof(buf)); + buferror(get_errno(), buf, sizeof(buf)); malloc_printf(": Error in " #ifdef _WIN32 "VirtualFree" diff --git a/contrib/jemalloc/src/ckh.c b/contrib/jemalloc/src/ckh.c index 2f38348bb85d..04c52966193a 100644 --- a/contrib/jemalloc/src/ckh.c +++ b/contrib/jemalloc/src/ckh.c @@ -49,7 +49,7 @@ static void ckh_shrink(ckh_t *ckh); * Search bucket for key and return the cell number if found; SIZE_T_MAX * otherwise. 
*/ -JEMALLOC_INLINE size_t +JEMALLOC_INLINE_C size_t ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) { ckhc_t *cell; @@ -67,7 +67,7 @@ ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) /* * Search table for key and return cell number if found; SIZE_T_MAX otherwise. */ -JEMALLOC_INLINE size_t +JEMALLOC_INLINE_C size_t ckh_isearch(ckh_t *ckh, const void *key) { size_t hashes[2], bucket, cell; @@ -88,7 +88,7 @@ ckh_isearch(ckh_t *ckh, const void *key) return (cell); } -JEMALLOC_INLINE bool +JEMALLOC_INLINE_C bool ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, const void *data) { @@ -120,7 +120,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * eviction/relocation procedure until either success or detection of an * eviction/relocation bucket cycle. */ -JEMALLOC_INLINE bool +JEMALLOC_INLINE_C bool ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, void const **argdata) { @@ -190,7 +190,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, } } -JEMALLOC_INLINE bool +JEMALLOC_INLINE_C bool ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { size_t hashes[2], bucket; @@ -219,7 +219,7 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) * Try to rebuild the hash table from scratch by inserting all items from the * old table into the new. 
*/ -JEMALLOC_INLINE bool +JEMALLOC_INLINE_C bool ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) { size_t count, i, nins; diff --git a/contrib/jemalloc/src/ctl.c b/contrib/jemalloc/src/ctl.c index ebba7c259972..cc2c5aef570f 100644 --- a/contrib/jemalloc/src/ctl.c +++ b/contrib/jemalloc/src/ctl.c @@ -929,7 +929,7 @@ void ctl_prefork(void) { - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_prefork(&ctl_mtx); } void @@ -1110,6 +1110,8 @@ label_return: \ return (ret); \ } +/******************************************************************************/ + CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int @@ -1131,49 +1133,52 @@ epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return (ret); } -static int -thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - bool oldval; +/******************************************************************************/ - if (config_tcache == false) - return (ENOENT); +CTL_RO_BOOL_CONFIG_GEN(config_debug) +CTL_RO_BOOL_CONFIG_GEN(config_dss) +CTL_RO_BOOL_CONFIG_GEN(config_fill) +CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_mremap) +CTL_RO_BOOL_CONFIG_GEN(config_munmap) +CTL_RO_BOOL_CONFIG_GEN(config_prof) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) +CTL_RO_BOOL_CONFIG_GEN(config_stats) +CTL_RO_BOOL_CONFIG_GEN(config_tcache) +CTL_RO_BOOL_CONFIG_GEN(config_tls) +CTL_RO_BOOL_CONFIG_GEN(config_utrace) +CTL_RO_BOOL_CONFIG_GEN(config_valgrind) +CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) - oldval = tcache_enabled_get(); - if (newp != NULL) { - if (newlen != sizeof(bool)) { - ret = EINVAL; - goto label_return; - } - tcache_enabled_set(*(bool *)newp); - } - READ(oldval, bool); +/******************************************************************************/ - ret = 0; -label_return: - return (ret); -} +CTL_RO_NL_GEN(opt_abort, opt_abort, bool) +CTL_RO_NL_GEN(opt_dss, opt_dss, 
const char *) +CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) +CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) +CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) +CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) +CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) +CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) +CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) +CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) +CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) +CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) +CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) +CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) +CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. 
*/ +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) -static int -thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - - if (config_tcache == false) - return (ENOENT); - - READONLY(); - WRITEONLY(); - - tcache_flush(); - - ret = 0; -label_return: - return (ret); -} +/******************************************************************************/ static int thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, @@ -1235,50 +1240,49 @@ CTL_RO_NL_CGEN(config_stats, thread_deallocated, CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, &thread_allocated_tsd_get()->deallocated, uint64_t *) -/******************************************************************************/ +static int +thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; -CTL_RO_BOOL_CONFIG_GEN(config_debug) -CTL_RO_BOOL_CONFIG_GEN(config_dss) -CTL_RO_BOOL_CONFIG_GEN(config_fill) -CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) -CTL_RO_BOOL_CONFIG_GEN(config_mremap) -CTL_RO_BOOL_CONFIG_GEN(config_munmap) -CTL_RO_BOOL_CONFIG_GEN(config_prof) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) -CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_tcache) -CTL_RO_BOOL_CONFIG_GEN(config_tls) -CTL_RO_BOOL_CONFIG_GEN(config_utrace) -CTL_RO_BOOL_CONFIG_GEN(config_valgrind) -CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) + if (config_tcache == false) + return (ENOENT); 
-/******************************************************************************/ + oldval = tcache_enabled_get(); + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + tcache_enabled_set(*(bool *)newp); + } + READ(oldval, bool); -CTL_RO_NL_GEN(opt_abort, opt_abort, bool) -CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) -CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) -CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) -CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) -CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) -CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) -CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) -CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) -CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) -CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) -CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) -CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) -CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) -CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) -CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) -CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. 
*/ -CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) -CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) + ret = 0; +label_return: + return (ret); +} + +static int +thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (config_tcache == false) + return (ENOENT); + + READONLY(); + WRITEONLY(); + + tcache_flush(); + + ret = 0; +label_return: + return (ret); +} /******************************************************************************/ @@ -1390,31 +1394,8 @@ arena_i_index(const size_t *mib, size_t miblen, size_t i) return (ret); } - /******************************************************************************/ -CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -static const ctl_named_node_t * -arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) -{ - - if (i > NBINS) - return (NULL); - return (super_arenas_bin_i_node); -} - -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) -static const ctl_named_node_t * -arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) -{ - - if (i > nlclasses) - return (NULL); - return (super_arenas_lrun_i_node); -} - static int arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1468,7 +1449,28 @@ CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) 
CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) +CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) +static const ctl_named_node_t * +arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) +{ + + if (i > NBINS) + return (NULL); + return (super_arenas_bin_i_node); +} + CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) +static const ctl_named_node_t * +arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) +{ + + if (i > nlclasses) + return (NULL); + return (super_arenas_lrun_i_node); +} static int arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, @@ -1575,6 +1577,11 @@ CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) /******************************************************************************/ +CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) +CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) +CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) +CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) + CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, size_t) CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t) @@ -1582,6 +1589,20 @@ CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t) CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t) CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t) + +CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) +CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) +CTL_RO_GEN(stats_arenas_i_pactive, 
ctl_stats.arenas[mib[2]].pactive, size_t) +CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, + ctl_stats.arenas[mib[2]].astats.mapped, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, + ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, + ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_purged, + ctl_stats.arenas[mib[2]].astats.purged, uint64_t) + CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, ctl_stats.arenas[mib[2]].allocated_small, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, @@ -1645,19 +1666,6 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) return (super_stats_arenas_i_lruns_j_node); } -CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) -CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) -CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) -CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - ctl_stats.arenas[mib[2]].astats.mapped, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, - ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, - ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_purged, - ctl_stats.arenas[mib[2]].astats.purged, uint64_t) - static const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1674,8 +1682,3 @@ stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) malloc_mutex_unlock(&ctl_mtx); return (ret); } - -CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) -CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) -CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, 
size_t) -CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) diff --git a/contrib/jemalloc/src/huge.c b/contrib/jemalloc/src/huge.c index aa08d43d3626..cecaf2dfc5db 100644 --- a/contrib/jemalloc/src/huge.c +++ b/contrib/jemalloc/src/huge.c @@ -78,7 +78,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) return (ret); } -void * +bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) { @@ -89,15 +89,11 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { assert(CHUNK_CEILING(oldsize) == oldsize); - if (config_fill && opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), 0x5a, - oldsize - size); - } - return (ptr); + return (false); } /* Reallocation would require a move. */ - return (NULL); + return (true); } void * @@ -108,9 +104,8 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t copysize; /* Try to avoid moving the allocation. 
*/ - ret = huge_ralloc_no_move(ptr, oldsize, size, extra); - if (ret != NULL) - return (ret); + if (huge_ralloc_no_move(ptr, oldsize, size, extra) == false) + return (ptr); /* * size and oldsize are different enough that we need to use a @@ -169,7 +164,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ char buf[BUFERROR_BUF]; - buferror(buf, sizeof(buf)); + buferror(get_errno(), buf, sizeof(buf)); malloc_printf(": Error in mremap(): %s\n", buf); if (opt_abort) @@ -181,11 +176,34 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, #endif { memcpy(ret, ptr, copysize); - iqallocx(ptr, try_tcache_dalloc); + iqalloct(ptr, try_tcache_dalloc); } return (ret); } +#ifdef JEMALLOC_JET +#undef huge_dalloc_junk +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) +#endif +static void +huge_dalloc_junk(void *ptr, size_t usize) +{ + + if (config_fill && config_dss && opt_junk) { + /* + * Only bother junk filling if the chunk isn't about to be + * unmapped. 
+ */ + if (config_munmap == false || (config_dss && chunk_in_dss(ptr))) + memset(ptr, 0x5a, usize); + } +} +#ifdef JEMALLOC_JET +#undef huge_dalloc_junk +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) +huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); +#endif + void huge_dalloc(void *ptr, bool unmap) { @@ -208,8 +226,8 @@ huge_dalloc(void *ptr, bool unmap) malloc_mutex_unlock(&huge_mtx); - if (unmap && config_fill && config_dss && opt_junk) - memset(node->addr, 0x5a, node->size); + if (unmap) + huge_dalloc_junk(node->addr, node->size); chunk_dealloc(node->addr, node->size, unmap); diff --git a/contrib/jemalloc/src/jemalloc.c b/contrib/jemalloc/src/jemalloc.c index 1412a5e409bf..42f97b4dbb7a 100644 --- a/contrib/jemalloc/src/jemalloc.c +++ b/contrib/jemalloc/src/jemalloc.c @@ -104,18 +104,12 @@ typedef struct { #endif /******************************************************************************/ -/* Function prototypes for non-inline static functions. */ +/* + * Function prototypes for static functions that are referenced prior to + * definition. + */ -static void stats_print_atexit(void); -static unsigned malloc_ncpus(void); -static bool malloc_conf_next(char const **opts_p, char const **k_p, - size_t *klen_p, char const **v_p, size_t *vlen_p); -static void malloc_conf_error(const char *msg, const char *k, size_t klen, - const char *v, size_t vlen); -static void malloc_conf_init(void); static bool malloc_init_hard(void); -static int imemalign(void **memptr, size_t alignment, size_t size, - size_t min_alignment); /******************************************************************************/ /* @@ -256,7 +250,6 @@ stats_print_atexit(void) static unsigned malloc_ncpus(void) { - unsigned ret; long result; #ifdef _WIN32 @@ -266,14 +259,7 @@ malloc_ncpus(void) #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif - if (result == -1) { - /* Error. 
*/ - ret = 1; - } else { - ret = (unsigned)result; - } - - return (ret); + return ((result == -1) ? 1 : (unsigned)result); } void @@ -489,8 +475,7 @@ malloc_conf_init(void) } break; } default: - /* NOTREACHED */ - assert(false); + not_reached(); buf[0] = '\0'; opts = buf; } @@ -527,14 +512,15 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ - if (um < min) \ + if (min != 0 && um < min) \ o = min; \ else if (um > max) \ o = max; \ else \ o = um; \ } else { \ - if (um < min || um > max) { \ + if ((min != 0 && um < min) || \ + um > max) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ @@ -700,17 +686,6 @@ malloc_init_hard(void) malloc_conf_init(); -#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ - && !defined(_WIN32)) - /* Register fork handlers. */ - if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, - jemalloc_postfork_child) != 0) { - malloc_write(": Error in pthread_atfork()\n"); - if (opt_abort) - abort(); - } -#endif - if (opt_stats_print) { /* Print statistics at exit. */ if (atexit(stats_print_atexit) != 0) { @@ -750,8 +725,10 @@ malloc_init_hard(void) return (true); } - if (malloc_mutex_init(&arenas_lock)) + if (malloc_mutex_init(&arenas_lock)) { + malloc_mutex_unlock(&init_lock); return (true); + } /* * Create enough scaffolding to allow recursive allocation in @@ -797,9 +774,25 @@ malloc_init_hard(void) return (true); } - /* Get number of CPUs. */ malloc_mutex_unlock(&init_lock); + /**********************************************************************/ + /* Recursive allocation may follow. */ + ncpus = malloc_ncpus(); + +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ + && !defined(_WIN32)) + /* LinuxThreads's pthread_atfork() allocates. 
*/ + if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, + jemalloc_postfork_child) != 0) { + malloc_write(": Error in pthread_atfork()\n"); + if (opt_abort) + abort(); + } +#endif + + /* Done recursively allocating. */ + /**********************************************************************/ malloc_mutex_lock(&init_lock); if (mutex_boot()) { @@ -846,6 +839,7 @@ malloc_init_hard(void) malloc_initialized = true; malloc_mutex_unlock(&init_lock); + return (false); } @@ -857,42 +851,88 @@ malloc_init_hard(void) * Begin malloc(3)-compatible functions. */ +static void * +imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = imalloc(SMALL_MAXCLASS+1); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = imalloc(usize); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) + p = imalloc_prof_sample(usize, cnt); + else + p = imalloc(usize); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + +/* + * MALLOC_BODY() is a macro rather than a function because its contents are in + * the fast path, but inlining would cause reliability issues when determining + * how many frames to discard from heap profiling backtraces. + */ +#define MALLOC_BODY(ret, size, usize) do { \ + if (malloc_init()) \ + ret = NULL; \ + else { \ + if (config_prof && opt_prof) { \ + prof_thr_cnt_t *cnt; \ + \ + usize = s2u(size); \ + /* \ + * Call PROF_ALLOC_PREP() here rather than in \ + * imalloc_prof() so that imalloc_prof() can be \ + * inlined without introducing uncertainty \ + * about the number of backtrace frames to \ + * ignore. imalloc_prof() is in the fast path \ + * when heap profiling is enabled, so inlining \ + * is critical to performance. 
(For \ + * consistency all callers of PROF_ALLOC_PREP() \ + * are structured similarly, even though e.g. \ + * realloc() isn't called enough for inlining \ + * to be critical.) \ + */ \ + PROF_ALLOC_PREP(1, usize, cnt); \ + ret = imalloc_prof(usize, cnt); \ + } else { \ + if (config_stats || (config_valgrind && \ + opt_valgrind)) \ + usize = s2u(size); \ + ret = imalloc(size); \ + } \ + } \ +} while (0) + void * je_malloc(size_t size) { void *ret; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); - - if (malloc_init()) { - ret = NULL; - goto label_oom; - } if (size == 0) size = 1; - if (config_prof && opt_prof) { - usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - ret = NULL; - goto label_oom; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - ret = imalloc(SMALL_MAXCLASS+1); - if (ret != NULL) - arena_prof_promoted(ret, usize); - } else - ret = imalloc(size); - } else { - if (config_stats || (config_valgrind && opt_valgrind)) - usize = s2u(size); - ret = imalloc(size); - } + MALLOC_BODY(ret, size, usize); -label_oom: if (ret == NULL) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in malloc(): " @@ -901,8 +941,6 @@ je_malloc(size_t size) } set_errno(ENOMEM); } - if (config_prof && opt_prof && ret != NULL) - prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { assert(usize == isalloc(ret, config_prof)); thread_allocated_tsd_get()->allocated += usize; @@ -912,6 +950,42 @@ je_malloc(size_t size) return (ret); } +static void * +imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0); + p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment, + false); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = ipalloc(usize, alignment, false); 
+ + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) + p = imemalign_prof_sample(alignment, usize, cnt); + else + p = ipalloc(usize, alignment, false); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + JEMALLOC_ATTR(nonnull(1)) #ifdef JEMALLOC_PROF /* @@ -921,19 +995,18 @@ JEMALLOC_ATTR(nonnull(1)) JEMALLOC_NOINLINE #endif static int -imemalign(void **memptr, size_t alignment, size_t size, - size_t min_alignment) +imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) { int ret; size_t usize; void *result; - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); assert(min_alignment != 0); - if (malloc_init()) + if (malloc_init()) { result = NULL; - else { + goto label_oom; + } else { if (size == 0) size = 1; @@ -953,57 +1026,38 @@ imemalign(void **memptr, size_t alignment, size_t size, usize = sa2u(size, alignment); if (usize == 0) { result = NULL; - ret = ENOMEM; - goto label_return; + goto label_oom; } if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + PROF_ALLOC_PREP(2, usize, cnt); - if (cnt == NULL) { - result = NULL; - ret = EINVAL; - } else { - if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && usize <= SMALL_MAXCLASS) { - assert(sa2u(SMALL_MAXCLASS+1, - alignment) != 0); - result = ipalloc(sa2u(SMALL_MAXCLASS+1, - alignment), alignment, false); - if (result != NULL) { - arena_prof_promoted(result, - usize); - } - } else { - result = ipalloc(usize, alignment, - false); - } - } + result = imemalign_prof(alignment, usize, cnt); } else result = ipalloc(usize, alignment, false); - } - - if (result == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error allocating aligned " - "memory: out of memory\n"); - abort(); - } - ret = ENOMEM; - goto label_return; + if (result == NULL) + goto label_oom; } *memptr = result; ret = 0; - label_return: if (config_stats && 
result != NULL) { assert(usize == isalloc(result, config_prof)); thread_allocated_tsd_get()->allocated += usize; } - if (config_prof && opt_prof && result != NULL) - prof_malloc(result, usize, cnt); UTRACE(0, size, result); return (ret); +label_oom: + assert(result == NULL); + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error allocating aligned memory: " + "out of memory\n"); + abort(); + } + ret = ENOMEM; + goto label_return; } int @@ -1030,13 +1084,46 @@ je_aligned_alloc(size_t alignment, size_t size) return (ret); } +static void * +icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = icalloc(SMALL_MAXCLASS+1); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = icalloc(usize); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +icalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) + p = icalloc_prof_sample(usize, cnt); + else + p = icalloc(usize); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + void * je_calloc(size_t num, size_t size) { void *ret; size_t num_size; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { num_size = 0; @@ -1065,19 +1152,11 @@ je_calloc(size_t num, size_t size) } if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + usize = s2u(num_size); PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - ret = NULL; - goto label_return; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize - <= SMALL_MAXCLASS) { - ret = icalloc(SMALL_MAXCLASS+1); - if (ret != NULL) - arena_prof_promoted(ret, usize); - } else - ret = icalloc(num_size); + ret = icalloc_prof(usize, cnt); } else { if (config_stats || (config_valgrind && opt_valgrind)) usize = s2u(num_size); @@ -1093,9 +1172,6 @@ je_calloc(size_t num, size_t size) } set_errno(ENOMEM); } - 
- if (config_prof && opt_prof && ret != NULL) - prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { assert(usize == isalloc(ret, config_prof)); thread_allocated_tsd_get()->allocated += usize; @@ -1105,152 +1181,126 @@ je_calloc(size_t num, size_t size) return (ret); } +static void * +irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = iralloc(oldptr, usize, 0, 0, false); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + prof_ctx_t *old_ctx; + + old_ctx = prof_ctx_get(oldptr); + if ((uintptr_t)cnt != (uintptr_t)1U) + p = irealloc_prof_sample(oldptr, usize, cnt); + else + p = iralloc(oldptr, usize, 0, 0, false); + if (p == NULL) + return (NULL); + prof_realloc(p, usize, cnt, old_usize, old_ctx); + + return (p); +} + +JEMALLOC_INLINE_C void +ifree(void *ptr) +{ + size_t usize; + UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + + assert(ptr != NULL); + assert(malloc_initialized || IS_INITIALIZER); + + if (config_prof && opt_prof) { + usize = isalloc(ptr, config_prof); + prof_free(ptr, usize); + } else if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloc(ptr); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); +} + void * je_realloc(void *ptr, size_t size) { void *ret; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - size_t old_size = 0; - size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); - prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL); + size_t old_usize = 0; + UNUSED size_t old_rzsize 
JEMALLOC_CC_SILENCE_INIT(0); if (size == 0) { if (ptr != NULL) { - /* realloc(ptr, 0) is equivalent to free(p). */ - assert(malloc_initialized || IS_INITIALIZER); - if (config_prof) { - old_size = isalloc(ptr, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(ptr); - } else if (config_stats) { - old_size = isalloc(ptr, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); - } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(ptr, false); - old_rzsize = u2rz(old_size); - } - if (config_prof && opt_prof) { - old_ctx = prof_ctx_get(ptr); - cnt = NULL; - } - iqalloc(ptr); - ret = NULL; - goto label_return; - } else - size = 1; + /* realloc(ptr, 0) is equivalent to free(ptr). */ + UTRACE(ptr, 0, 0); + ifree(ptr); + return (NULL); + } + size = 1; } if (ptr != NULL) { assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); - if (config_prof) { - old_size = isalloc(ptr, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(ptr); - } else if (config_stats) { - old_size = isalloc(ptr, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); - } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(ptr, false); - old_rzsize = u2rz(old_size); - } + if ((config_prof && opt_prof) || config_stats || + (config_valgrind && opt_valgrind)) + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && opt_valgrind) + old_rzsize = config_prof ? 
p2rz(ptr) : u2rz(old_usize); + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + usize = s2u(size); - old_ctx = prof_ctx_get(ptr); PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - old_ctx = NULL; - ret = NULL; - goto label_oom; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && - usize <= SMALL_MAXCLASS) { - ret = iralloc(ptr, SMALL_MAXCLASS+1, 0, 0, - false, false); - if (ret != NULL) - arena_prof_promoted(ret, usize); - else - old_ctx = NULL; - } else { - ret = iralloc(ptr, size, 0, 0, false, false); - if (ret == NULL) - old_ctx = NULL; - } + ret = irealloc_prof(ptr, old_usize, usize, cnt); } else { if (config_stats || (config_valgrind && opt_valgrind)) usize = s2u(size); - ret = iralloc(ptr, size, 0, 0, false, false); - } - -label_oom: - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in realloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); + ret = iralloc(ptr, size, 0, 0, false); } } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ - if (config_prof && opt_prof) - old_ctx = NULL; - if (malloc_init()) { - if (config_prof && opt_prof) - cnt = NULL; - ret = NULL; - } else { - if (config_prof && opt_prof) { - usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) - ret = NULL; - else { - if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - ret = imalloc(SMALL_MAXCLASS+1); - if (ret != NULL) { - arena_prof_promoted(ret, - usize); - } - } else - ret = imalloc(size); - } - } else { - if (config_stats || (config_valgrind && - opt_valgrind)) - usize = s2u(size); - ret = imalloc(size); - } - } - - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in realloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); - } + MALLOC_BODY(ret, size, usize); } -label_return: - if (config_prof && opt_prof) - prof_realloc(ret, usize, cnt, old_size, old_ctx); + if (ret == NULL) { + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in realloc(): " + "out of memory\n"); + abort(); + } + set_errno(ENOMEM); + } if (config_stats && ret != NULL) { thread_allocated_t *ta; assert(usize == isalloc(ret, config_prof)); ta = thread_allocated_tsd_get(); ta->allocated += usize; - ta->deallocated += old_size; + ta->deallocated += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_size, old_rzsize, false); + JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_usize, old_rzsize, + false); return (ret); } @@ -1259,24 +1309,8 @@ je_free(void *ptr) { UTRACE(ptr, 0, 0); - if (ptr != NULL) { - size_t usize; - size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - - assert(malloc_initialized || IS_INITIALIZER); - - if (config_prof && opt_prof) { - usize = isalloc(ptr, config_prof); - prof_free(ptr, usize); - } else if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && opt_valgrind) - rzsize = 
p2rz(ptr); - iqalloc(ptr); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); - } + if (ptr != NULL) + ifree(ptr); } /* @@ -1342,28 +1376,440 @@ JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) = * Begin non-standard functions. */ -size_t -je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) +JEMALLOC_ALWAYS_INLINE_C void * +imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena) { - size_t ret; + + assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, + alignment))); + + if (alignment != 0) + return (ipalloct(usize, alignment, zero, try_tcache, arena)); + else if (zero) + return (icalloct(usize, try_tcache, arena)); + else + return (imalloct(usize, try_tcache, arena)); +} + +static void * +imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + size_t usize_promoted = (alignment == 0) ? 
+ s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment); + assert(usize_promoted != 0); + p = imallocx(usize_promoted, alignment, zero, try_tcache, + arena); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = imallocx(usize, alignment, zero, try_tcache, arena); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) { + p = imallocx_prof_sample(usize, alignment, zero, try_tcache, + arena, cnt); + } else + p = imallocx(usize, alignment, zero, try_tcache, arena); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + +void * +je_mallocx(size_t size, int flags) +{ + void *p; + size_t usize; + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & MALLOCX_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + arena_t *arena; + bool try_tcache; + + assert(size != 0); + + if (malloc_init()) + goto label_oom; + + if (arena_ind != UINT_MAX) { + arena = arenas[arena_ind]; + try_tcache = false; + } else { + arena = NULL; + try_tcache = true; + } + + usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); + assert(usize != 0); + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + + PROF_ALLOC_PREP(1, usize, cnt); + p = imallocx_prof(usize, alignment, zero, try_tcache, arena, + cnt); + } else + p = imallocx(usize, alignment, zero, try_tcache, arena); + if (p == NULL) + goto label_oom; + + if (config_stats) { + assert(usize == isalloc(p, config_prof)); + thread_allocated_tsd_get()->allocated += usize; + } + UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); + return (p); +label_oom: + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in mallocx(): out of memory\n"); + abort(); + } + UTRACE(0, size, 0); + return (NULL); +} + +static void * +irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, + bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena, + prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size) ? 
0 : size - (SMALL_MAXCLASS+1), alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else { + p = iralloct(oldptr, size, 0, alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); + } + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, + size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, + arena_t *arena, prof_thr_cnt_t *cnt) +{ + void *p; + prof_ctx_t *old_ctx; + + old_ctx = prof_ctx_get(oldptr); + if ((uintptr_t)cnt != (uintptr_t)1U) + p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, + try_tcache_alloc, try_tcache_dalloc, arena, cnt); + else { + p = iralloct(oldptr, size, 0, alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); + } + if (p == NULL) + return (NULL); + + if (p == oldptr && alignment != 0) { + /* + * The allocation did not move, so it is possible that the size + * class is smaller than would guarantee the requested + * alignment, and that the alignment constraint was + * serendipitously satisfied. Additionally, old_usize may not + * be the same as the current usize because of in-place large + * reallocation. Therefore, query the actual value of usize. 
+ */ + *usize = isalloc(p, config_prof); + } + prof_realloc(p, *usize, cnt, old_usize, old_ctx); + + return (p); +} + +void * +je_rallocx(void *ptr, size_t size, int flags) +{ + void *p; + size_t usize, old_usize; + UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & MALLOCX_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache_alloc, try_tcache_dalloc; + arena_t *arena; + + assert(ptr != NULL); + assert(size != 0); + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk; + try_tcache_alloc = false; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache_dalloc = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + arena = arenas[arena_ind]; + } else { + try_tcache_alloc = true; + try_tcache_dalloc = true; + arena = NULL; + } + + if ((config_prof && opt_prof) || config_stats || + (config_valgrind && opt_valgrind)) + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_usize); + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + + usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); + assert(usize != 0); + PROF_ALLOC_PREP(1, usize, cnt); + p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, + try_tcache_alloc, try_tcache_dalloc, arena, cnt); + if (p == NULL) + goto label_oom; + } else { + p = iralloct(ptr, size, 0, alignment, zero, try_tcache_alloc, + try_tcache_dalloc, arena); + if (p == NULL) + goto label_oom; + if (config_stats || (config_valgrind && opt_valgrind)) + usize = isalloc(p, config_prof); + } + + if (config_stats) { + thread_allocated_t *ta; + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_usize; + } + UTRACE(ptr, size, p); + JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_usize, old_rzsize, zero); + return (p); +label_oom: + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in rallocx(): out of memory\n"); + abort(); + } + UTRACE(ptr, size, 0); + return (NULL); +} + +JEMALLOC_ALWAYS_INLINE_C size_t +ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, bool zero, arena_t *arena) +{ + size_t usize; + + if (ixalloc(ptr, size, extra, alignment, zero)) + return (old_usize); + usize = isalloc(ptr, config_prof); + + return (usize); +} + +static size_t +ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, size_t max_usize, bool zero, arena_t *arena, + prof_thr_cnt_t *cnt) +{ + size_t usize; + + if (cnt == NULL) + return (old_usize); + /* Use minimum usize to determine whether promotion may happen. */ + if (prof_promote && ((alignment == 0) ? s2u(size) : sa2u(size, + alignment)) <= SMALL_MAXCLASS) { + if (ixalloc(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size+extra) ? 
0 : size+extra - (SMALL_MAXCLASS+1), + alignment, zero)) + return (old_usize); + usize = isalloc(ptr, config_prof); + if (max_usize < PAGE) + arena_prof_promoted(ptr, usize); + } else { + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero, arena); + } + + return (usize); +} + +JEMALLOC_ALWAYS_INLINE_C size_t +ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, size_t max_usize, bool zero, arena_t *arena, + prof_thr_cnt_t *cnt) +{ + size_t usize; + prof_ctx_t *old_ctx; + + old_ctx = prof_ctx_get(ptr); + if ((uintptr_t)cnt != (uintptr_t)1U) { + usize = ixallocx_prof_sample(ptr, old_usize, size, extra, + alignment, max_usize, zero, arena, cnt); + } else { + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero, arena); + } + if (usize == old_usize) + return (usize); + prof_realloc(ptr, usize, cnt, old_usize, old_ctx); + + return (usize); +} + +size_t +je_xallocx(void *ptr, size_t size, size_t extra, int flags) +{ + size_t usize, old_usize; + UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & MALLOCX_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + arena_t *arena; + + assert(ptr != NULL); + assert(size != 0); + assert(SIZE_T_MAX - size >= extra); + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if (arena_ind != UINT_MAX) + arena = arenas[arena_ind]; + else + arena = NULL; + + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_usize); + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + /* + * usize isn't knowable before ixalloc() returns when extra is + * non-zero. Therefore, compute its maximum possible value and + * use that in PROF_ALLOC_PREP() to decide whether to capture a + * backtrace. prof_realloc() will use the actual usize to + * decide whether to sample. 
+ */ + size_t max_usize = (alignment == 0) ? s2u(size+extra) : + sa2u(size+extra, alignment); + PROF_ALLOC_PREP(1, max_usize, cnt); + usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, + max_usize, zero, arena, cnt); + } else { + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero, arena); + } + if (usize == old_usize) + goto label_not_resized; + + if (config_stats) { + thread_allocated_t *ta; + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_usize; + } + JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_usize, old_rzsize, zero); +label_not_resized: + UTRACE(ptr, size, ptr); + return (usize); +} + +size_t +je_sallocx(const void *ptr, int flags) +{ + size_t usize; assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); if (config_ivsalloc) - ret = ivsalloc(ptr, config_prof); - else - ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0; + usize = ivsalloc(ptr, config_prof); + else { + assert(ptr != NULL); + usize = isalloc(ptr, config_prof); + } - return (ret); + return (usize); } void -je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) +je_dallocx(void *ptr, int flags) { + size_t usize; + UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache; - stats_print(write_cb, cbopaque, opts); + assert(ptr != NULL); + assert(malloc_initialized || IS_INITIALIZER); + + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + } else + try_tcache = true; + + UTRACE(ptr, 0, 0); + if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); + if (config_prof && opt_prof) { + if (config_stats == false && config_valgrind == false) + usize = isalloc(ptr, config_prof); + prof_free(ptr, usize); + } + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + if 
(config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloct(ptr, try_tcache); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); +} + +size_t +je_nallocx(size_t size, int flags) +{ + size_t usize; + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + + assert(size != 0); + + if (malloc_init()) + return (0); + + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + assert(usize != 0); + return (usize); } int @@ -1398,6 +1844,30 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); } +void +je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts) +{ + + stats_print(write_cb, cbopaque, opts); +} + +size_t +je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) +{ + size_t ret; + + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if (config_ivsalloc) + ret = ivsalloc(ptr, config_prof); + else + ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0; + + return (ret); +} + /* * End non-standard functions. */ @@ -1407,284 +1877,65 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, */ #ifdef JEMALLOC_EXPERIMENTAL -JEMALLOC_ALWAYS_INLINE_C void * -iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena) -{ - - assert(usize == ((alignment == 0) ? 
s2u(usize) : sa2u(usize, - alignment))); - - if (alignment != 0) - return (ipallocx(usize, alignment, zero, try_tcache, arena)); - else if (zero) - return (icallocx(usize, try_tcache, arena)); - else - return (imallocx(usize, try_tcache, arena)); -} - int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) { void *p; - size_t usize; - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - bool zero = flags & ALLOCM_ZERO; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - arena_t *arena; - bool try_tcache; assert(ptr != NULL); - assert(size != 0); - if (malloc_init()) - goto label_oom; - - if (arena_ind != UINT_MAX) { - arena = arenas[arena_ind]; - try_tcache = false; - } else { - arena = NULL; - try_tcache = true; - } - - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - if (usize == 0) - goto label_oom; - - if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) - goto label_oom; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - size_t usize_promoted = (alignment == 0) ? 
- s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, - alignment); - assert(usize_promoted != 0); - p = iallocm(usize_promoted, alignment, zero, - try_tcache, arena); - if (p == NULL) - goto label_oom; - arena_prof_promoted(p, usize); - } else { - p = iallocm(usize, alignment, zero, try_tcache, arena); - if (p == NULL) - goto label_oom; - } - prof_malloc(p, usize, cnt); - } else { - p = iallocm(usize, alignment, zero, try_tcache, arena); - if (p == NULL) - goto label_oom; - } + p = je_mallocx(size, flags); + if (p == NULL) + return (ALLOCM_ERR_OOM); if (rsize != NULL) - *rsize = usize; - + *rsize = isalloc(p, config_prof); *ptr = p; - if (config_stats) { - assert(usize == isalloc(p, config_prof)); - thread_allocated_tsd_get()->allocated += usize; - } - UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); return (ALLOCM_SUCCESS); -label_oom: - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in allocm(): " - "out of memory\n"); - abort(); - } - *ptr = NULL; - UTRACE(0, size, 0); - return (ALLOCM_ERR_OOM); } int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) { - void *p, *q; - size_t usize; - size_t old_size; - size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - bool zero = flags & ALLOCM_ZERO; + int ret; bool no_move = flags & ALLOCM_NO_MOVE; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - bool try_tcache_alloc, try_tcache_dalloc; - arena_t *arena; assert(ptr != NULL); assert(*ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); - assert(malloc_initialized || IS_INITIALIZER); - malloc_thread_init(); - if (arena_ind != UINT_MAX) { - arena_chunk_t *chunk; - try_tcache_alloc = true; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(*ptr); - try_tcache_dalloc = (chunk == *ptr || chunk->arena != - arenas[arena_ind]); - arena = arenas[arena_ind]; - } else { - try_tcache_alloc = true; - try_tcache_dalloc = true; - arena = 
NULL; - } - - p = *ptr; - if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - - /* - * usize isn't knowable before iralloc() returns when extra is - * non-zero. Therefore, compute its maximum possible value and - * use that in PROF_ALLOC_PREP() to decide whether to capture a - * backtrace. prof_realloc() will use the actual usize to - * decide whether to sample. - */ - size_t max_usize = (alignment == 0) ? s2u(size+extra) : - sa2u(size+extra, alignment); - prof_ctx_t *old_ctx = prof_ctx_get(p); - old_size = isalloc(p, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(p); - PROF_ALLOC_PREP(1, max_usize, cnt); - if (cnt == NULL) - goto label_oom; - /* - * Use minimum usize to determine whether promotion may happen. - */ - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U - && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) - <= SMALL_MAXCLASS) { - q = irallocx(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= - size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), - alignment, zero, no_move, try_tcache_alloc, - try_tcache_dalloc, arena); - if (q == NULL) - goto label_err; - if (max_usize < PAGE) { - usize = max_usize; - arena_prof_promoted(q, usize); - } else - usize = isalloc(q, config_prof); - } else { - q = irallocx(p, size, extra, alignment, zero, no_move, - try_tcache_alloc, try_tcache_dalloc, arena); - if (q == NULL) - goto label_err; - usize = isalloc(q, config_prof); - } - prof_realloc(q, usize, cnt, old_size, old_ctx); + if (no_move) { + size_t usize = je_xallocx(*ptr, size, extra, flags); + ret = (usize >= size) ? 
ALLOCM_SUCCESS : ALLOCM_ERR_NOT_MOVED; if (rsize != NULL) *rsize = usize; } else { - if (config_stats) { - old_size = isalloc(p, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); - } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(p, false); - old_rzsize = u2rz(old_size); - } - q = irallocx(p, size, extra, alignment, zero, no_move, - try_tcache_alloc, try_tcache_dalloc, arena); - if (q == NULL) - goto label_err; - if (config_stats) - usize = isalloc(q, config_prof); - if (rsize != NULL) { - if (config_stats == false) - usize = isalloc(q, config_prof); - *rsize = usize; - } + void *p = je_rallocx(*ptr, size+extra, flags); + if (p != NULL) { + *ptr = p; + ret = ALLOCM_SUCCESS; + } else + ret = ALLOCM_ERR_OOM; + if (rsize != NULL) + *rsize = isalloc(*ptr, config_prof); } - - *ptr = q; - if (config_stats) { - thread_allocated_t *ta; - ta = thread_allocated_tsd_get(); - ta->allocated += usize; - ta->deallocated += old_size; - } - UTRACE(p, size, q); - JEMALLOC_VALGRIND_REALLOC(q, usize, p, old_size, old_rzsize, zero); - return (ALLOCM_SUCCESS); -label_err: - if (no_move) { - UTRACE(p, size, q); - return (ALLOCM_ERR_NOT_MOVED); - } -label_oom: - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in rallocm(): " - "out of memory\n"); - abort(); - } - UTRACE(p, size, 0); - return (ALLOCM_ERR_OOM); + return (ret); } int je_sallocm(const void *ptr, size_t *rsize, int flags) { - size_t sz; - assert(malloc_initialized || IS_INITIALIZER); - malloc_thread_init(); - - if (config_ivsalloc) - sz = ivsalloc(ptr, config_prof); - else { - assert(ptr != NULL); - sz = isalloc(ptr, config_prof); - } assert(rsize != NULL); - *rsize = sz; - + *rsize = je_sallocx(ptr, flags); return (ALLOCM_SUCCESS); } int je_dallocm(void *ptr, int flags) { - size_t usize; - size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - bool try_tcache; - - assert(ptr != NULL); - assert(malloc_initialized || 
IS_INITIALIZER); - - if (arena_ind != UINT_MAX) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache = (chunk == ptr || chunk->arena != - arenas[arena_ind]); - } else - try_tcache = true; - - UTRACE(ptr, 0, 0); - if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); - if (config_prof && opt_prof) { - if (config_stats == false && config_valgrind == false) - usize = isalloc(ptr, config_prof); - prof_free(ptr, usize); - } - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && opt_valgrind) - rzsize = p2rz(ptr); - iqallocx(ptr, try_tcache); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); + je_dallocx(ptr, flags); return (ALLOCM_SUCCESS); } @@ -1692,18 +1943,10 @@ int je_nallocm(size_t *rsize, size_t size, int flags) { size_t usize; - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - assert(size != 0); - - if (malloc_init()) - return (ALLOCM_ERR_OOM); - - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + usize = je_nallocx(size, flags); if (usize == 0) return (ALLOCM_ERR_OOM); - if (rsize != NULL) *rsize = usize; return (ALLOCM_SUCCESS); diff --git a/contrib/jemalloc/src/mutex.c b/contrib/jemalloc/src/mutex.c index 6b6f438781e6..6f5954e52aea 100644 --- a/contrib/jemalloc/src/mutex.c +++ b/contrib/jemalloc/src/mutex.c @@ -6,7 +6,7 @@ #endif #ifndef _CRT_SPINCOUNT -#define _CRT_SPINCOUNT 4000 +#define _CRT_SPINCOUNT 4000 #endif /******************************************************************************/ diff --git a/contrib/jemalloc/src/prof.c b/contrib/jemalloc/src/prof.c index c133b95c2c6c..1d8ccbd60ae0 100644 --- a/contrib/jemalloc/src/prof.c +++ b/contrib/jemalloc/src/prof.c @@ -24,7 +24,12 @@ bool opt_prof_gdump = false; bool opt_prof_final = true; bool opt_prof_leak = false; bool opt_prof_accum = false; -char opt_prof_prefix[PATH_MAX + 1]; +char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. 
*/ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; uint64_t prof_interval = 0; bool prof_promote; @@ -54,46 +59,23 @@ static uint64_t prof_dump_useq; /* * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since - * it must be locked anyway during dumping. + * all profile dumps. */ -static char prof_dump_buf[PROF_DUMP_BUFSIZE]; +static malloc_mutex_t prof_dump_mtx; +static char prof_dump_buf[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PROF_DUMP_BUFSIZE +#else + 1 +#endif +]; static unsigned prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ static bool prof_booted = false; -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static prof_bt_t *bt_dup(prof_bt_t *bt); -static void bt_destroy(prof_bt_t *bt); -#ifdef JEMALLOC_PROF_LIBGCC -static _Unwind_Reason_Code prof_unwind_init_callback( - struct _Unwind_Context *context, void *arg); -static _Unwind_Reason_Code prof_unwind_callback( - struct _Unwind_Context *context, void *arg); -#endif -static bool prof_flush(bool propagate_err); -static bool prof_write(bool propagate_err, const char *s); -static bool prof_printf(bool propagate_err, const char *format, ...) 
- JEMALLOC_ATTR(format(printf, 2, 3)); -static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, - size_t *leak_nctx); -static void prof_ctx_destroy(prof_ctx_t *ctx); -static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); -static bool prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, - prof_bt_t *bt); -static bool prof_dump_maps(bool propagate_err); -static bool prof_dump(bool propagate_err, const char *filename, - bool leakcheck); -static void prof_dump_filename(char *filename, char v, int64_t vseq); -static void prof_fdump(void); -static void prof_bt_hash(const void *key, size_t r_hash[2]); -static bool prof_bt_keycomp(const void *k1, const void *k2); -static malloc_mutex_t *prof_ctx_mutex_choose(void); - /******************************************************************************/ void @@ -423,251 +405,32 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore) { cassert(config_prof); - assert(false); + not_reached(); } #endif -prof_thr_cnt_t * -prof_lookup(prof_bt_t *bt) +static malloc_mutex_t * +prof_ctx_mutex_choose(void) { - union { - prof_thr_cnt_t *p; - void *v; - } ret; - prof_tdata_t *prof_tdata; + unsigned nctxs = atomic_add_u(&cum_ctxs, 1); - cassert(config_prof); - - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) - return (NULL); - - if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { - union { - prof_bt_t *p; - void *v; - } btkey; - union { - prof_ctx_t *p; - void *v; - } ctx; - bool new_ctx; - - /* - * This thread's cache lacks bt. Look for it in the global - * cache. - */ - prof_enter(prof_tdata); - if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { - /* bt has never been seen before. Insert it. 
*/ - ctx.v = imalloc(sizeof(prof_ctx_t)); - if (ctx.v == NULL) { - prof_leave(prof_tdata); - return (NULL); - } - btkey.p = bt_dup(bt); - if (btkey.v == NULL) { - prof_leave(prof_tdata); - idalloc(ctx.v); - return (NULL); - } - ctx.p->bt = btkey.p; - ctx.p->lock = prof_ctx_mutex_choose(); - /* - * Set nlimbo to 1, in order to avoid a race condition - * with prof_ctx_merge()/prof_ctx_destroy(). - */ - ctx.p->nlimbo = 1; - memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); - ql_new(&ctx.p->cnts_ql); - if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { - /* OOM. */ - prof_leave(prof_tdata); - idalloc(btkey.v); - idalloc(ctx.v); - return (NULL); - } - new_ctx = true; - } else { - /* - * Increment nlimbo, in order to avoid a race condition - * with prof_ctx_merge()/prof_ctx_destroy(). - */ - malloc_mutex_lock(ctx.p->lock); - ctx.p->nlimbo++; - malloc_mutex_unlock(ctx.p->lock); - new_ctx = false; - } - prof_leave(prof_tdata); - - /* Link a prof_thd_cnt_t into ctx for this thread. */ - if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { - assert(ckh_count(&prof_tdata->bt2cnt) > 0); - /* - * Flush the least recently used cnt in order to keep - * bt2cnt from becoming too large. - */ - ret.p = ql_last(&prof_tdata->lru_ql, lru_link); - assert(ret.v != NULL); - if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, - NULL, NULL)) - assert(false); - ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); - prof_ctx_merge(ret.p->ctx, ret.p); - /* ret can now be re-used. */ - } else { - assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); - /* Allocate and partially initialize a new cnt. */ - ret.v = imalloc(sizeof(prof_thr_cnt_t)); - if (ret.p == NULL) { - if (new_ctx) - prof_ctx_destroy(ctx.p); - return (NULL); - } - ql_elm_new(ret.p, cnts_link); - ql_elm_new(ret.p, lru_link); - } - /* Finish initializing ret. 
*/ - ret.p->ctx = ctx.p; - ret.p->epoch = 0; - memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); - if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { - if (new_ctx) - prof_ctx_destroy(ctx.p); - idalloc(ret.v); - return (NULL); - } - ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - malloc_mutex_lock(ctx.p->lock); - ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - ctx.p->nlimbo--; - malloc_mutex_unlock(ctx.p->lock); - } else { - /* Move ret to the front of the LRU. */ - ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); - ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - } - - return (ret.p); -} - -static bool -prof_flush(bool propagate_err) -{ - bool ret = false; - ssize_t err; - - cassert(config_prof); - - err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); - if (err == -1) { - if (propagate_err == false) { - malloc_write(": write() failed during heap " - "profile flush\n"); - if (opt_abort) - abort(); - } - ret = true; - } - prof_dump_buf_end = 0; - - return (ret); -} - -static bool -prof_write(bool propagate_err, const char *s) -{ - unsigned i, slen, n; - - cassert(config_prof); - - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) - if (prof_flush(propagate_err) && propagate_err) - return (true); - - if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. */ - n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - - return (false); -} - -JEMALLOC_ATTR(format(printf, 2, 3)) -static bool -prof_printf(bool propagate_err, const char *format, ...) 
-{ - bool ret; - va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; - - va_start(ap, format); - malloc_vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - ret = prof_write(propagate_err, buf); - - return (ret); + return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); } static void -prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) +prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt) { - prof_thr_cnt_t *thr_cnt; - prof_cnt_t tcnt; - cassert(config_prof); - - malloc_mutex_lock(ctx->lock); - - memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); - ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { - volatile unsigned *epoch = &thr_cnt->epoch; - - while (true) { - unsigned epoch0 = *epoch; - - /* Make sure epoch is even. */ - if (epoch0 & 1U) - continue; - - memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); - - /* Terminate if epoch didn't change while reading. */ - if (*epoch == epoch0) - break; - } - - ctx->cnt_summed.curobjs += tcnt.curobjs; - ctx->cnt_summed.curbytes += tcnt.curbytes; - if (opt_prof_accum) { - ctx->cnt_summed.accumobjs += tcnt.accumobjs; - ctx->cnt_summed.accumbytes += tcnt.accumbytes; - } - } - - if (ctx->cnt_summed.curobjs != 0) - (*leak_nctx)++; - - /* Add to cnt_all. */ - cnt_all->curobjs += ctx->cnt_summed.curobjs; - cnt_all->curbytes += ctx->cnt_summed.curbytes; - if (opt_prof_accum) { - cnt_all->accumobjs += ctx->cnt_summed.accumobjs; - cnt_all->accumbytes += ctx->cnt_summed.accumbytes; - } - - malloc_mutex_unlock(ctx->lock); + ctx->bt = bt; + ctx->lock = prof_ctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition with + * prof_ctx_merge()/prof_ctx_destroy(). + */ + ctx->nlimbo = 1; + ql_elm_new(ctx, dump_link); + memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); + ql_new(&ctx->cnts_ql); } static void @@ -695,7 +458,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. 
*/ if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) - assert(false); + not_reached(); prof_leave(prof_tdata); /* Destroy ctx. */ malloc_mutex_unlock(ctx->lock); @@ -751,8 +514,369 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) } static bool -prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) +prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, + prof_ctx_t **p_ctx, bool *p_new_ctx) { + union { + prof_ctx_t *p; + void *v; + } ctx; + union { + prof_bt_t *p; + void *v; + } btkey; + bool new_ctx; + + prof_enter(prof_tdata); + if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { + /* bt has never been seen before. Insert it. */ + ctx.v = imalloc(sizeof(prof_ctx_t)); + if (ctx.v == NULL) { + prof_leave(prof_tdata); + return (true); + } + btkey.p = bt_dup(bt); + if (btkey.v == NULL) { + prof_leave(prof_tdata); + idalloc(ctx.v); + return (true); + } + prof_ctx_init(ctx.p, btkey.p); + if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { + /* OOM. */ + prof_leave(prof_tdata); + idalloc(btkey.v); + idalloc(ctx.v); + return (true); + } + new_ctx = true; + } else { + /* + * Increment nlimbo, in order to avoid a race condition with + * prof_ctx_merge()/prof_ctx_destroy(). + */ + malloc_mutex_lock(ctx.p->lock); + ctx.p->nlimbo++; + malloc_mutex_unlock(ctx.p->lock); + new_ctx = false; + } + prof_leave(prof_tdata); + + *p_btkey = btkey.v; + *p_ctx = ctx.p; + *p_new_ctx = new_ctx; + return (false); +} + +prof_thr_cnt_t * +prof_lookup(prof_bt_t *bt) +{ + union { + prof_thr_cnt_t *p; + void *v; + } ret; + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + prof_tdata = prof_tdata_get(false); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (NULL); + + if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { + void *btkey; + prof_ctx_t *ctx; + bool new_ctx; + + /* + * This thread's cache lacks bt. Look for it in the global + * cache. 
+ */ + if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx)) + return (NULL); + + /* Link a prof_thd_cnt_t into ctx for this thread. */ + if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { + assert(ckh_count(&prof_tdata->bt2cnt) > 0); + /* + * Flush the least recently used cnt in order to keep + * bt2cnt from becoming too large. + */ + ret.p = ql_last(&prof_tdata->lru_ql, lru_link); + assert(ret.v != NULL); + if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, + NULL, NULL)) + not_reached(); + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + prof_ctx_merge(ret.p->ctx, ret.p); + /* ret can now be re-used. */ + } else { + assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); + /* Allocate and partially initialize a new cnt. */ + ret.v = imalloc(sizeof(prof_thr_cnt_t)); + if (ret.p == NULL) { + if (new_ctx) + prof_ctx_destroy(ctx); + return (NULL); + } + ql_elm_new(ret.p, cnts_link); + ql_elm_new(ret.p, lru_link); + } + /* Finish initializing ret. */ + ret.p->ctx = ctx; + ret.p->epoch = 0; + memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); + if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) { + if (new_ctx) + prof_ctx_destroy(ctx); + idalloc(ret.v); + return (NULL); + } + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + malloc_mutex_lock(ctx->lock); + ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link); + ctx->nlimbo--; + malloc_mutex_unlock(ctx->lock); + } else { + /* Move ret to the front of the LRU. 
*/ + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + } + + return (ret.p); +} + +#ifdef JEMALLOC_JET +size_t +prof_bt_count(void) +{ + size_t bt_count; + prof_tdata_t *prof_tdata; + + prof_tdata = prof_tdata_get(false); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (0); + + prof_enter(prof_tdata); + bt_count = ckh_count(&bt2ctx); + prof_leave(prof_tdata); + + return (bt_count); +} +#endif + +#ifdef JEMALLOC_JET +#undef prof_dump_open +#define prof_dump_open JEMALLOC_N(prof_dump_open_impl) +#endif +static int +prof_dump_open(bool propagate_err, const char *filename) +{ + int fd; + + fd = creat(filename, 0644); + if (fd == -1 && propagate_err == false) { + malloc_printf(": creat(\"%s\"), 0644) failed\n", + filename); + if (opt_abort) + abort(); + } + + return (fd); +} +#ifdef JEMALLOC_JET +#undef prof_dump_open +#define prof_dump_open JEMALLOC_N(prof_dump_open) +prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl); +#endif + +static bool +prof_dump_flush(bool propagate_err) +{ + bool ret = false; + ssize_t err; + + cassert(config_prof); + + err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + if (err == -1) { + if (propagate_err == false) { + malloc_write(": write() failed during heap " + "profile flush\n"); + if (opt_abort) + abort(); + } + ret = true; + } + prof_dump_buf_end = 0; + + return (ret); +} + +static bool +prof_dump_close(bool propagate_err) +{ + bool ret; + + assert(prof_dump_fd != -1); + ret = prof_dump_flush(propagate_err); + close(prof_dump_fd); + prof_dump_fd = -1; + + return (ret); +} + +static bool +prof_dump_write(bool propagate_err, const char *s) +{ + unsigned i, slen, n; + + cassert(config_prof); + + i = 0; + slen = strlen(s); + while (i < slen) { + /* Flush the buffer if it is full. 
*/ + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) + if (prof_dump_flush(propagate_err) && propagate_err) + return (true); + + if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { + /* Finish writing. */ + n = slen - i; + } else { + /* Write as much of s as will fit. */ + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; + } + memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); + prof_dump_buf_end += n; + i += n; + } + + return (false); +} + +JEMALLOC_ATTR(format(printf, 2, 3)) +static bool +prof_dump_printf(bool propagate_err, const char *format, ...) +{ + bool ret; + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + ret = prof_dump_write(propagate_err, buf); + + return (ret); +} + +static void +prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx, + prof_ctx_list_t *ctx_ql) +{ + prof_thr_cnt_t *thr_cnt; + prof_cnt_t tcnt; + + cassert(config_prof); + + malloc_mutex_lock(ctx->lock); + + /* + * Increment nlimbo so that ctx won't go away before dump. + * Additionally, link ctx into the dump list so that it is included in + * prof_dump()'s second pass. + */ + ctx->nlimbo++; + ql_tail_insert(ctx_ql, ctx, dump_link); + + memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); + ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { + volatile unsigned *epoch = &thr_cnt->epoch; + + while (true) { + unsigned epoch0 = *epoch; + + /* Make sure epoch is even. */ + if (epoch0 & 1U) + continue; + + memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); + + /* Terminate if epoch didn't change while reading. */ + if (*epoch == epoch0) + break; + } + + ctx->cnt_summed.curobjs += tcnt.curobjs; + ctx->cnt_summed.curbytes += tcnt.curbytes; + if (opt_prof_accum) { + ctx->cnt_summed.accumobjs += tcnt.accumobjs; + ctx->cnt_summed.accumbytes += tcnt.accumbytes; + } + } + + if (ctx->cnt_summed.curobjs != 0) + (*leak_nctx)++; + + /* Add to cnt_all. 
*/ + cnt_all->curobjs += ctx->cnt_summed.curobjs; + cnt_all->curbytes += ctx->cnt_summed.curbytes; + if (opt_prof_accum) { + cnt_all->accumobjs += ctx->cnt_summed.accumobjs; + cnt_all->accumbytes += ctx->cnt_summed.accumbytes; + } + + malloc_mutex_unlock(ctx->lock); +} + +static bool +prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) +{ + + if (opt_lg_prof_sample == 0) { + if (prof_dump_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heapprofile\n", + cnt_all->curobjs, cnt_all->curbytes, + cnt_all->accumobjs, cnt_all->accumbytes)) + return (true); + } else { + if (prof_dump_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", + cnt_all->curobjs, cnt_all->curbytes, + cnt_all->accumobjs, cnt_all->accumbytes, + ((uint64_t)1U << opt_lg_prof_sample))) + return (true); + } + + return (false); +} + +static void +prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) +{ + + ctx->nlimbo--; + ql_remove(ctx_ql, ctx, dump_link); +} + +static void +prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) +{ + + malloc_mutex_lock(ctx->lock); + prof_dump_ctx_cleanup_locked(ctx, ctx_ql); + malloc_mutex_unlock(ctx->lock); +} + +static bool +prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, + prof_ctx_list_t *ctx_ql) +{ + bool ret; unsigned i; cassert(config_prof); @@ -764,36 +888,49 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) * filled in. Avoid dumping any ctx that is an artifact of either * implementation detail. 
*/ + malloc_mutex_lock(ctx->lock); if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { assert(ctx->cnt_summed.curobjs == 0); assert(ctx->cnt_summed.curbytes == 0); assert(ctx->cnt_summed.accumobjs == 0); assert(ctx->cnt_summed.accumbytes == 0); - return (false); + ret = false; + goto label_return; } - if (prof_printf(propagate_err, "%"PRId64": %"PRId64 + if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64 " [%"PRIu64": %"PRIu64"] @", ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, - ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) - return (true); - - for (i = 0; i < bt->len; i++) { - if (prof_printf(propagate_err, " %#"PRIxPTR, - (uintptr_t)bt->vec[i])) - return (true); + ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) { + ret = true; + goto label_return; } - if (prof_write(propagate_err, "\n")) - return (true); + for (i = 0; i < bt->len; i++) { + if (prof_dump_printf(propagate_err, " %#"PRIxPTR, + (uintptr_t)bt->vec[i])) { + ret = true; + goto label_return; + } + } - return (false); + if (prof_dump_write(propagate_err, "\n")) { + ret = true; + goto label_return; + } + + ret = false; +label_return: + prof_dump_ctx_cleanup_locked(ctx, ctx_ql); + malloc_mutex_unlock(ctx->lock); + return (ret); } static bool prof_dump_maps(bool propagate_err) { + bool ret; int mfd; char filename[PATH_MAX + 1]; @@ -805,25 +942,52 @@ prof_dump_maps(bool propagate_err) if (mfd != -1) { ssize_t nread; - if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && - propagate_err) - return (true); + if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && + propagate_err) { + ret = true; + goto label_return; + } nread = 0; do { prof_dump_buf_end += nread; if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { /* Make space in prof_dump_buf before read(). 
*/ - if (prof_flush(propagate_err) && propagate_err) - return (true); + if (prof_dump_flush(propagate_err) && + propagate_err) { + ret = true; + goto label_return; + } } nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE - prof_dump_buf_end); } while (nread > 0); - close(mfd); - } else - return (true); + } else { + ret = true; + goto label_return; + } - return (false); + ret = false; +label_return: + if (mfd != -1) + close(mfd); + return (ret); +} + +static void +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx, + const char *filename) +{ + + if (cnt_all->curbytes != 0) { + malloc_printf(": Leak summary: %"PRId64" byte%s, %" + PRId64" object%s, %zu context%s\n", + cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", + cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", + leak_nctx, (leak_nctx != 1) ? "s" : ""); + malloc_printf( + ": Run pprof on \"%s\" for leak detail\n", + filename); + } } static bool @@ -832,99 +996,75 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) prof_tdata_t *prof_tdata; prof_cnt_t cnt_all; size_t tabind; - union { - prof_bt_t *p; - void *v; - } bt; union { prof_ctx_t *p; void *v; } ctx; size_t leak_nctx; + prof_ctx_list_t ctx_ql; cassert(config_prof); prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); - prof_enter(prof_tdata); - prof_dump_fd = creat(filename, 0644); - if (prof_dump_fd == -1) { - if (propagate_err == false) { - malloc_printf( - ": creat(\"%s\"), 0644) failed\n", - filename); - if (opt_abort) - abort(); - } - goto label_error; - } + + malloc_mutex_lock(&prof_dump_mtx); /* Merge per thread profile stats, and sum them in cnt_all. 
*/ memset(&cnt_all, 0, sizeof(prof_cnt_t)); leak_nctx = 0; + ql_new(&ctx_ql); + prof_enter(prof_tdata); for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) - prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); + prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql); + prof_leave(prof_tdata); + + /* Create dump file. */ + if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) + goto label_open_close_error; /* Dump profile header. */ - if (opt_lg_prof_sample == 0) { - if (prof_printf(propagate_err, - "heap profile: %"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @ heapprofile\n", - cnt_all.curobjs, cnt_all.curbytes, - cnt_all.accumobjs, cnt_all.accumbytes)) - goto label_error; - } else { - if (prof_printf(propagate_err, - "heap profile: %"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", - cnt_all.curobjs, cnt_all.curbytes, - cnt_all.accumobjs, cnt_all.accumbytes, - ((uint64_t)1U << opt_lg_prof_sample))) - goto label_error; - } + if (prof_dump_header(propagate_err, &cnt_all)) + goto label_write_error; - /* Dump per ctx profile stats. */ - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) - == false;) { - if (prof_dump_ctx(propagate_err, ctx.p, bt.p)) - goto label_error; + /* Dump per ctx profile stats. */ + while ((ctx.p = ql_first(&ctx_ql)) != NULL) { + if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql)) + goto label_write_error; } /* Dump /proc//maps if possible. */ if (prof_dump_maps(propagate_err)) - goto label_error; + goto label_write_error; - if (prof_flush(propagate_err)) - goto label_error; - close(prof_dump_fd); - prof_leave(prof_tdata); + if (prof_dump_close(propagate_err)) + goto label_open_close_error; - if (leakcheck && cnt_all.curbytes != 0) { - malloc_printf(": Leak summary: %"PRId64" byte%s, %" - PRId64" object%s, %zu context%s\n", - cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "", - cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "", - leak_nctx, (leak_nctx != 1) ? 
"s" : ""); - malloc_printf( - ": Run pprof on \"%s\" for leak detail\n", - filename); - } + malloc_mutex_unlock(&prof_dump_mtx); + + if (leakcheck) + prof_leakcheck(&cnt_all, leak_nctx, filename); return (false); -label_error: - prof_leave(prof_tdata); +label_write_error: + prof_dump_close(propagate_err); +label_open_close_error: + while ((ctx.p = ql_first(&ctx_ql)) != NULL) + prof_dump_ctx_cleanup(ctx.p, &ctx_ql); + malloc_mutex_unlock(&prof_dump_mtx); return (true); } #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) +#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void prof_dump_filename(char *filename, char v, int64_t vseq) { cassert(config_prof); - if (vseq != UINT64_C(0xffffffffffffffff)) { + if (vseq != VSEQ_INVALID) { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"PRIu64".%c%"PRId64".heap", @@ -950,7 +1090,7 @@ prof_fdump(void) if (opt_prof_final && opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff)); + prof_dump_filename(filename, 'f', VSEQ_INVALID); malloc_mutex_unlock(&prof_dump_seq_mtx); prof_dump(false, filename, opt_prof_leak); } @@ -1056,14 +1196,6 @@ prof_bt_keycomp(const void *k1, const void *k2) return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } -static malloc_mutex_t * -prof_ctx_mutex_choose(void) -{ - unsigned nctxs = atomic_add_u(&cum_ctxs, 1); - - return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); -} - prof_tdata_t * prof_tdata_init(void) { @@ -1208,6 +1340,8 @@ prof_boot2(void) if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); + if (malloc_mutex_init(&prof_dump_mtx)) + return (true); if (atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); @@ -1245,10 +1379,10 @@ prof_prefork(void) if (opt_prof) { unsigned i; - malloc_mutex_lock(&bt2ctx_mtx); - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_prefork(&bt2ctx_mtx); + malloc_mutex_prefork(&prof_dump_seq_mtx); for (i = 0; i < 
PROF_NCTX_LOCKS; i++) - malloc_mutex_lock(&ctx_locks[i]); + malloc_mutex_prefork(&ctx_locks[i]); } } diff --git a/contrib/jemalloc/src/quarantine.c b/contrib/jemalloc/src/quarantine.c index f96a948d5c70..5431511640a5 100644 --- a/contrib/jemalloc/src/quarantine.c +++ b/contrib/jemalloc/src/quarantine.c @@ -141,8 +141,17 @@ quarantine(void *ptr) obj->usize = usize; quarantine->curbytes += usize; quarantine->curobjs++; - if (opt_junk) - memset(ptr, 0x5a, usize); + if (config_fill && opt_junk) { + /* + * Only do redzone validation if Valgrind isn't in + * operation. + */ + if ((config_valgrind == false || opt_valgrind == false) + && usize <= SMALL_MAXCLASS) + arena_quarantine_junk_small(ptr, usize); + else + memset(ptr, 0x5a, usize); + } } else { assert(quarantine->curbytes == 0); idalloc(ptr); diff --git a/contrib/jemalloc/src/rtree.c b/contrib/jemalloc/src/rtree.c index 90c6935a0edd..205957ac4e1a 100644 --- a/contrib/jemalloc/src/rtree.c +++ b/contrib/jemalloc/src/rtree.c @@ -2,42 +2,55 @@ #include "jemalloc/internal/jemalloc_internal.h" rtree_t * -rtree_new(unsigned bits) +rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc) { rtree_t *ret; - unsigned bits_per_level, height, i; + unsigned bits_per_level, bits_in_leaf, height, i; + + assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; - height = bits / bits_per_level; - if (height * bits_per_level != bits) - height++; - assert(height * bits_per_level >= bits); + bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1; + if (bits > bits_in_leaf) { + height = 1 + (bits - bits_in_leaf) / bits_per_level; + if ((height-1) * bits_per_level + bits_in_leaf != bits) + height++; + } else { + height = 1; + } + assert((height-1) * bits_per_level + bits_in_leaf >= bits); - ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) + + ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) + (sizeof(unsigned) * height)); if (ret 
== NULL) return (NULL); memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * height)); + ret->alloc = alloc; + ret->dalloc = dalloc; if (malloc_mutex_init(&ret->mutex)) { - /* Leak the rtree. */ + if (dalloc != NULL) + dalloc(ret); return (NULL); } ret->height = height; - if (bits_per_level * height > bits) - ret->level2bits[0] = bits % bits_per_level; - else - ret->level2bits[0] = bits_per_level; - for (i = 1; i < height; i++) - ret->level2bits[i] = bits_per_level; + if (height > 1) { + if ((height-1) * bits_per_level + bits_in_leaf > bits) { + ret->level2bits[0] = (bits - bits_in_leaf) % + bits_per_level; + } else + ret->level2bits[0] = bits_per_level; + for (i = 1; i < height-1; i++) + ret->level2bits[i] = bits_per_level; + ret->level2bits[height-1] = bits_in_leaf; + } else + ret->level2bits[0] = bits; - ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]); + ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]); if (ret->root == NULL) { - /* - * We leak the rtree here, since there's no generic base - * deallocation. 
- */ + if (dalloc != NULL) + dalloc(ret); return (NULL); } memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); @@ -45,6 +58,31 @@ rtree_new(unsigned bits) return (ret); } +static void +rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level) +{ + + if (level < rtree->height - 1) { + size_t nchildren, i; + + nchildren = ZU(1) << rtree->level2bits[level]; + for (i = 0; i < nchildren; i++) { + void **child = (void **)node[i]; + if (child != NULL) + rtree_delete_subtree(rtree, child, level + 1); + } + } + rtree->dalloc(node); +} + +void +rtree_delete(rtree_t *rtree) +{ + + rtree_delete_subtree(rtree, rtree->root, 0); + rtree->dalloc(rtree); +} + void rtree_prefork(rtree_t *rtree) { diff --git a/contrib/jemalloc/src/stats.c b/contrib/jemalloc/src/stats.c index 43f87af67000..bef2ab33cd4d 100644 --- a/contrib/jemalloc/src/stats.c +++ b/contrib/jemalloc/src/stats.c @@ -345,25 +345,25 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", bv ? "enabled" : "disabled"); -#define OPT_WRITE_BOOL(n) \ +#define OPT_WRITE_BOOL(n) \ if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %s\n", bv ? 
"true" : "false"); \ } -#define OPT_WRITE_SIZE_T(n) \ +#define OPT_WRITE_SIZE_T(n) \ if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zu\n", sv); \ } -#define OPT_WRITE_SSIZE_T(n) \ +#define OPT_WRITE_SSIZE_T(n) \ if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd\n", ssv); \ } -#define OPT_WRITE_CHAR_P(n) \ +#define OPT_WRITE_CHAR_P(n) \ if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ == 0) { \ malloc_cprintf(write_cb, cbopaque, \ diff --git a/contrib/jemalloc/src/tcache.c b/contrib/jemalloc/src/tcache.c index 98ed19edd52e..6de92960b2df 100644 --- a/contrib/jemalloc/src/tcache.c +++ b/contrib/jemalloc/src/tcache.c @@ -260,8 +260,8 @@ tcache_arena_dissociate(tcache_t *tcache) /* Unlink from list of extant tcaches. */ malloc_mutex_lock(&tcache->arena->lock); ql_remove(&tcache->arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&tcache->arena->lock); tcache_stats_merge(tcache, tcache->arena); + malloc_mutex_unlock(&tcache->arena->lock); } } @@ -292,7 +292,7 @@ tcache_create(arena_t *arena) else if (size <= tcache_maxclass) tcache = (tcache_t *)arena_malloc_large(arena, size, true); else - tcache = (tcache_t *)icallocx(size, false, arena); + tcache = (tcache_t *)icalloct(size, false, arena); if (tcache == NULL) return (NULL); @@ -366,7 +366,7 @@ tcache_destroy(tcache_t *tcache) arena_dalloc_large(arena, chunk, tcache); } else - idallocx(tcache, false); + idalloct(tcache, false); } void @@ -399,11 +399,14 @@ tcache_thread_cleanup(void *arg) } } +/* Caller must own arena->lock. */ void tcache_stats_merge(tcache_t *tcache, arena_t *arena) { unsigned i; + cassert(config_stats); + /* Merge and reset tcache stats. 
*/ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; diff --git a/contrib/jemalloc/src/tsd.c b/contrib/jemalloc/src/tsd.c index 961a546329c1..700caabfe477 100644 --- a/contrib/jemalloc/src/tsd.c +++ b/contrib/jemalloc/src/tsd.c @@ -21,7 +21,7 @@ void malloc_tsd_dalloc(void *wrapper) { - idalloc(wrapper); + idalloct(wrapper, false); } void @@ -105,3 +105,37 @@ JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif + +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +void * +tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) +{ + pthread_t self = pthread_self(); + tsd_init_block_t *iter; + + /* Check whether this thread has already inserted into the list. */ + malloc_mutex_lock(&head->lock); + ql_foreach(iter, &head->blocks, link) { + if (iter->thread == self) { + malloc_mutex_unlock(&head->lock); + return (iter->data); + } + } + /* Insert block into list. */ + ql_elm_new(block, link); + block->thread = self; + ql_tail_insert(&head->blocks, block, link); + malloc_mutex_unlock(&head->lock); + return (NULL); +} + +void +tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) +{ + + malloc_mutex_lock(&head->lock); + ql_remove(&head->blocks, block, link); + malloc_mutex_unlock(&head->lock); +} +#endif diff --git a/contrib/jemalloc/src/util.c b/contrib/jemalloc/src/util.c index df1c5d50ce5d..70b3e4534b2f 100644 --- a/contrib/jemalloc/src/util.c +++ b/contrib/jemalloc/src/util.c @@ -93,7 +93,7 @@ malloc_write(const char *s) * provide a wrapper. 
*/ int -buferror(char *buf, size_t buflen) +buferror(int err, char *buf, size_t buflen) { #ifdef _WIN32 @@ -101,34 +101,36 @@ buferror(char *buf, size_t buflen) (LPSTR)buf, buflen, NULL); return (0); #elif defined(_GNU_SOURCE) - char *b = strerror_r(errno, buf, buflen); + char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); buf[buflen-1] = '\0'; } return (0); #else - return (strerror_r(errno, buf, buflen)); + return (strerror_r(err, buf, buflen)); #endif } uintmax_t -malloc_strtoumax(const char *nptr, char **endptr, int base) +malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { uintmax_t ret, digit; int b; bool neg; const char *p, *ns; + p = nptr; if (base < 0 || base == 1 || base > 36) { + ns = p; set_errno(EINVAL); - return (UINTMAX_MAX); + ret = UINTMAX_MAX; + goto label_return; } b = base; /* Swallow leading whitespace and get sign, if any. */ neg = false; - p = nptr; while (true) { switch (*p) { case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': @@ -162,7 +164,7 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) if (b == 8) p++; break; - case 'x': + case 'X': case 'x': switch (p[2]) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -180,7 +182,9 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) } break; default: - break; + p++; + ret = 0; + goto label_return; } } if (b == 0) @@ -197,13 +201,22 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) if (ret < pret) { /* Overflow. */ set_errno(ERANGE); - return (UINTMAX_MAX); + ret = UINTMAX_MAX; + goto label_return; } p++; } if (neg) ret = -ret; + if (p == ns) { + /* No conversion performed. */ + set_errno(EINVAL); + ret = UINTMAX_MAX; + goto label_return; + } + +label_return: if (endptr != NULL) { if (p == ns) { /* No characters were converted. 
*/ @@ -211,7 +224,6 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) } else *endptr = (char *)p; } - return (ret); } @@ -347,7 +359,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) APPEND_C(' '); \ } \ } while (0) -#define GET_ARG_NUMERIC(val, len) do { \ +#define GET_ARG_NUMERIC(val, len) do { \ switch (len) { \ case '?': \ val = va_arg(ap, int); \ @@ -370,6 +382,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case 'j': \ val = va_arg(ap, intmax_t); \ break; \ + case 'j' | 0x80: \ + val = va_arg(ap, uintmax_t); \ + break; \ case 't': \ val = va_arg(ap, ptrdiff_t); \ break; \ @@ -401,11 +416,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) unsigned char len = '?'; f++; - if (*f == '%') { - /* %% */ - APPEND_C(*f); - break; - } /* Flags. */ while (true) { switch (*f) { @@ -435,6 +445,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '*': width = va_arg(ap, int); f++; + if (width < 0) { + left_justify = true; + width = -width; + } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { @@ -444,19 +458,16 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) assert(uwidth != UINTMAX_MAX || get_errno() != ERANGE); width = (int)uwidth; - if (*f == '.') { - f++; - goto label_precision; - } else - goto label_length; break; - } case '.': - f++; - goto label_precision; - default: goto label_length; + } default: + break; } + /* Width/precision separator. */ + if (*f == '.') + f++; + else + goto label_length; /* Precision. 
*/ - label_precision: switch (*f) { case '*': prec = va_arg(ap, int); @@ -485,16 +496,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) } else len = 'l'; break; - case 'j': - len = 'j'; - f++; - break; - case 't': - len = 't'; - f++; - break; - case 'z': - len = 'z'; + case 'q': case 'j': case 't': case 'z': + len = *f; f++; break; default: break; @@ -503,6 +506,11 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) switch (*f) { char *s; size_t slen; + case '%': + /* %% */ + APPEND_C(*f); + f++; + break; case 'd': case 'i': { intmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[D2S_BUFSIZE]; @@ -556,7 +564,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) assert(len == '?' || len == 'l'); assert_not_implemented(len != 'l'); s = va_arg(ap, char *); - slen = (prec == -1) ? strlen(s) : prec; + slen = (prec < 0) ? strlen(s) : prec; APPEND_PADDED_S(s, slen, width, left_justify); f++; break; @@ -569,8 +577,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } - default: not_implemented(); + } default: not_reached(); } break; } default: { diff --git a/include/malloc_np.h b/include/malloc_np.h index 77efd47b818b..193307b5b459 100644 --- a/include/malloc_np.h +++ b/include/malloc_np.h @@ -46,6 +46,18 @@ int mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp); int mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +#define MALLOCX_LG_ALIGN(la) (la) +#define MALLOCX_ALIGN(a) (ffsl(a)-1) +#define MALLOCX_ZERO ((int)0x40) +#define MALLOCX_ARENA(a) ((int)(((a)+1) << 8)) + +void *mallocx(size_t size, int flags); +void *rallocx(void *ptr, size_t size, int flags); +size_t xallocx(void *ptr, size_t size, size_t extra, int flags); +size_t sallocx(const void *ptr, int flags); +void dallocx(void *ptr, int flags); +size_t nallocx(size_t size, int flags); + 
#define ALLOCM_LG_ALIGN(la) (la) #define ALLOCM_ALIGN(a) (ffsl(a)-1) #define ALLOCM_ZERO ((int)0x40) @@ -68,6 +80,12 @@ void * __realloc(void *, size_t); void __free(void *); int __posix_memalign(void **, size_t, size_t); size_t __malloc_usable_size(const void *); +void *__mallocx(size_t size, int flags); +void *__rallocx(void *ptr, size_t size, int flags); +size_t __xallocx(void *ptr, size_t size, size_t extra, int flags); +size_t __sallocx(const void *ptr, int flags); +void __dallocx(void *ptr, int flags); +size_t __nallocx(size_t size, int flags); int __allocm(void **, size_t *, size_t, int) __nonnull(1); int __rallocm(void **, size_t *, size_t, size_t, int) __nonnull(1); int __sallocm(const void *, size_t *, int) __nonnull(1); diff --git a/lib/libc/gen/tls.c b/lib/libc/gen/tls.c index 466f9579acb4..4b083da2ae82 100644 --- a/lib/libc/gen/tls.c +++ b/lib/libc/gen/tls.c @@ -40,9 +40,9 @@ #include "libc_private.h" /* Provided by jemalloc to avoid bootstrapping issues. */ -void *__jemalloc_a0malloc(size_t size); -void *__jemalloc_a0calloc(size_t num, size_t size); -void __jemalloc_a0free(void *ptr); +void *__je_a0malloc(size_t size); +void *__je_a0calloc(size_t num, size_t size); +void __je_a0free(void *ptr); __weak_reference(__libc_allocate_tls, _rtld_allocate_tls); __weak_reference(__libc_free_tls, _rtld_free_tls); @@ -125,8 +125,8 @@ __libc_free_tls(void *tcb, size_t tcbsize, size_t tcbalign __unused) tls = (Elf_Addr **)((Elf_Addr)tcb + tcbsize - TLS_TCB_SIZE); dtv = tls[0]; - __jemalloc_a0free(dtv); - __jemalloc_a0free(tcb); + __je_a0free(dtv); + __je_a0free(tcb); } /* @@ -142,18 +142,18 @@ __libc_allocate_tls(void *oldtcb, size_t tcbsize, size_t tcbalign __unused) if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE) return (oldtcb); - tcb = __jemalloc_a0calloc(1, tls_static_space + tcbsize - TLS_TCB_SIZE); + tcb = __je_a0calloc(1, tls_static_space + tcbsize - TLS_TCB_SIZE); tls = (Elf_Addr **)(tcb + tcbsize - TLS_TCB_SIZE); if (oldtcb != NULL) { memcpy(tls, oldtcb, 
tls_static_space); - __jemalloc_a0free(oldtcb); + __je_a0free(oldtcb); /* Adjust the DTV. */ dtv = tls[0]; dtv[2] = (Elf_Addr)tls + TLS_TCB_SIZE; } else { - dtv = __jemalloc_a0malloc(3 * sizeof(Elf_Addr)); + dtv = __je_a0malloc(3 * sizeof(Elf_Addr)); tls[0] = dtv; dtv[0] = 1; dtv[1] = 1; @@ -194,8 +194,8 @@ __libc_free_tls(void *tcb, size_t tcbsize __unused, size_t tcbalign) dtv = ((Elf_Addr**)tcb)[1]; tlsend = (Elf_Addr) tcb; tlsstart = tlsend - size; - __jemalloc_a0free((void*) tlsstart); - __jemalloc_a0free(dtv); + __je_a0free((void*) tlsstart); + __je_a0free(dtv); } /* @@ -213,8 +213,8 @@ __libc_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign) if (tcbsize < 2 * sizeof(Elf_Addr)) tcbsize = 2 * sizeof(Elf_Addr); - tls = __jemalloc_a0calloc(1, size + tcbsize); - dtv = __jemalloc_a0malloc(3 * sizeof(Elf_Addr)); + tls = __je_a0calloc(1, size + tcbsize); + dtv = __je_a0malloc(3 * sizeof(Elf_Addr)); segbase = (Elf_Addr)(tls + size); ((Elf_Addr*)segbase)[0] = segbase; diff --git a/lib/libc/stdlib/jemalloc/Makefile.inc b/lib/libc/stdlib/jemalloc/Makefile.inc index 971867638494..dbd0fd78cdfe 100644 --- a/lib/libc/stdlib/jemalloc/Makefile.inc +++ b/lib/libc/stdlib/jemalloc/Makefile.inc @@ -4,7 +4,7 @@ JEMALLOCSRCS:= jemalloc.c arena.c atomic.c base.c bitmap.c chunk.c \ chunk_dss.c chunk_mmap.c ckh.c ctl.c extent.c hash.c huge.c mb.c \ - mutex.c prof.c quarantine.c rtree.c stats.c tcache.c util.c tsd.c + mutex.c prof.c quarantine.c rtree.c stats.c tcache.c tsd.c util.c SYM_MAPS+=${.CURDIR}/stdlib/jemalloc/Symbol.map @@ -34,6 +34,12 @@ MLINKS+= \ jemalloc.3 mallctl.3 \ jemalloc.3 mallctlnametomib.3 \ jemalloc.3 mallctlbymib.3 \ + jemalloc.3 mallocx.3 \ + jemalloc.3 rallocx.3 \ + jemalloc.3 xallocx.3 \ + jemalloc.3 sallocx.3 \ + jemalloc.3 dallocx.3 \ + jemalloc.3 nallocx.3 \ jemalloc.3 allocm.3 \ jemalloc.3 rallocm.3 \ jemalloc.3 sallocm.3 \ diff --git a/lib/libc/stdlib/jemalloc/Symbol.map b/lib/libc/stdlib/jemalloc/Symbol.map index 617194f87a46..35a5dad8995e 
100644 --- a/lib/libc/stdlib/jemalloc/Symbol.map +++ b/lib/libc/stdlib/jemalloc/Symbol.map @@ -21,6 +21,12 @@ FBSD_1.3 { mallctl; mallctlnametomib; mallctlbymib; + mallocx; + rallocx; + xallocx; + sallocx; + dallocx; + nallocx; allocm; rallocm; sallocm; @@ -32,6 +38,12 @@ FBSD_1.3 { __free; __posix_memalign; __malloc_usable_size; + __mallocx; + __rallocx; + __xallocx; + __sallocx; + __dallocx; + __nallocx; __allocm; __rallocm; __sallocm;