diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b999c6e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+.*.swp
+*~
+ID
+tags
+malloc.o
+libjemalloc.so
+
diff --git a/malloc.c b/malloc.c
index 8bc2c85..13e63a1 100644
--- a/malloc.c
+++ b/malloc.c
@@ -109,12 +109,18 @@
  *******************************************************************************
  */
 
+/*
+ * Modified by Sami Farin
+ * gcc -std=gnu99 -O3 -fPIC -DPIC -c malloc.c -m64 -pie -fstack-protector
+ * gcc -shared malloc.o -o libjemalloc.so -lpthread
+ */
+
 /*
  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
  * defaults the A and J runtime options to off.  These settings are appropriate
  * for production systems.
  */
-#define MALLOC_PRODUCTION
+#define MALLOC_PRODUCTION
 
 #ifndef MALLOC_PRODUCTION
 	/*
@@ -145,7 +151,7 @@
  * re-balances arena load if exponentially averaged contention exceeds a
  * certain threshold.
  */
-#define MALLOC_BALANCE
+#define MALLOC_BALANCE
 
 /*
  * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
@@ -155,28 +161,19 @@
  */
 #define MALLOC_DSS
 
-#include
-__FBSDID("$FreeBSD$");
+#define _GNU_SOURCE
 
-#include "libc_private.h"
 #ifdef MALLOC_DEBUG
 #  define _LOCK_DEBUG
 #endif
 #include "spinlock.h"
-#include "namespace.h"
+#include "paging.h"
+
+#include
 #include
-#include
-#include
 #include
-#include
-#include
-#include
-#include /* Must come after several other sys/ includes. */
-
-#include
-#include
-#include
-
+#include
 #include
 #include
 #include
@@ -190,8 +187,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 
-#include "un-namespace.h"
-
 #ifdef MALLOC_DEBUG
 #  ifdef NDEBUG
 #    undef NDEBUG
@@ -205,6 +200,13 @@ __FBSDID("$FreeBSD$");
 
 #include "rb.h"
 
+#define SIZE_T_MAX SIZE_MAX
+#define __unused __attribute__((__unused__))
+#define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
+#define __weak_reference(sym,alias) \
+    __asm__(".weak " #alias); \
+    __asm__(".equ " #alias ", " #sym)
+
 #ifdef MALLOC_DEBUG
    /* Disable inlining to make debugging easier. */
 #  define inline
@@ -409,9 +411,8 @@ __FBSDID("$FreeBSD$");
  * places, because they require malloc()ed memory, which causes bootstrapping
  * issues in some cases.
  */
-typedef struct {
-    spinlock_t lock;
-} malloc_mutex_t;
+typedef pthread_mutex_t malloc_mutex_t;
+typedef pthread_mutex_t malloc_spinlock_t;
 
 /* Set to true once the allocator has been initialized. */
 static bool malloc_initialized = false;
@@ -1070,28 +1071,12 @@ static bool opt_xmalloc = false;
 static bool opt_zero = false;
 static int opt_narenas_lshift = 0;
 
-typedef struct {
-    void   *p;
-    size_t  s;
-    void   *r;
-} malloc_utrace_t;
-
-#define UTRACE(a, b, c)                     \
-    if (opt_utrace) {                       \
-        malloc_utrace_t ut;                 \
-        ut.p = (a);                         \
-        ut.s = (b);                         \
-        ut.r = (c);                         \
-        utrace(&ut, sizeof(ut));            \
-    }
-
 /******************************************************************************/
 /*
  * Begin function prototypes for non-inline static functions.
 */
 
-static void malloc_mutex_init(malloc_mutex_t *mutex);
-static bool malloc_spin_init(pthread_mutex_t *lock);
+static bool malloc_mutex_init(malloc_mutex_t *mutex);
 static void wrtmessage(const char *p1, const char *p2, const char *p3,
     const char *p4);
 #ifdef MALLOC_STATS
@@ -1193,114 +1178,107 @@ static bool malloc_init_hard(void);
  * cases.
  */
-static void
-malloc_mutex_init(malloc_mutex_t *mutex)
-{
-    static const spinlock_t lock = _SPINLOCK_INITIALIZER;
-
-    mutex->lock = lock;
-}
-
-static inline void
-malloc_mutex_lock(malloc_mutex_t *mutex)
-{
-
-    if (__isthreaded)
-        _SPINLOCK(&mutex->lock);
-}
-
-static inline void
-malloc_mutex_unlock(malloc_mutex_t *mutex)
-{
-
-    if (__isthreaded)
-        _SPINUNLOCK(&mutex->lock);
-}
-
-/*
- * End mutex.
- */
-/******************************************************************************/
-/*
- * Begin spin lock.  Spin locks here are actually adaptive mutexes that block
- * after a period of spinning, because unbounded spinning would allow for
- * priority inversion.
- */
-
 /*
  * We use an unpublished interface to initialize pthread mutexes with an
  * allocation callback, in order to avoid infinite recursion.
  */
-int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
-    void *(calloc_cb)(size_t, size_t));
+/*
+int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
+    void *(calloc_cb)(size_t, size_t));
 
 __weak_reference(_pthread_mutex_init_calloc_cb_stub,
-    _pthread_mutex_init_calloc_cb);
+    _pthread_mutex_init_calloc_cb);
 
 int
 _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
-    void *(calloc_cb)(size_t, size_t))
+    void *(calloc_cb)(size_t, size_t))
 {
-
-    return (0);
+    return (0);
 }
 
 static bool
 malloc_spin_init(pthread_mutex_t *lock)
{
-
     if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
         return (true);
-
     return (false);
 }
-
+*/
+static bool
+malloc_mutex_init(malloc_mutex_t *mutex)
+{
+    pthread_mutexattr_t attr;
+    if (pthread_mutexattr_init(&attr) != 0)
+        return (true);
+    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+    if (pthread_mutex_init(mutex, &attr) != 0) {
+        pthread_mutexattr_destroy(&attr);
+        return (true);
+    }
+    pthread_mutexattr_destroy(&attr);
+    return (false);
+}
+#if 0
+static inline void
+malloc_mutex_lock(malloc_mutex_t *mutex)
+{
+    pthread_mutex_lock(mutex);
+}
+#else
 static inline unsigned
 malloc_spin_lock(pthread_mutex_t *lock)
 {
     unsigned ret = 0;
 
-    if (__isthreaded) {
-        if (_pthread_mutex_trylock(lock) != 0) {
-            /* Exponentially back off if there are multiple CPUs. */
-            if (ncpus > 1) {
-                unsigned i;
-                volatile unsigned j;
-
-                for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
-                    for (j = 0; j < (1U << i); j++) {
-                        ret++;
-                        CPU_SPINWAIT;
-                    }
-
-                    if (_pthread_mutex_trylock(lock) == 0)
-                        return (ret);
-                }
-            }
-
-            /*
-             * Spinning failed.  Block until the lock becomes
-             * available, in order to avoid indefinite priority
-             * inversion.
-             */
-            _pthread_mutex_lock(lock);
-            assert((ret << BLOCK_COST_2POW) != 0 || ncpus == 1);
-            return (ret << BLOCK_COST_2POW);
-        }
-    }
-
+    if (pthread_mutex_trylock(lock) != 0) {
+        /* Exponentially back off if there are multiple CPUs. */
+        if (ncpus > 1) {
+            unsigned i;
+            volatile unsigned j;
+
+            for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
+                for (j = 0; j < (1U << i); j++) {
+                    ret++;
+                    CPU_SPINWAIT;
+                }
+                if (pthread_mutex_trylock(lock) == 0)
+                    return (ret);
+            }
+        }
+        /*
+         * Spinning failed.  Block until the lock becomes
+         * available, in order to avoid indefinite priority
+         * inversion.
+         */
+        pthread_mutex_lock(lock);
+        assert((ret << BLOCK_COST_2POW) != 0 || ncpus == 1);
+        return (ret << BLOCK_COST_2POW);
+    }
     return (ret);
 }
-
+#endif
 static inline void
-malloc_spin_unlock(pthread_mutex_t *lock)
+malloc_mutex_unlock(malloc_mutex_t *mutex)
 {
-
-    if (__isthreaded)
-        _pthread_mutex_unlock(lock);
+    pthread_mutex_unlock(mutex);
 }
 
 /*
+ * End mutex.
+ */
+/******************************************************************************/
+/*
+ * Begin spin lock.  Spin locks here are actually adaptive mutexes that block
+ * after a period of spinning, because unbounded spinning would allow for
+ * priority inversion.
+ */
+
+#define malloc_spin_init malloc_mutex_init
+//#define malloc_spin_lock malloc_mutex_lock
+#define malloc_mutex_lock malloc_spin_lock
+#define malloc_spin_unlock malloc_mutex_unlock
+
+/*
  * End spin lock.
  */
 /******************************************************************************/
@@ -1400,6 +1378,25 @@ prn_##suffix(uint32_t lg_range) \
 #  define PRN(suffix, lg_range) prn_##suffix(lg_range)
 #endif
 
+static inline char* _getprogname(void)
+{
+    return program_invocation_name;
+}
+
+extern int __libc_enable_secure;
+
+static int issetugid_linux(void)
+{
+    if (__libc_enable_secure)
+        return 1;
+    if (getuid() != geteuid())
+        return 1;
+    if (getgid() != getegid())
+        return 1;
+    /* Else */
+    return 0;
+}
+
 #ifdef MALLOC_BALANCE
 /* Define the PRNG used for arena assignment. */
 static __thread uint32_t balance_x;
@@ -1409,11 +1406,17 @@ PRN_DEFINE(balance, balance_x, 1297, 1301)
 static void
 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
 {
-
-    _write(STDERR_FILENO, p1, strlen(p1));
-    _write(STDERR_FILENO, p2, strlen(p2));
-    _write(STDERR_FILENO, p3, strlen(p3));
-    _write(STDERR_FILENO, p4, strlen(p4));
+    struct iovec iov[4];
+
+    iov[0].iov_base = (void*)p1;
+    iov[0].iov_len = strlen(p1);
+    iov[1].iov_base = (void*)p2;
+    iov[1].iov_len = strlen(p2);
+    iov[2].iov_base = (void*)p3;
+    iov[2].iov_len = strlen(p3);
+    iov[3].iov_base = (void*)p4;
+    iov[3].iov_len = strlen(p4);
+    writev(STDERR_FILENO, iov, 4);
 }
 
 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
@@ -1649,7 +1652,7 @@ stats_print(arena_t *arena)
     malloc_printf("mapped: %12zu\n", arena->stats.mapped);
 
 #ifdef MALLOC_MAG
-    if (__isthreaded && opt_mag) {
+    if (opt_mag) {
         malloc_printf("bins: bin size regs pgs mags "
             "newruns reruns maxruns curruns\n");
     } else {
@@ -1685,7 +1688,7 @@ stats_print(arena_t *arena)
         arena->bins[i].nregs,
         arena->bins[i].run_size >> PAGE_SHIFT,
 #ifdef MALLOC_MAG
-        (__isthreaded && opt_mag) ?
+        (opt_mag) ?
         arena->bins[i].stats.nmags :
 #endif
         arena->bins[i].stats.nrequests,
@@ -2124,7 +2127,9 @@ chunk_dealloc_dss(void *chunk, size_t size)
         malloc_mutex_unlock(&dss_mtx);
     } else {
         malloc_mutex_unlock(&dss_mtx);
+#ifdef MADV_FREE
         madvise(chunk, size, MADV_FREE);
+#endif
     }
 
     return (false);
@@ -2189,18 +2194,13 @@ choose_arena(void)
      * introduces a bootstrapping issue.
      */
 #ifndef NO_TLS
-    if (__isthreaded == false) {
-        /* Avoid the overhead of TLS for single-threaded operation. */
-        return (arenas[0]);
-    }
-
     ret = arenas_map;
     if (ret == NULL) {
         ret = choose_arena_hard();
         assert(ret != NULL);
     }
 #else
-    if (__isthreaded && narenas > 1) {
+    if (narenas > 1) {
         unsigned long ind;
 
         /*
@@ -2211,7 +2211,7 @@ choose_arena(void)
          * knowledge of how _pthread_self() calculates values, we can't
          * easily do much better than this.
          */
-        ind = (unsigned long) _pthread_self() % narenas;
+        ind = (unsigned long) pthread_self() % narenas;
 
         /*
          * Optimistially assume that arenas[ind] has been initialized.
@@ -2239,10 +2239,8 @@ choose_arena(void)
             ret = arenas[ind];
             malloc_spin_unlock(&arenas_lock);
         }
-    } else
-        ret = arenas[0];
+    }
 #endif
-
     assert(ret != NULL);
     return (ret);
 }
@@ -2257,11 +2255,9 @@ choose_arena_hard(void)
 {
     arena_t *ret;
 
-    assert(__isthreaded);
-
 #ifdef MALLOC_BALANCE
     /* Seed the PRNG used for arena load balancing. */
-    SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self()));
+    SPRN(balance, (uint32_t)(uintptr_t)(pthread_self()));
 #endif
 
     if (narenas > 1) {
@@ -2772,9 +2768,11 @@ arena_purge(arena_t *arena)
                 chunk->ndirty -= npages;
                 arena->ndirty -= npages;
 
+#ifdef MADV_FREE
                 madvise((void *)((uintptr_t)chunk + (i << PAGE_SHIFT)),
                     (npages << PAGE_SHIFT), MADV_FREE);
+#endif
 #ifdef MALLOC_STATS
                 arena->stats.nmadvise++;
                 arena->stats.purged += npages;
@@ -3279,6 +3277,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
 #else
     malloc_spin_lock(&arena->lock);
 #endif
+
     if ((run = bin->runcur) != NULL && run->nfree > 0)
         ret = arena_bin_malloc_easy(arena, bin, run);
     else
@@ -3351,7 +3350,7 @@ arena_malloc(arena_t *arena, size_t size, bool zero)
 
     if (size <= bin_maxclass) {
 #ifdef MALLOC_MAG
-        if (__isthreaded && opt_mag) {
+        if (opt_mag) {
             mag_rack_t *rack = mag_rack;
             if (rack == NULL) {
                 rack = mag_rack_create(arena);
@@ -3852,7 +3851,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
     if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
         /* Small allocation. */
 #ifdef MALLOC_MAG
-        if (__isthreaded && opt_mag) {
+        if (opt_mag) {
             mag_rack_t *rack = mag_rack;
             if (rack == NULL) {
                 rack = mag_rack_create(arena);
@@ -4784,31 +4783,10 @@ malloc_init_hard(void)
 
     /* Get number of CPUs. */
     {
-        int mib[2];
-        size_t len;
-
-        mib[0] = CTL_HW;
-        mib[1] = HW_NCPU;
-        len = sizeof(ncpus);
-        if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
-            /* Error. */
-            ncpus = 1;
-        }
-    }
-
-    /*
-     * Increase the chunk size to the largest page size that is greater
-     * than the default chunk size and less than or equal to 4MB.
-     */
-    {
-        size_t pagesizes[MAXPAGESIZES];
-        int k, nsizes;
-
-        nsizes = getpagesizes(pagesizes, MAXPAGESIZES);
-        for (k = 0; k < nsizes; k++)
-            if (pagesizes[k] <= (1LU << 22))
-                while ((1LU << opt_chunk_2pow) < pagesizes[k])
-                    opt_chunk_2pow++;
+        //ncpus = sysconf(_SC_NPROCESSORS_CONF); /* Hangs! */
+        ncpus = 2;
+        if (ncpus == 0) ncpus = 1;
+        else if (ncpus > 4096) ncpus = 4096;
     }
 
     for (i = 0; i < 3; i++) {
@@ -4832,7 +4810,7 @@ malloc_init_hard(void)
             }
             break;
         case 1:
-            if (issetugid() == 0 && (opts =
+            if (issetugid_linux() == 0 && (opts =
                 getenv("MALLOC_OPTIONS")) != NULL) {
                 /*
                  * Do nothing; opts is already initialized to
@@ -5117,8 +5095,6 @@ MALLOC_OUT:
     arena_maxclass = chunksize - (arena_chunk_header_npages <<
         PAGE_SHIFT);
 
-    UTRACE(0, 0, 0);
-
 #ifdef MALLOC_STATS
     memset(&stats_chunks, 0, sizeof(chunk_stats_t));
 #endif
@@ -5266,6 +5242,7 @@ MALLOC_OUT:
     malloc_initialized = true;
     malloc_mutex_unlock(&init_lock);
+
     return (false);
 }
@@ -5309,7 +5286,6 @@ RETURN:
         errno = ENOMEM;
     }
 
-    UTRACE(0, size, ret);
     return (ret);
 }
 
@@ -5363,7 +5339,6 @@ posix_memalign(void **memptr, size_t alignment, size_t size)
     ret = 0;
 
 RETURN:
-    UTRACE(0, size, result);
     return (ret);
 }
 
@@ -5412,7 +5387,6 @@ RETURN:
         errno = ENOMEM;
     }
 
-    UTRACE(0, num_size, ret);
     return (ret);
 }
 
@@ -5464,15 +5438,12 @@ realloc(void *ptr, size_t size)
     }
 
 RETURN:
-    UTRACE(ptr, size, ret);
     return (ret);
 }
 
 void
 free(void *ptr)
 {
-
-    UTRACE(ptr, 0, 0);
     if (ptr != NULL) {
         assert(malloc_initialized);
diff --git a/paging.h b/paging.h
new file mode 100644
index 0000000..d78a33f
--- /dev/null
+++ b/paging.h
@@ -0,0 +1,20 @@
+#ifndef _PAGING_H_
+#define _PAGING_H_
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/* Size of the level 1 page table units */
+#define NPTEPG      (PAGE_SIZE/(sizeof (pt_entry_t)))
+#define NPTEPGSHIFT 9   /* LOG2(NPTEPG) */
+#define PAGE_SHIFT  12  /* LOG2(PAGE_SIZE) */
+#define PAGE_SIZE   (1<<PAGE_SHIFT)
+
+#endif
+
+#endif /* _PAGING_H_ */
diff --git a/rb.h b/rb.h
--- a/rb.h
+++ b/rb.h
-#include
-__FBSDID("$FreeBSD$");
 
 /* Node structure. */
 #define rb_node(a_type) \
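
Note on the mutex rework in malloc.c above: the replacement malloc_mutex_init() leans on glibc's non-portable PTHREAD_MUTEX_ADAPTIVE_NP mutex type (only visible under _GNU_SOURCE), with malloc_spin_lock() layering an exponential trylock back-off on top before blocking. Below is a minimal standalone sketch of just that initialization pattern; the file name adaptive_mutex_sketch.c and the helper names are made up for illustration and are not part of the patch.

/* adaptive_mutex_sketch.c -- hypothetical standalone demo, not part of the patch.
 * Build (assumed): gcc -std=gnu99 -O2 -pthread adaptive_mutex_sketch.c
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Same shape as the patch's malloc_mutex_init(): true means failure. */
static bool
adaptive_mutex_init(pthread_mutex_t *mutex)
{
    pthread_mutexattr_t attr;

    if (pthread_mutexattr_init(&attr) != 0)
        return (true);
    /* Spin briefly under contention before sleeping in the kernel. */
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
    if (pthread_mutex_init(mutex, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        return (true);
    }
    pthread_mutexattr_destroy(&attr);
    return (false);
}

static pthread_mutex_t lock;
static long counter;

static void *
worker(void *arg)
{
    (void)arg;
    for (int i = 0; i < 100000; i++) {
        pthread_mutex_lock(&lock);
        counter++;
        pthread_mutex_unlock(&lock);
    }
    return (NULL);
}

int
main(void)
{
    pthread_t t[4];

    if (adaptive_mutex_init(&lock)) {
        fprintf(stderr, "mutex init failed\n");
        return (1);
    }
    for (int i = 0; i < 4; i++)
        pthread_create(&t[i], NULL, worker, NULL);
    for (int i = 0; i < 4; i++)
        pthread_join(t[i], NULL);
    printf("counter = %ld (expected 400000)\n", counter);
    return (0);
}

The adaptive type makes glibc spin a bounded number of iterations in user space before futex-waiting, which is the same trade-off the removed FreeBSD spin-lock path and the remaining malloc_spin_lock() back-off loop are aiming for.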
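The build lines quoted in the header comment of malloc.c produce a standalone libjemalloc.so rather than a patched libc, so on Linux the allocator is normally activated per process through the dynamic linker. A small smoke test like the sketch below can confirm that the preloaded object is actually servicing malloc/realloc/free; the file name preload_test.c and the LD_PRELOAD invocation are assumptions about how one would exercise the library, not something the patch itself mandates.

/* preload_test.c -- hypothetical smoke test for the preloaded allocator.
 * Build and run (assumed):
 *   gcc -std=gnu99 -O2 preload_test.c -o preload_test
 *   LD_PRELOAD=./libjemalloc.so ./preload_test
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
    /* Touch a few request sizes, from small bins up to multi-megabyte. */
    size_t sizes[] = { 64, 8192, 4 * 1024 * 1024 };

    for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
        char *p = malloc(sizes[i]);
        char *q;

        if (p == NULL) {
            perror("malloc");
            return (1);
        }
        memset(p, 0xa5, sizes[i]);

        /* Grow the buffer so the realloc path is exercised too. */
        q = realloc(p, sizes[i] * 2);
        if (q == NULL) {
            perror("realloc");
            free(p);
            return (1);
        }
        free(q);
    }
    printf("malloc/realloc/free through the preloaded allocator succeeded\n");
    return (0);
}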