[issue11849] glibc allocator doesn't release all free()ed memory

Charles-François Natali report at bugs.python.org
Fri Nov 25 23:45:19 CET 2011


Charles-François Natali <neologix at free.fr> added the comment:

> Hmm, quite slow indeed, are you sure you're not running in debug mode?
>

Well, yes, but it's no faster with a non-debug build: my laptop is
really crawling :-)

> If the performance regression is limited to read(), I don't think it's
> really an issue, but using mmap/munmap explicitly would probably be nicer
> anyway (1° because it lets the glibc choose whatever heuristic is best,
> 2° because it would help release memory on more systems than just glibc
> systems). I think limiting ourselves to systems which have
> MAP_ANONYMOUS is good enough.
>

Agreed.
Here's a patch.

----------
Added file: http://bugs.python.org/file23782/arenas_mmap.diff

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue11849>
_______________________________________
-------------- next part --------------
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -2,8 +2,11 @@
 
 #ifdef WITH_PYMALLOC
 
-#ifdef HAVE_MALLOPT_MMAP_THRESHOLD
-  #include <malloc.h>
+#ifdef HAVE_MMAP
+ #include <sys/mman.h>
+ #ifdef MAP_ANONYMOUS
+  #define ARENAS_USE_MMAP
+ #endif
 #endif
 
 #ifdef WITH_VALGRIND
@@ -183,15 +186,15 @@
 /*
  * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
  * on a page boundary. This is a reserved virtual address space for the
- * current process (obtained through a malloc call). In no way this means
- * that the memory arenas will be used entirely. A malloc(<Big>) is usually
- * an address range reservation for <Big> bytes, unless all pages within this
- * space are referenced subsequently. So malloc'ing big blocks and not using
- * them does not mean "wasting memory". It's an addressable range wastage...
+ * current process (obtained through a malloc()/mmap() call). In no way this
+ * means that the memory arenas will be used entirely. A malloc(<Big>) is
+ * usually an address range reservation for <Big> bytes, unless all pages within
+ * this space are referenced subsequently. So malloc'ing big blocks and not
+ * using them does not mean "wasting memory". It's an addressable range
+ * wastage...
  *
- * Therefore, allocating arenas with malloc is not optimal, because there is
- * some address space wastage, but this is the most portable way to request
- * memory from the system across various platforms.
+ * Arenas are allocated with mmap() on systems supporting anonymous memory
+ * mappings to reduce heap fragmentation.
  */
 #define ARENA_SIZE              (256 << 10)     /* 256KB */
 
@@ -557,11 +560,6 @@
         if (numarenas > PY_SIZE_MAX / sizeof(*arenas))
             return NULL;                /* overflow */
 #endif
-#ifdef HAVE_MALLOPT_MMAP_THRESHOLD
-        /* Ensure arenas are allocated by mmap to avoid heap fragmentation. */
-        if (numarenas == INITIAL_ARENA_OBJECTS)
-            mallopt(M_MMAP_THRESHOLD, ARENA_SIZE);
-#endif
         nbytes = numarenas * sizeof(*arenas);
         arenaobj = (struct arena_object *)realloc(arenas, nbytes);
         if (arenaobj == NULL)
@@ -594,7 +592,12 @@
     arenaobj = unused_arena_objects;
     unused_arena_objects = arenaobj->nextarena;
     assert(arenaobj->address == 0);
+#ifdef ARENAS_USE_MMAP
+    arenaobj->address = (uptr)mmap(NULL, ARENA_SIZE, PROT_READ|PROT_WRITE,
+                                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+#else
     arenaobj->address = (uptr)malloc(ARENA_SIZE);
+#endif
     if (arenaobj->address == 0) {
         /* The allocation failed: return NULL after putting the
          * arenaobj back.
@@ -1071,7 +1074,11 @@
                 unused_arena_objects = ao;
 
                 /* Free the entire arena. */
+#ifdef ARENAS_USE_MMAP
+                munmap((void *)ao->address, ARENA_SIZE);
+#else
                 free((void *)ao->address);
+#endif
                 ao->address = 0;                        /* mark unassociated */
                 --narenas_currently_allocated;
 
diff --git a/configure.in b/configure.in
--- a/configure.in
+++ b/configure.in
@@ -2567,8 +2567,8 @@
  getgrouplist getgroups getlogin getloadavg getpeername getpgid getpid \
  getpriority getresuid getresgid getpwent getspnam getspent getsid getwd \
  if_nameindex \
- initgroups kill killpg lchmod lchown lockf linkat lstat lutimes memrchr \
- mbrtowc mkdirat mkfifo \
+ initgroups kill killpg lchmod lchown lockf linkat lstat lutimes mmap \
+ memrchr mbrtowc mkdirat mkfifo \
  mkfifoat mknod mknodat mktime mremap nice openat pathconf pause pipe2 plock poll \
  posix_fallocate posix_fadvise pread \
  pthread_init pthread_kill putenv pwrite readlink readlinkat readv realpath renameat \
@@ -2679,15 +2679,6 @@
   [AC_MSG_RESULT(no)
 ])
 
-AC_MSG_CHECKING(whether mallopt can set malloc mmap threshold)
-AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
-#include <malloc.h>
-]], [[mallopt(M_MMAP_THRESHOLD, 256 * 1024)]])],
-  [AC_DEFINE(HAVE_MALLOPT_MMAP_THRESHOLD, 1, Define if mallopt can set malloc mmap threshold.)
-   AC_MSG_RESULT(yes)],
-  [AC_MSG_RESULT(no)
-])
-
 AC_MSG_CHECKING(for broken unsetenv)
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
 #include <stdlib.h>
diff --git a/pyconfig.h.in b/pyconfig.h.in
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -512,9 +512,6 @@
 /* Define this if you have the makedev macro. */
 #undef HAVE_MAKEDEV
 
-/* Define if mallopt can set malloc mmap threshold. */
-#undef HAVE_MALLOPT_MMAP_THRESHOLD
-
 /* Define to 1 if you have the `mbrtowc' function. */
 #undef HAVE_MBRTOWC
 
@@ -545,6 +542,9 @@
 /* Define to 1 if you have the `mktime' function. */
 #undef HAVE_MKTIME
 
+/* Define to 1 if you have the `mmap' function. */
+#undef HAVE_MMAP
+
 /* Define to 1 if you have the `mremap' function. */
 #undef HAVE_MREMAP
 


More information about the Python-bugs-list mailing list