925e6b
diff --git a/Include/dictobject.h b/Include/dictobject.h
925e6b
index ece01c6..acc1df0 100644
925e6b
--- a/Include/dictobject.h
925e6b
+++ b/Include/dictobject.h
925e6b
@@ -150,6 +150,8 @@ PyAPI_FUNC(PyObject *) PyDict_GetItemString(PyObject *dp, const char *key);
925e6b
 PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *item);
925e6b
 PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key);
925e6b
 
925e6b
+PyAPI_FUNC(void) _PyDict_DebugMallocStats(FILE *out);
925e6b
+
925e6b
 #ifdef __cplusplus
925e6b
 }
925e6b
 #endif
925e6b
diff --git a/Include/floatobject.h b/Include/floatobject.h
925e6b
index 54e8825..33c6ac0 100644
925e6b
--- a/Include/floatobject.h
925e6b
+++ b/Include/floatobject.h
925e6b
@@ -132,6 +132,7 @@ PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj,
925e6b
    failure.  Used in builtin_round in bltinmodule.c. */
925e6b
 PyAPI_FUNC(PyObject *) _Py_double_round(double x, int ndigits);
925e6b
 
925e6b
+PyAPI_FUNC(void) _PyFloat_DebugMallocStats(FILE* out);
925e6b
 
925e6b
 
925e6b
 #ifdef __cplusplus
925e6b
diff --git a/Include/frameobject.h b/Include/frameobject.h
925e6b
index 17e7679..66d9d8b 100644
925e6b
--- a/Include/frameobject.h
925e6b
+++ b/Include/frameobject.h
925e6b
@@ -80,6 +80,8 @@ PyAPI_FUNC(void) PyFrame_FastToLocals(PyFrameObject *);
925e6b
 
925e6b
 PyAPI_FUNC(int) PyFrame_ClearFreeList(void);
925e6b
 
925e6b
+PyAPI_FUNC(void) _PyFrame_DebugMallocStats(FILE *out);
925e6b
+
925e6b
 /* Return the line of code the frame is currently executing. */
925e6b
 PyAPI_FUNC(int) PyFrame_GetLineNumber(PyFrameObject *);
925e6b
 
925e6b
diff --git a/Include/intobject.h b/Include/intobject.h
925e6b
index 252eea9..4003736 100644
925e6b
--- a/Include/intobject.h
925e6b
+++ b/Include/intobject.h
925e6b
@@ -75,6 +75,8 @@ PyAPI_FUNC(PyObject *) _PyInt_FormatAdvanced(PyObject *obj,
925e6b
 					     char *format_spec,
925e6b
 					     Py_ssize_t format_spec_len);
925e6b
 
925e6b
+PyAPI_FUNC(void) _PyInt_DebugMallocStats(FILE *out);
925e6b
+
925e6b
 #ifdef __cplusplus
925e6b
 }
925e6b
 #endif
925e6b
diff --git a/Include/listobject.h b/Include/listobject.h
925e6b
index c445873..04664d7 100644
925e6b
--- a/Include/listobject.h
925e6b
+++ b/Include/listobject.h
925e6b
@@ -62,6 +62,8 @@ PyAPI_FUNC(PyObject *) _PyList_Extend(PyListObject *, PyObject *);
925e6b
 #define PyList_SET_ITEM(op, i, v) (((PyListObject *)(op))->ob_item[i] = (v))
925e6b
 #define PyList_GET_SIZE(op)    Py_SIZE(op)
925e6b
 
925e6b
+PyAPI_FUNC(void) _PyList_DebugMallocStats(FILE *out);
925e6b
+
925e6b
 #ifdef __cplusplus
925e6b
 }
925e6b
 #endif
925e6b
diff --git a/Include/methodobject.h b/Include/methodobject.h
925e6b
index 6e160b6..1944517 100644
925e6b
--- a/Include/methodobject.h
925e6b
+++ b/Include/methodobject.h
925e6b
@@ -87,6 +87,10 @@ typedef struct {
925e6b
 
925e6b
 PyAPI_FUNC(int) PyCFunction_ClearFreeList(void);
925e6b
 
925e6b
+PyAPI_FUNC(void) _PyCFunction_DebugMallocStats(FILE *out);
925e6b
+PyAPI_FUNC(void) _PyMethod_DebugMallocStats(FILE *out);
925e6b
+
925e6b
+
925e6b
 #ifdef __cplusplus
925e6b
 }
925e6b
 #endif
925e6b
diff --git a/Include/object.h b/Include/object.h
925e6b
index afbc68d..ce5febf 100644
925e6b
--- a/Include/object.h
925e6b
+++ b/Include/object.h
925e6b
@@ -1005,6 +1005,13 @@ PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(void);
925e6b
             _PyTrash_thread_deposit_object((PyObject*)op); \
925e6b
     } while (0);
925e6b
 
925e6b
+PyAPI_FUNC(void)
925e6b
+_PyDebugAllocatorStats(FILE *out, const char *block_name, int num_blocks,
925e6b
+		       size_t sizeof_block);
925e6b
+
925e6b
+PyAPI_FUNC(void)
925e6b
+_PyObject_DebugTypeStats(FILE *out);
925e6b
+
925e6b
 #ifdef __cplusplus
925e6b
 }
925e6b
 #endif
925e6b
diff --git a/Include/objimpl.h b/Include/objimpl.h
925e6b
index 55e83ec..331b456 100644
925e6b
--- a/Include/objimpl.h
925e6b
+++ b/Include/objimpl.h
925e6b
@@ -101,13 +101,13 @@ PyAPI_FUNC(void) PyObject_Free(void *);
925e6b
 
925e6b
 /* Macros */
925e6b
 #ifdef WITH_PYMALLOC
925e6b
+PyAPI_FUNC(void) _PyObject_DebugMallocStats(FILE *out);
925e6b
 #ifdef PYMALLOC_DEBUG   /* WITH_PYMALLOC && PYMALLOC_DEBUG */
925e6b
 PyAPI_FUNC(void *) _PyObject_DebugMalloc(size_t nbytes);
925e6b
 PyAPI_FUNC(void *) _PyObject_DebugRealloc(void *p, size_t nbytes);
925e6b
 PyAPI_FUNC(void) _PyObject_DebugFree(void *p);
925e6b
 PyAPI_FUNC(void) _PyObject_DebugDumpAddress(const void *p);
925e6b
 PyAPI_FUNC(void) _PyObject_DebugCheckAddress(const void *p);
925e6b
-PyAPI_FUNC(void) _PyObject_DebugMallocStats(void);
925e6b
 PyAPI_FUNC(void *) _PyObject_DebugMallocApi(char api, size_t nbytes);
925e6b
 PyAPI_FUNC(void *) _PyObject_DebugReallocApi(char api, void *p, size_t nbytes);
925e6b
 PyAPI_FUNC(void) _PyObject_DebugFreeApi(char api, void *p);
925e6b
diff --git a/Include/setobject.h b/Include/setobject.h
925e6b
index 52b07d5..143b175 100644
925e6b
--- a/Include/setobject.h
925e6b
+++ b/Include/setobject.h
925e6b
@@ -93,6 +93,7 @@ PyAPI_FUNC(int) _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key,
925e6b
 PyAPI_FUNC(PyObject *) PySet_Pop(PyObject *set);
925e6b
 PyAPI_FUNC(int) _PySet_Update(PyObject *set, PyObject *iterable);
925e6b
 
925e6b
+PyAPI_FUNC(void) _PySet_DebugMallocStats(FILE *out);
925e6b
 #ifdef __cplusplus
925e6b
 }
925e6b
 #endif
925e6b
diff --git a/Include/stringobject.h b/Include/stringobject.h
925e6b
index 18b5b41..de78d76 100644
925e6b
--- a/Include/stringobject.h
925e6b
+++ b/Include/stringobject.h
925e6b
@@ -204,6 +204,8 @@ PyAPI_FUNC(PyObject *) _PyBytes_FormatAdvanced(PyObject *obj,
925e6b
 					       char *format_spec,
925e6b
 					       Py_ssize_t format_spec_len);
925e6b
 
925e6b
+PyAPI_FUNC(void) _PyString_DebugMallocStats(FILE *out);
925e6b
+
925e6b
 #ifdef __cplusplus
925e6b
 }
925e6b
 #endif
925e6b
diff --git a/Include/tupleobject.h b/Include/tupleobject.h
925e6b
index a5ab733..e233f47 100644
925e6b
--- a/Include/tupleobject.h
925e6b
+++ b/Include/tupleobject.h
925e6b
@@ -54,7 +54,7 @@ PyAPI_FUNC(void) _PyTuple_MaybeUntrack(PyObject *);
925e6b
 #define PyTuple_SET_ITEM(op, i, v) (((PyTupleObject *)(op))->ob_item[i] = v)
925e6b
 
925e6b
 PyAPI_FUNC(int) PyTuple_ClearFreeList(void);
925e6b
-
925e6b
+PyAPI_FUNC(void) _PyTuple_DebugMallocStats(FILE *out);
925e6b
 #ifdef __cplusplus
925e6b
 }
925e6b
 #endif
925e6b
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
925e6b
index 9ab724a..b91250a 100644
925e6b
--- a/Include/unicodeobject.h
925e6b
+++ b/Include/unicodeobject.h
925e6b
@@ -1406,6 +1406,8 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
925e6b
     Py_UNICODE ch       /* Unicode character */
925e6b
     );
925e6b
 
925e6b
+PyAPI_FUNC(void) _PyUnicode_DebugMallocStats(FILE *out);
925e6b
+
925e6b
 #ifdef __cplusplus
925e6b
 }
925e6b
 #endif
925e6b
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
925e6b
index 82243f3..8f1e1a0 100644
925e6b
--- a/Lib/test/test_sys.py
925e6b
+++ b/Lib/test/test_sys.py
925e6b
@@ -488,6 +488,32 @@ class SysModuleTest(unittest.TestCase):
925e6b
         p.wait()
925e6b
         self.assertIn(executable, ["''", repr(sys.executable)])
925e6b
 
925e6b
+    def test_debugmallocstats(self):
925e6b
+        # Test sys._debugmallocstats()
925e6b
+        import subprocess
925e6b
+
925e6b
+        # Verify the default of writing to stderr:
925e6b
+        p = subprocess.Popen([sys.executable,
925e6b
+                              '-c', 'import sys; sys._debugmallocstats()'],
925e6b
+                             stderr=subprocess.PIPE)
925e6b
+        out, err = p.communicate()  # also waits for the child to exit
925e6b
+        self.assertIn("arenas allocated current", err)
925e6b
+
925e6b
+        # Verify that we can redirect the output to a file (not a file-like
925e6b
+        # object, though); always remove the scratch file afterwards:
925e6b
+        try:
925e6b
+            with open('mallocstats.txt', 'w') as out:
925e6b
+                sys._debugmallocstats(out)
925e6b
+            with open('mallocstats.txt') as f:
925e6b
+                self.assertIn("arenas allocated current", f.read())
925e6b
+        finally:
925e6b
+            os.unlink('mallocstats.txt')
925e6b
+
925e6b
+        # Verify that the destination must be a file:
925e6b
+        with self.assertRaises(TypeError):
925e6b
+            sys._debugmallocstats(42)
925e6b
+
925e6b
+
925e6b
 class SizeofTest(unittest.TestCase):
925e6b
 
925e6b
     def setUp(self):
925e6b
diff --git a/Objects/classobject.c b/Objects/classobject.c
925e6b
index 2c9c216..2ba7077 100644
925e6b
--- a/Objects/classobject.c
925e6b
+++ b/Objects/classobject.c
925e6b
@@ -2694,3 +2694,12 @@ PyMethod_Fini(void)
925e6b
 {
925e6b
     (void)PyMethod_ClearFreeList();
925e6b
 }
925e6b
+
925e6b
+/* Hand numfree (this file's free-list counter) and sizeof(PyMethodObject)
925e6b
+ * to _PyDebugAllocatorStats(), which writes the summary row to "out". */
925e6b
+void
925e6b
+_PyMethod_DebugMallocStats(FILE *out)
925e6b
+{
925e6b
+    const size_t each = sizeof(PyMethodObject);
925e6b
+    _PyDebugAllocatorStats(out, "free PyMethodObject", numfree, each);
925e6b
+}
925e6b
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
925e6b
index ba36b18..b8a5c7f 100644
925e6b
--- a/Objects/dictobject.c
925e6b
+++ b/Objects/dictobject.c
925e6b
@@ -225,6 +225,15 @@ show_track(void)
925e6b
 static PyDictObject *free_list[PyDict_MAXFREELIST];
925e6b
 static int numfree = 0;
925e6b
 
925e6b
+/* Report numfree (the free_list fill count) and sizeof(PyDictObject) to "out" */
925e6b
+void
925e6b
+_PyDict_DebugMallocStats(FILE *out)
925e6b
+{
925e6b
+    const size_t each = sizeof(PyDictObject);
925e6b
+    _PyDebugAllocatorStats(out, "free PyDictObject", numfree, each);
925e6b
+}
925e6b
+
925e6b
+
925e6b
 void
925e6b
 PyDict_Fini(void)
925e6b
 {
925e6b
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
925e6b
index ba867ef..533511d 100644
925e6b
--- a/Objects/floatobject.c
925e6b
+++ b/Objects/floatobject.c
925e6b
@@ -35,6 +35,22 @@ typedef struct _floatblock PyFloatBlock;
925e6b
 static PyFloatBlock *block_list = NULL;
925e6b
 static PyFloatObject *free_list = NULL;
925e6b
 
925e6b
+/* Count the PyFloatBlocks linked from block_list and report that count,
925e6b
+ * together with sizeof(PyFloatBlock), to "out". */
925e6b
+void
925e6b
+_PyFloat_DebugMallocStats(FILE *out)
925e6b
+{
925e6b
+    const PyFloatBlock *cur = block_list;
925e6b
+    int count = 0;
925e6b
+
925e6b
+    while (cur != NULL) {
925e6b
+        ++count;
925e6b
+        cur = cur->next;
925e6b
+    }
925e6b
+
925e6b
+    _PyDebugAllocatorStats(out, "PyFloatBlock", count, sizeof(PyFloatBlock));
925e6b
+}
925e6b
+
925e6b
 static PyFloatObject *
925e6b
 fill_free_list(void)
925e6b
 {
925e6b
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
925e6b
index f9e4a0e..337fc58 100644
925e6b
--- a/Objects/frameobject.c
925e6b
+++ b/Objects/frameobject.c
925e6b
@@ -982,3 +982,13 @@ PyFrame_Fini(void)
925e6b
     Py_XDECREF(builtin_object);
925e6b
     builtin_object = NULL;
925e6b
 }
925e6b
+
925e6b
+/* Hand numfree (this file's free-list counter) and sizeof(PyFrameObject)
925e6b
+ * to _PyDebugAllocatorStats(), which writes the summary row to "out". */
925e6b
+void
925e6b
+_PyFrame_DebugMallocStats(FILE *out)
925e6b
+{
925e6b
+    const size_t each = sizeof(PyFrameObject);
925e6b
+    _PyDebugAllocatorStats(out, "free PyFrameObject", numfree, each);
925e6b
+}
925e6b
+
925e6b
diff --git a/Objects/intobject.c b/Objects/intobject.c
925e6b
index 28182f9..f442ea0 100644
925e6b
--- a/Objects/intobject.c
925e6b
+++ b/Objects/intobject.c
925e6b
@@ -44,6 +44,23 @@ typedef struct _intblock PyIntBlock;
925e6b
 static PyIntBlock *block_list = NULL;
925e6b
 static PyIntObject *free_list = NULL;
925e6b
 
925e6b
+
925e6b
+/* Count the PyIntBlocks linked from block_list and report that count,
925e6b
+ * together with sizeof(PyIntBlock), to "out". */
925e6b
+void
925e6b
+_PyInt_DebugMallocStats(FILE *out)
925e6b
+{
925e6b
+    const PyIntBlock *cur = block_list;
925e6b
+    int count = 0;
925e6b
+
925e6b
+    while (cur != NULL) {
925e6b
+        ++count;
925e6b
+        cur = cur->next;
925e6b
+    }
925e6b
+
925e6b
+    _PyDebugAllocatorStats(out, "PyIntBlock", count, sizeof(PyIntBlock));
925e6b
+}
925e6b
+
925e6b
 static PyIntObject *
925e6b
 fill_free_list(void)
925e6b
 {
925e6b
diff --git a/Objects/listobject.c b/Objects/listobject.c
925e6b
index f753643..e6fa17d 100644
925e6b
--- a/Objects/listobject.c
925e6b
+++ b/Objects/listobject.c
925e6b
@@ -109,6 +109,15 @@ PyList_Fini(void)
925e6b
     }
925e6b
 }
925e6b
 
925e6b
+/* Hand numfree (this file's free-list counter) and sizeof(PyListObject)
925e6b
+ * to _PyDebugAllocatorStats(), which writes the summary row to "out". */
925e6b
+void
925e6b
+_PyList_DebugMallocStats(FILE *out)
925e6b
+{
925e6b
+    const size_t each = sizeof(PyListObject);
925e6b
+    _PyDebugAllocatorStats(out, "free PyListObject", numfree, each);
925e6b
+}
925e6b
+
925e6b
 PyObject *
925e6b
 PyList_New(Py_ssize_t size)
925e6b
 {
925e6b
diff --git a/Objects/methodobject.c b/Objects/methodobject.c
925e6b
index 0b60ca3..3193135 100644
925e6b
--- a/Objects/methodobject.c
925e6b
+++ b/Objects/methodobject.c
925e6b
@@ -412,6 +412,15 @@ PyCFunction_Fini(void)
925e6b
     (void)PyCFunction_ClearFreeList();
925e6b
 }
925e6b
 
925e6b
+/* Report the PyCFunctionObject free list (numfree entries) to "out" */
925e6b
+void
925e6b
+_PyCFunction_DebugMallocStats(FILE *out)
925e6b
+{
925e6b
+    _PyDebugAllocatorStats(out,
925e6b
+                           "free PyCFunctionObject",
925e6b
+                           numfree, sizeof(PyCFunctionObject));
925e6b
+}
925e6b
+
925e6b
 /* PyCFunction_New() is now just a macro that calls PyCFunction_NewEx(),
925e6b
    but it's part of the API so we need to keep a function around that
925e6b
    existing C extensions can call.
925e6b
diff --git a/Objects/object.c b/Objects/object.c
925e6b
index 14f4e9f..68aedcd 100644
925e6b
--- a/Objects/object.c
925e6b
+++ b/Objects/object.c
925e6b
@@ -2355,6 +2355,23 @@ PyMem_Free(void *p)
925e6b
     PyMem_FREE(p);
925e6b
 }
925e6b
 
925e6b
+void
925e6b
+_PyObject_DebugTypeStats(FILE *out)  /* run each type's stats dumper on out */
925e6b
+{
925e6b
+    _PyString_DebugMallocStats(out);
925e6b
+    _PyCFunction_DebugMallocStats(out);
925e6b
+    _PyDict_DebugMallocStats(out);
925e6b
+    _PyFloat_DebugMallocStats(out);
925e6b
+    _PyFrame_DebugMallocStats(out);
925e6b
+    _PyInt_DebugMallocStats(out);
925e6b
+    _PyList_DebugMallocStats(out);
925e6b
+    _PyMethod_DebugMallocStats(out);
925e6b
+    _PySet_DebugMallocStats(out);
925e6b
+    _PyTuple_DebugMallocStats(out);
925e6b
+#ifdef Py_USING_UNICODE  /* test definedness: the macro may carry no value */
925e6b
+    _PyUnicode_DebugMallocStats(out);
925e6b
+#endif /* Py_USING_UNICODE */
925e6b
+}
925e6b
 
925e6b
 /* These methods are used to control infinite recursion in repr, str, print,
925e6b
    etc.  Container objects that may recursively contain themselves,
925e6b
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
925e6b
index 38ebc37..2c05359 100644
925e6b
--- a/Objects/obmalloc.c
925e6b
+++ b/Objects/obmalloc.c
925e6b
@@ -508,12 +508,10 @@ static struct arena_object* usable_arenas = NULL;
925e6b
 /* Number of arenas allocated that haven't been free()'d. */
925e6b
 static size_t narenas_currently_allocated = 0;
925e6b
 
925e6b
-#ifdef PYMALLOC_DEBUG
925e6b
 /* Total number of times malloc() called to allocate an arena. */
925e6b
 static size_t ntimes_arena_allocated = 0;
925e6b
 /* High water mark (max value ever seen) for narenas_currently_allocated. */
925e6b
 static size_t narenas_highwater = 0;
925e6b
-#endif
925e6b
 
925e6b
 /* Allocate a new arena.  If we run out of memory, return NULL.  Else
925e6b
  * allocate a new arena, and return the address of an arena_object
925e6b
@@ -528,7 +526,7 @@ new_arena(void)
925e6b
 
925e6b
 #ifdef PYMALLOC_DEBUG
925e6b
     if (Py_GETENV("PYTHONMALLOCSTATS"))
925e6b
-        _PyObject_DebugMallocStats();
925e6b
+        _PyObject_DebugMallocStats(stderr);
925e6b
 #endif
925e6b
     if (unused_arena_objects == NULL) {
925e6b
         uint i;
925e6b
@@ -588,11 +586,9 @@ new_arena(void)
925e6b
     }
925e6b
 
925e6b
     ++narenas_currently_allocated;
925e6b
-#ifdef PYMALLOC_DEBUG
925e6b
     ++ntimes_arena_allocated;
925e6b
     if (narenas_currently_allocated > narenas_highwater)
925e6b
         narenas_highwater = narenas_currently_allocated;
925e6b
-#endif
925e6b
     arenaobj->freepools = NULL;
925e6b
     /* pool_address <- first pool-aligned address in the arena
925e6b
        nfreepools <- number of whole pools that fit after alignment */
925e6b
@@ -1694,17 +1690,19 @@ _PyObject_DebugDumpAddress(const void *p)
925e6b
     }
925e6b
 }
925e6b
 
925e6b
+#endif  /* PYMALLOC_DEBUG */
925e6b
+
925e6b
 static size_t
925e6b
-printone(const char* msg, size_t value)
925e6b
+printone(FILE *out, const char* msg, size_t value)
925e6b
 {
925e6b
     int i, k;
925e6b
     char buf[100];
925e6b
     size_t origvalue = value;
925e6b
 
925e6b
-    fputs(msg, stderr);
925e6b
+    fputs(msg, out);
925e6b
     for (i = (int)strlen(msg); i < 35; ++i)
925e6b
-        fputc(' ', stderr);
925e6b
-    fputc('=', stderr);
925e6b
+        fputc(' ', out);
925e6b
+    fputc('=', out);
925e6b
 
925e6b
     /* Write the value with commas. */
925e6b
     i = 22;
925e6b
@@ -1725,17 +1723,32 @@ printone(const char* msg, size_t value)
925e6b
 
925e6b
     while (i >= 0)
925e6b
         buf[i--] = ' ';
925e6b
-    fputs(buf, stderr);
925e6b
+    fputs(buf, out);
925e6b
 
925e6b
     return origvalue;
925e6b
 }
925e6b
 
925e6b
-/* Print summary info to stderr about the state of pymalloc's structures.
925e6b
+/* Build the label "<num_blocks> <block_name>s * <sizeof_block> bytes each",
925e6b
+ * right-justify it in 48 columns, and pass it with the total size to printone(). */
925e6b
+void
925e6b
+_PyDebugAllocatorStats(FILE *out,
925e6b
+                       const char *block_name, int num_blocks, size_t sizeof_block)
925e6b
+{
925e6b
+    char buf1[128], buf2[128];
925e6b
+    PyOS_snprintf(buf1, sizeof(buf1),
925e6b
+                  "%d %ss * %" PY_FORMAT_SIZE_T "u bytes each",  /* size_t arg */
925e6b
+                  num_blocks, block_name, sizeof_block);
925e6b
+    PyOS_snprintf(buf2, sizeof(buf2), "%48s ", buf1);
925e6b
+    (void)printone(out, buf2, num_blocks * sizeof_block);
925e6b
+}
925e6b
+
925e6b
+
925e6b
+/* Print summary info to "out" about the state of pymalloc's structures.
925e6b
  * In Py_DEBUG mode, also perform some expensive internal consistency
925e6b
  * checks.
925e6b
  */
925e6b
 void
925e6b
-_PyObject_DebugMallocStats(void)
925e6b
+_PyObject_DebugMallocStats(FILE *out)
925e6b
 {
925e6b
     uint i;
925e6b
     const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
925e6b
@@ -1764,7 +1777,7 @@ _PyObject_DebugMallocStats(void)
925e6b
     size_t total;
925e6b
     char buf[128];
925e6b
 
925e6b
-    fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
925e6b
+    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
925e6b
             SMALL_REQUEST_THRESHOLD, numclasses);
925e6b
 
925e6b
     for (i = 0; i < numclasses; ++i)
925e6b
@@ -1818,10 +1831,10 @@ _PyObject_DebugMallocStats(void)
925e6b
     }
925e6b
     assert(narenas == narenas_currently_allocated);
925e6b
 
925e6b
-    fputc('\n', stderr);
925e6b
+    fputc('\n', out);
925e6b
     fputs("class   size   num pools   blocks in use  avail blocks\n"
925e6b
           "-----   ----   ---------   -------------  ------------\n",
925e6b
-          stderr);
925e6b
+          out);
925e6b
 
925e6b
     for (i = 0; i < numclasses; ++i) {
925e6b
         size_t p = numpools[i];
925e6b
@@ -1832,7 +1845,7 @@ _PyObject_DebugMallocStats(void)
925e6b
             assert(b == 0 && f == 0);
925e6b
             continue;
925e6b
         }
925e6b
-        fprintf(stderr, "%5u %6u "
925e6b
+        fprintf(out, "%5u %6u "
925e6b
                         "%11" PY_FORMAT_SIZE_T "u "
925e6b
                         "%15" PY_FORMAT_SIZE_T "u "
925e6b
                         "%13" PY_FORMAT_SIZE_T "u\n",
925e6b
@@ -1842,36 +1855,35 @@ _PyObject_DebugMallocStats(void)
925e6b
         pool_header_bytes += p * POOL_OVERHEAD;
925e6b
         quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
925e6b
     }
925e6b
-    fputc('\n', stderr);
925e6b
-    (void)printone("# times object malloc called", serialno);
925e6b
-
925e6b
-    (void)printone("# arenas allocated total", ntimes_arena_allocated);
925e6b
-    (void)printone("# arenas reclaimed", ntimes_arena_allocated - narenas);
925e6b
-    (void)printone("# arenas highwater mark", narenas_highwater);
925e6b
-    (void)printone("# arenas allocated current", narenas);
925e6b
+    fputc('\n', out);
925e6b
+#ifdef PYMALLOC_DEBUG
925e6b
+    (void)printone(out, "# times object malloc called", serialno);
925e6b
+#endif
925e6b
+    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
925e6b
+    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
925e6b
+    (void)printone(out, "# arenas highwater mark", narenas_highwater);
925e6b
+    (void)printone(out, "# arenas allocated current", narenas);
925e6b
 
925e6b
     PyOS_snprintf(buf, sizeof(buf),
925e6b
         "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
925e6b
         narenas, ARENA_SIZE);
925e6b
-    (void)printone(buf, narenas * ARENA_SIZE);
925e6b
+    (void)printone(out, buf, narenas * ARENA_SIZE);
925e6b
 
925e6b
-    fputc('\n', stderr);
925e6b
+    fputc('\n', out);
925e6b
 
925e6b
-    total = printone("# bytes in allocated blocks", allocated_bytes);
925e6b
-    total += printone("# bytes in available blocks", available_bytes);
925e6b
+    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
925e6b
+    total += printone(out, "# bytes in available blocks", available_bytes);
925e6b
 
925e6b
     PyOS_snprintf(buf, sizeof(buf),
925e6b
         "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
925e6b
-    total += printone(buf, (size_t)numfreepools * POOL_SIZE);
925e6b
+    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
925e6b
 
925e6b
-    total += printone("# bytes lost to pool headers", pool_header_bytes);
925e6b
-    total += printone("# bytes lost to quantization", quantization);
925e6b
-    total += printone("# bytes lost to arena alignment", arena_alignment);
925e6b
-    (void)printone("Total", total);
925e6b
+    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
925e6b
+    total += printone(out, "# bytes lost to quantization", quantization);
925e6b
+    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
925e6b
+    (void)printone(out, "Total", total);
925e6b
 }
925e6b
 
925e6b
-#endif  /* PYMALLOC_DEBUG */
925e6b
-
925e6b
 #ifdef Py_USING_MEMORY_DEBUGGER
925e6b
 /* Make this function last so gcc won't inline it since the definition is
925e6b
  * after the reference.
925e6b
diff --git a/Objects/setobject.c b/Objects/setobject.c
925e6b
index af1ce16..3439b7c 100644
925e6b
--- a/Objects/setobject.c
925e6b
+++ b/Objects/setobject.c
925e6b
@@ -1088,6 +1088,16 @@ PySet_Fini(void)
925e6b
     Py_CLEAR(emptyfrozenset);
925e6b
 }
925e6b
 
925e6b
+/* Hand numfree (this file's free-list counter) and sizeof(PySetObject)
925e6b
+ * to _PyDebugAllocatorStats(), which writes the summary row to "out". */
925e6b
+void
925e6b
+_PySet_DebugMallocStats(FILE *out)
925e6b
+{
925e6b
+    const size_t each = sizeof(PySetObject);
925e6b
+    _PyDebugAllocatorStats(out, "free PySetObject", numfree, each);
925e6b
+}
925e6b
+
925e6b
+
925e6b
 static PyObject *
925e6b
 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
925e6b
 {
925e6b
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
925e6b
index 1209197..b8646dd 100644
925e6b
--- a/Objects/stringobject.c
925e6b
+++ b/Objects/stringobject.c
925e6b
@@ -4843,3 +4843,43 @@ void _Py_ReleaseInternedStrings(void)
925e6b
     PyDict_Clear(interned);
925e6b
     Py_CLEAR(interned);
925e6b
 }
925e6b
+
925e6b
+/* Write the number and summed length of mortal/immortal interned strings to "out". */
925e6b
+void _PyString_DebugMallocStats(FILE *out)
925e6b
+{
925e6b
+    Py_ssize_t i;  /* Py_ssize_t, not ssize_t: the latter is not available everywhere */
925e6b
+    int num_immortal = 0, num_mortal = 0;
925e6b
+    Py_ssize_t immortal_size = 0, mortal_size = 0;
925e6b
+
925e6b
+    if (interned == NULL || !PyDict_Check(interned))
925e6b
+        return;
925e6b
+
925e6b
+    /* Visit every slot of the interned dict's table, skipping NULL me_value. */
925e6b
+    for (i = 0; i <= ((PyDictObject*)interned)->ma_mask; i++) {
925e6b
+        PyDictEntry *ep = ((PyDictObject*)interned)->ma_table + i;
925e6b
+        if (ep->me_value != NULL) {
925e6b
+            PyStringObject *s = (PyStringObject *)ep->me_key;
925e6b
+            switch (s->ob_sstate) {
925e6b
+            case SSTATE_NOT_INTERNED:
925e6b
+                /* XXX Shouldn't happen */
925e6b
+                break;
925e6b
+            case SSTATE_INTERNED_IMMORTAL:
925e6b
+                num_immortal++;
925e6b
+                immortal_size += Py_SIZE(s);
925e6b
+                break;
925e6b
+            case SSTATE_INTERNED_MORTAL:
925e6b
+                num_mortal++;
925e6b
+                mortal_size += Py_SIZE(s);
925e6b
+                break;
925e6b
+            default:
925e6b
+                Py_FatalError("Inconsistent interned string state.");
925e6b
+            }
925e6b
+        }
925e6b
+    }
925e6b
+
925e6b
+    fprintf(out, "%d mortal interned strings\n", num_mortal);
925e6b
+    fprintf(out, "%d immortal interned strings\n", num_immortal);
925e6b
+    fprintf(out, "total size of all interned strings: "
925e6b
+            "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d mortal/immortal\n",
925e6b
+            mortal_size, immortal_size);
925e6b
+}
925e6b
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
925e6b
index 00f2e47..7682d81 100644
925e6b
--- a/Objects/tupleobject.c
925e6b
+++ b/Objects/tupleobject.c
925e6b
@@ -44,6 +44,22 @@ show_track(void)
925e6b
 }
925e6b
 #endif
925e6b
 
925e6b
+/* For each tuple length 1 .. PyTuple_MAXSAVESIZE-1, report numfree[length]
925e6b
+ * and the object size for that length (_PyObject_VAR_SIZE) to "out". */
925e6b
+void
925e6b
+_PyTuple_DebugMallocStats(FILE *out)
925e6b
+{
925e6b
+#if PyTuple_MAXSAVESIZE > 0
925e6b
+    char label[128];
925e6b
+    int len = 1;
925e6b
+    while (len < PyTuple_MAXSAVESIZE) {
925e6b
+        PyOS_snprintf(label, sizeof(label), "free %d-sized PyTupleObject", len);
925e6b
+        _PyDebugAllocatorStats(out, label, numfree[len],
925e6b
+                               _PyObject_VAR_SIZE(&PyTuple_Type, len));
925e6b
+        len++;
925e6b
+    }
925e6b
+#endif
925e6b
+}
925e6b
 
925e6b
 PyObject *
925e6b
 PyTuple_New(register Py_ssize_t size)
925e6b
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
925e6b
index 6bea370..ced9acf 100644
925e6b
--- a/Objects/unicodeobject.c
925e6b
+++ b/Objects/unicodeobject.c
925e6b
@@ -8920,6 +8920,12 @@ _PyUnicode_Fini(void)
925e6b
     (void)PyUnicode_ClearFreeList();
925e6b
 }
925e6b
 
925e6b
+void _PyUnicode_DebugMallocStats(FILE *out)
925e6b
+{   /* Pass numfree and sizeof(PyUnicodeObject) on; the summary row goes to "out" */
925e6b
+    _PyDebugAllocatorStats(out, "free PyUnicodeObject", numfree,
925e6b
+                           sizeof(PyUnicodeObject));
925e6b
+}
925e6b
+
925e6b
 #ifdef __cplusplus
925e6b
 }
925e6b
 #endif
925e6b
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
925e6b
index f0fbd74..0b73f3a 100644
925e6b
--- a/Python/pythonrun.c
925e6b
+++ b/Python/pythonrun.c
925e6b
@@ -557,7 +557,7 @@ Py_Finalize(void)
925e6b
 #endif /* Py_TRACE_REFS */
925e6b
 #ifdef PYMALLOC_DEBUG
925e6b
     if (Py_GETENV("PYTHONMALLOCSTATS"))
925e6b
-        _PyObject_DebugMallocStats();
925e6b
+        _PyObject_DebugMallocStats(stderr);
925e6b
 #endif
925e6b
 
925e6b
     call_ll_exitfuncs();
925e6b
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
925e6b
index 2a7c207..fbb637b 100644
925e6b
--- a/Python/sysmodule.c
925e6b
+++ b/Python/sysmodule.c
925e6b
@@ -873,6 +873,57 @@ a 11-tuple where the entries in the tuple are counts of:\n\
925e6b
 extern "C" {
925e6b
 #endif
925e6b
 
925e6b
+/* sys._debugmallocstats([file]): write pymalloc and per-type allocator
925e6b
+ * statistics to the FILE* behind "file" (default: sys.stderr). */
925e6b
+static PyObject *
925e6b
+sys_debugmallocstats(PyObject *self, PyObject *args)
925e6b
+{
925e6b
+    PyObject *file = NULL;
925e6b
+    FILE *fp;
925e6b
+
925e6b
+    if (!PyArg_ParseTuple(args, "|O!:_debugmallocstats",
925e6b
+                          &PyFile_Type, &file)) {
925e6b
+        return NULL;
925e6b
+    }
925e6b
+    if (!file) {
925e6b
+        /* Default to sys.stderr: */
925e6b
+        file = PySys_GetObject("stderr");
925e6b
+        if (!file) {
925e6b
+            PyErr_SetString(PyExc_ValueError, "sys.stderr not set");
925e6b
+            return NULL;
925e6b
+        }
925e6b
+        if (!PyFile_Check(file)) {
925e6b
+            PyErr_SetString(PyExc_TypeError, "sys.stderr is not a file");
925e6b
+            return NULL;
925e6b
+        }
925e6b
+    }
925e6b
+
925e6b
+    Py_INCREF(file);
925e6b
+    /* OK, we now own a ref on non-NULL "file" */
925e6b
+    fp = PyFile_AsFile(file);
925e6b
+    if (!fp) {
925e6b
+        PyErr_SetString(PyExc_ValueError, "file is closed");
925e6b
+        Py_DECREF(file);
925e6b
+        return NULL;
925e6b
+    }
925e6b
+#ifdef WITH_PYMALLOC  /* _PyObject_DebugMallocStats is declared only with pymalloc */
925e6b
+    _PyObject_DebugMallocStats(fp);
925e6b
+    fputc('\n', fp);
925e6b
+#endif
925e6b
+    _PyObject_DebugTypeStats(fp);
925e6b
+    Py_DECREF(file);
925e6b
+    Py_RETURN_NONE;
925e6b
+}
925e6b
+PyDoc_STRVAR(debugmallocstats_doc,
925e6b
+"_debugmallocstats([file])\n\
925e6b
+\n\
925e6b
+Print summary info to the given file (or sys.stderr) about the state of\n\
925e6b
+pymalloc's structures.\n\
925e6b
+\n\
925e6b
+In Py_DEBUG mode, also perform some expensive internal consistency\n\
925e6b
+checks.\n\
925e6b
+");
925e6b
+
925e6b
 #ifdef Py_TRACE_REFS
925e6b
 /* Defined in objects.c because it uses static globals if that file */
925e6b
 extern PyObject *_Py_GetObjects(PyObject *, PyObject *);
925e6b
@@ -971,6 +1022,8 @@ static PyMethodDef sys_methods[] = {
925e6b
     {"settrace",        sys_settrace, METH_O, settrace_doc},
925e6b
     {"gettrace",        sys_gettrace, METH_NOARGS, gettrace_doc},
925e6b
     {"call_tracing", sys_call_tracing, METH_VARARGS, call_tracing_doc},
925e6b
+    {"_debugmallocstats", sys_debugmallocstats, METH_VARARGS,
925e6b
+     debugmallocstats_doc},
925e6b
     {NULL,              NULL}           /* sentinel */
925e6b
 };
925e6b