Blame SOURCES/00055-systemtap.patch

2a6dbc
diff -up Python-3.3.0rc2/configure.ac.systemtap Python-3.3.0rc2/configure.ac
2a6dbc
--- Python-3.3.0rc2/configure.ac.systemtap	2012-09-09 05:11:14.000000000 -0400
2a6dbc
+++ Python-3.3.0rc2/configure.ac	2012-09-10 09:17:21.114511781 -0400
2a6dbc
@@ -2678,6 +2678,23 @@ if test "$with_valgrind" != no; then
2a6dbc
     OPT="-DDYNAMIC_ANNOTATIONS_ENABLED=1 $OPT"
2a6dbc
 fi
2a6dbc
 
2a6dbc
+# Check for systemtap support
2a6dbc
+# On Linux, /usr/bin/dtrace is in fact a shim to SystemTap
2a6dbc
+AC_MSG_CHECKING([for --with-systemtap])
2a6dbc
+AC_ARG_WITH([systemtap],
2a6dbc
+            AC_HELP_STRING([--with(out)-systemtap], [disable/enable SystemTap support]),,
2a6dbc
+            with_systemtap=no)
2a6dbc
+AC_MSG_RESULT([$with_systemtap])
2a6dbc
+if test "$with_systemtap" != no; then
2a6dbc
+    AC_DEFINE(WITH_SYSTEMTAP, 1,
2a6dbc
+        [Define if you want to compile in SystemTap support])
2a6dbc
+    SYSTEMTAPOBJS="Python/pysystemtap.o"
2a6dbc
+    SYSTEMTAPDEPS="\$(srcdir)/Python/pysystemtap.h"
2a6dbc
+fi
2a6dbc
+
2a6dbc
+AC_SUBST(SYSTEMTAPOBJS)
2a6dbc
+AC_SUBST(SYSTEMTAPDEPS)
2a6dbc
+
2a6dbc
 # -I${DLINCLDIR} is added to the compile rule for importdl.o
2a6dbc
 AC_SUBST(DLINCLDIR)
2a6dbc
 DLINCLDIR=.
2a6dbc
diff -up Python-3.3.0rc2/configure.systemtap Python-3.3.0rc2/configure
2a6dbc
--- Python-3.3.0rc2/configure.systemtap	2012-09-09 05:11:14.000000000 -0400
2a6dbc
+++ Python-3.3.0rc2/configure	2012-09-10 09:17:21.116511780 -0400
2a6dbc
@@ -618,6 +618,8 @@ TRUE
2a6dbc
 MACHDEP_OBJS
2a6dbc
 DYNLOADFILE
2a6dbc
 DLINCLDIR
2a6dbc
+SYSTEMTAPDEPS
2a6dbc
+SYSTEMTAPOBJS
2a6dbc
 THREADOBJ
2a6dbc
 LDLAST
2a6dbc
 USE_THREAD_MODULE
2a6dbc
@@ -779,6 +781,7 @@ with_doc_strings
2a6dbc
 with_tsc
2a6dbc
 with_pymalloc
2a6dbc
 with_valgrind
2a6dbc
+with_systemtap
2a6dbc
 with_fpectl
2a6dbc
 with_libm
2a6dbc
 with_libc
2a6dbc
@@ -1456,6 +1459,7 @@ Optional Packages:
2a6dbc
   --with(out)-tsc         enable/disable timestamp counter profile
2a6dbc
   --with(out)-pymalloc    disable/enable specialized mallocs
2a6dbc
   --with-valgrind         Enable Valgrind support
2a6dbc
+  --with(out)-systemtap   disable/enable SystemTap support
2a6dbc
   --with-fpectl           enable SIGFPE catching
2a6dbc
   --with-libm=STRING      math library
2a6dbc
   --with-libc=STRING      C library
2a6dbc
@@ -10065,6 +10069,31 @@ fi
2a6dbc
     OPT="-DDYNAMIC_ANNOTATIONS_ENABLED=1 $OPT"
2a6dbc
 fi
2a6dbc
 
2a6dbc
+# Check for systemtap support
2a6dbc
+# On Linux, /usr/bin/dtrace is in fact a shim to SystemTap
2a6dbc
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-systemtap" >&5
2a6dbc
+$as_echo_n "checking for --with-systemtap... " >&6; }
2a6dbc
+
2a6dbc
+# Check whether --with-systemtap was given.
2a6dbc
+if test "${with_systemtap+set}" = set; then :
2a6dbc
+  withval=$with_systemtap;
2a6dbc
+else
2a6dbc
+  with_systemtap=no
2a6dbc
+fi
2a6dbc
+
2a6dbc
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_systemtap" >&5
2a6dbc
+$as_echo "$with_systemtap" >&6; }
2a6dbc
+if test "$with_systemtap" != no; then
2a6dbc
+
2a6dbc
+$as_echo "#define WITH_SYSTEMTAP 1" >>confdefs.h
2a6dbc
+
2a6dbc
+    SYSTEMTAPOBJS="Python/pysystemtap.o"
2a6dbc
+    SYSTEMTAPDEPS="\$(srcdir)/Python/pysystemtap.h"
2a6dbc
+fi
2a6dbc
+
2a6dbc
+
2a6dbc
+
2a6dbc
+
2a6dbc
 # -I${DLINCLDIR} is added to the compile rule for importdl.o
2a6dbc
 
2a6dbc
 DLINCLDIR=.
2a6dbc
diff -up Python-3.3.0rc2/Doc/howto/index.rst.systemtap Python-3.3.0rc2/Doc/howto/index.rst
2a6dbc
--- Python-3.3.0rc2/Doc/howto/index.rst.systemtap	2012-09-09 05:10:51.000000000 -0400
2a6dbc
+++ Python-3.3.0rc2/Doc/howto/index.rst	2012-09-10 09:17:21.117511779 -0400
2a6dbc
@@ -29,4 +29,5 @@ Currently, the HOWTOs are:
2a6dbc
    argparse.rst
2a6dbc
    ipaddress.rst
2a6dbc
    clinic.rst
2a6dbc
+   instrumentation.rst
2a6dbc
 
2a6dbc
diff -up Python-3.3.0rc2/Doc/howto/instrumentation.rst.systemtap Python-3.3.0rc2/Doc/howto/instrumentation.rst
2a6dbc
--- Python-3.3.0rc2/Doc/howto/instrumentation.rst.systemtap	2012-09-10 09:17:21.117511779 -0400
2a6dbc
+++ Python-3.3.0rc2/Doc/howto/instrumentation.rst	2012-09-10 09:17:21.117511779 -0400
2a6dbc
@@ -0,0 +1,295 @@
2a6dbc
+.. _instrumentation:
2a6dbc
+
2a6dbc
+====================================
2a6dbc
+Instrumenting CPython with SystemTap
2a6dbc
+====================================
2a6dbc
+
2a6dbc
+:author: David Malcolm <dmalcolm@redhat.com>
2a6dbc
+
2a6dbc
+DTrace and SystemTap are monitoring tools, each providing a way to inspect
2a6dbc
+what the processes on a computer system are doing.  They both use
2a6dbc
+domain-specific languages allowing a user to write scripts which:
2a6dbc
+
2a6dbc
+  - filter which processes are to be observed
2a6dbc
+  - gather data from the processes of interest
2a6dbc
+  - generate reports on the data
2a6dbc
+
2a6dbc
+As of Python 3.3, CPython can be built with embedded "markers" that can be
2a6dbc
+observed by a SystemTap script, making it easier to monitor what the CPython
2a6dbc
+processes on a system are doing.
2a6dbc
+
2a6dbc
+.. Potentially this document could be expanded to also cover DTrace markers.
2a6dbc
+   However, I'm not a DTrace expert.
2a6dbc
+
2a6dbc
+.. I'm using ".. code-block:: c" for SystemTap scripts, as "c" is syntactically
2a6dbc
+   the closest match that Sphinx supports
2a6dbc
+
2a6dbc
+
2a6dbc
+Enabling the static markers
2a6dbc
+---------------------------
2a6dbc
+
2a6dbc
+In order to build CPython with the embedded markers for SystemTap, the
2a6dbc
+SystemTap development tools must be installed.
2a6dbc
+
2a6dbc
+On a Fedora or Red Hat Enterprise Linux machine, this can be done via::
2a6dbc
+
2a6dbc
+   yum install systemtap-sdt-devel
2a6dbc
+
2a6dbc
+CPython must then be configured `--with-systemtap`; the configure output should include::
2a6dbc
+
2a6dbc
+   checking for --with-systemtap... yes
2a6dbc
+
2a6dbc
+You can verify if the SystemTap static markers are present in the built
2a6dbc
+binary by seeing if it contains a ".note.stapsdt" section.
2a6dbc
+
2a6dbc
+.. code-block:: bash
2a6dbc
+
2a6dbc
+   $ eu-readelf -S ./python | grep .note.stapsdt
2a6dbc
+   [29] .note.stapsdt        NOTE         0000000000000000 00308d78 000000b8  0        0   0  4
2a6dbc
+
2a6dbc
+If you've built python as a shared library (with --enable-shared), you need
2a6dbc
+to look instead within the shared library.  For example:
2a6dbc
+
2a6dbc
+.. code-block:: bash
2a6dbc
+
2a6dbc
+   $ eu-readelf -S libpython3.3dm.so.1.0 | grep .note.stapsdt
2a6dbc
+   [28] .note.stapsdt        NOTE         0000000000000000 00365b68 000000b8  0        0   0  4
2a6dbc
+
2a6dbc
+Earlier versions of SystemTap stored the markers in a ".probes" section.
2a6dbc
+
2a6dbc
+For the curious, you can see the metadata for the static markers using this
2a6dbc
+invocation.
2a6dbc
+
2a6dbc
+.. code-block:: bash
2a6dbc
+
2a6dbc
+  $ eu-readelf -x .note.stapsdt ./python
2a6dbc
+
2a6dbc
+  Hex dump of section [29] '.note.stapsdt', 184 bytes at offset 0x308d78:
2a6dbc
+    0x00000000 08000000 45000000 03000000 73746170 ....E.......stap
2a6dbc
+    0x00000010 73647400 d4664b00 00000000 4fc36600 sdt..fK.....O.f.
2a6dbc
+    0x00000020 00000000 488d9000 00000000 70797468 ....H.......pyth
2a6dbc
+    0x00000030 6f6e0066 756e6374 696f6e5f 5f656e74 on.function__ent
2a6dbc
+    0x00000040 72790038 40257261 78203840 25726478 ry.8@%rax 8@%rdx
2a6dbc
+    0x00000050 202d3440 25656378 00000000 08000000  -4@%ecx........
2a6dbc
+    0x00000060 46000000 03000000 73746170 73647400 F.......stapsdt.
2a6dbc
+    0x00000070 0d674b00 00000000 4fc36600 00000000 .gK.....O.f.....
2a6dbc
+    0x00000080 4a8d9000 00000000 70797468 6f6e0066 J.......python.f
2a6dbc
+    0x00000090 756e6374 696f6e5f 5f726574 75726e00 unction__return.
2a6dbc
+    0x000000a0 38402572 61782038 40257264 78202d34 8@%rax 8@%rdx -4
2a6dbc
+    0x000000b0 40256563 78000000                   @%ecx...
2a6dbc
+
2a6dbc
+and a sufficiently modern eu-readelf can print the metadata:
2a6dbc
+
2a6dbc
+.. code-block:: bash
2a6dbc
+
2a6dbc
+  $ eu-readelf -n ./python
2a6dbc
+
2a6dbc
+  Note section [ 1] '.note.gnu.build-id' of 36 bytes at offset 0x190:
2a6dbc
+    Owner          Data size  Type
2a6dbc
+    GNU                   20  GNU_BUILD_ID
2a6dbc
+      Build ID: a28f8db1b224530b0d38ad7b82a249cf7c3f18d6
2a6dbc
+
2a6dbc
+  Note section [27] '.note.stapsdt' of 184 bytes at offset 0x1ae884:
2a6dbc
+    Owner          Data size  Type
2a6dbc
+    stapsdt               70  Version: 3
2a6dbc
+      PC: 0xe0d3a, Base: 0x14b150, Semaphore: 0x3ae882
2a6dbc
+      Provider: python, Name: function__return, Args: '8@%rbx 8@%r13 -4@%eax'
2a6dbc
+    stapsdt               69  Version: 3
2a6dbc
+      PC: 0xe0f37, Base: 0x14b150, Semaphore: 0x3ae880
2a6dbc
+      Provider: python, Name: function__entry, Args: '8@%rbx 8@%r13 -4@%eax'
2a6dbc
+
2a6dbc
+The above metadata contains information for SystemTap describing how it can
2a6dbc
+patch strategically-placed machine code instructions to enable the tracing
2a6dbc
+hooks used by a SystemTap script.
2a6dbc
+
2a6dbc
+
2a6dbc
+Static markers
2a6dbc
+--------------
2a6dbc
+
2a6dbc
+The low-level way to use the SystemTap integration is to use the static
2a6dbc
+markers directly.  This requires you to explicitly state the binary file
2a6dbc
+containing them.
2a6dbc
+
2a6dbc
+For example, this script can be used to show the call/return hierarchy of a
2a6dbc
+Python script:
2a6dbc
+
2a6dbc
+.. code-block:: c
2a6dbc
+
2a6dbc
+   probe process('python').mark("function__entry") {
2a6dbc
+        filename = user_string($arg1);
2a6dbc
+        funcname = user_string($arg2);
2a6dbc
+        lineno = $arg3;
2a6dbc
+
2a6dbc
+        printf("%s => %s in %s:%d\n",
2a6dbc
+               thread_indent(1), funcname, filename, lineno);
2a6dbc
+   }
2a6dbc
+
2a6dbc
+   probe process('python').mark("function__return") {
2a6dbc
+       filename = user_string($arg1);
2a6dbc
+       funcname = user_string($arg2);
2a6dbc
+       lineno = $arg3;
2a6dbc
+
2a6dbc
+       printf("%s <= %s in %s:%d\n",
2a6dbc
+              thread_indent(-1), funcname, filename, lineno);
2a6dbc
+   }
2a6dbc
+
2a6dbc
+It can be invoked like this:
2a6dbc
+
2a6dbc
+.. code-block:: bash
2a6dbc
+
2a6dbc
+   $ stap \
2a6dbc
+     show-call-hierarchy.stp \
2a6dbc
+     -c ./python test.py
2a6dbc
+
2a6dbc
+The output looks like this::
2a6dbc
+
2a6dbc
+   11408 python(8274):        => __contains__ in Lib/_abcoll.py:362
2a6dbc
+   11414 python(8274):         => __getitem__ in Lib/os.py:425
2a6dbc
+   11418 python(8274):          => encode in Lib/os.py:490
2a6dbc
+   11424 python(8274):          <= encode in Lib/os.py:493
2a6dbc
+   11428 python(8274):         <= __getitem__ in Lib/os.py:426
2a6dbc
+   11433 python(8274):        <= __contains__ in Lib/_abcoll.py:366
2a6dbc
+
2a6dbc
+where the columns are:
2a6dbc
+
2a6dbc
+  - time in microseconds since start of script
2a6dbc
+
2a6dbc
+  - name of executable
2a6dbc
+
2a6dbc
+  - PID of process
2a6dbc
+
2a6dbc
+and the remainder indicates the call/return hierarchy as the script executes.
2a6dbc
+
2a6dbc
+For a `--enable-shared` build of CPython, the markers are contained within the
2a6dbc
+libpython shared library, and the probe's dotted path needs to reflect this. For
2a6dbc
+example, this line from the above example::
2a6dbc
+
2a6dbc
+   probe process('python').mark("function__entry") {
2a6dbc
+
2a6dbc
+should instead read::
2a6dbc
+
2a6dbc
+   probe process('python').library("libpython3.3dm.so.1.0").mark("function__entry") {
2a6dbc
+
2a6dbc
+(assuming a debug build of CPython 3.3)
2a6dbc
+
2a6dbc
+.. I'm reusing the "c:function" type for markers
2a6dbc
+
2a6dbc
+.. c:function:: function__entry(str filename, str funcname, int lineno)
2a6dbc
+
2a6dbc
+   This marker indicates that execution of a Python function has begun.  It is
2a6dbc
+   only triggered for pure-python (bytecode) functions.
2a6dbc
+
2a6dbc
+   The filename, function name, and line number are provided back to the
2a6dbc
+   tracing script as positional arguments, which must be accessed using
2a6dbc
+   `$arg1`, `$arg2`, etc.:
2a6dbc
+
2a6dbc
+       * `$arg1` : `(const char *)` filename, accessible using `user_string($arg1)`
2a6dbc
+
2a6dbc
+       * `$arg2` : `(const char *)` function name, accessible using
2a6dbc
+         `user_string($arg2)`
2a6dbc
+
2a6dbc
+       * `$arg3` : `int` line number
2a6dbc
+
2a6dbc
+       * `$arg4` : `(PyFrameObject *)`, the frame being executed
2a6dbc
+
2a6dbc
+.. c:function:: function__return(str filename, str funcname, int lineno)
2a6dbc
+
2a6dbc
+   This marker is the converse of `function__entry`, and indicates that
2a6dbc
+   execution of a Python function has ended (either via ``return``, or via an
2a6dbc
+   exception).  It is only triggered for pure-python (bytecode) functions.
2a6dbc
+
2a6dbc
+   The arguments are the same as for `function__entry`
2a6dbc
+
2a6dbc
+
2a6dbc
+Tapsets
2a6dbc
+-------
2a6dbc
+
2a6dbc
+The higher-level way to use the SystemTap integration is to use a "tapset":
2a6dbc
+SystemTap's equivalent of a library, which hides some of the lower-level
2a6dbc
+details of the static markers.
2a6dbc
+
2a6dbc
+Here is a tapset file, based on a non-shared build of CPython:
2a6dbc
+
2a6dbc
+.. code-block:: c
2a6dbc
+
2a6dbc
+    /*
2a6dbc
+       Provide a higher-level wrapping around the function__entry and
2a6dbc
+       function__return markers:
2a6dbc
+     */
2a6dbc
+    probe python.function.entry = process("python").mark("function__entry")
2a6dbc
+    {
2a6dbc
+        filename = user_string($arg1);
2a6dbc
+        funcname = user_string($arg2);
2a6dbc
+        lineno = $arg3;
2a6dbc
+        frameptr = $arg4
2a6dbc
+    }
2a6dbc
+    probe python.function.return = process("python").mark("function__return")
2a6dbc
+    {
2a6dbc
+        filename = user_string($arg1);
2a6dbc
+        funcname = user_string($arg2);
2a6dbc
+        lineno = $arg3;
2a6dbc
+        frameptr = $arg4
2a6dbc
+    }
2a6dbc
+
2a6dbc
+If this file is installed in SystemTap's tapset directory (e.g.
2a6dbc
+`/usr/share/systemtap/tapset`), then these additional probepoints become
2a6dbc
+available:
2a6dbc
+
2a6dbc
+.. c:function:: python.function.entry(str filename, str funcname, int lineno, frameptr)
2a6dbc
+
2a6dbc
+   This probe point indicates that execution of a Python function has begun.
2a6dbc
+   It is only triggered for pure-python (bytecode) functions.
2a6dbc
+
2a6dbc
+.. c:function:: python.function.return(str filename, str funcname, int lineno, frameptr)
2a6dbc
+
2a6dbc
+   This probe point is the converse of `python.function.entry`, and indicates
2a6dbc
+   that execution of a Python function has ended (either via ``return``, or
2a6dbc
+   via an exception).  It is only triggered for pure-python (bytecode) functions.
2a6dbc
+
2a6dbc
+
2a6dbc
+Examples
2a6dbc
+--------
2a6dbc
+This SystemTap script uses the tapset above to more cleanly implement the
2a6dbc
+example given above of tracing the Python function-call hierarchy, without
2a6dbc
+needing to directly name the static markers:
2a6dbc
+
2a6dbc
+.. code-block:: c
2a6dbc
+
2a6dbc
+    probe python.function.entry
2a6dbc
+    {
2a6dbc
+      printf("%s => %s in %s:%d\n",
2a6dbc
+             thread_indent(1), funcname, filename, lineno);
2a6dbc
+    }
2a6dbc
+
2a6dbc
+    probe python.function.return
2a6dbc
+    {
2a6dbc
+      printf("%s <= %s in %s:%d\n",
2a6dbc
+             thread_indent(-1), funcname, filename, lineno);
2a6dbc
+    }
2a6dbc
+
2a6dbc
+
2a6dbc
+The following script uses the tapset above to provide a top-like view of all
2a6dbc
+running CPython code, showing the top 20 most frequently-entered bytecode
2a6dbc
+frames, each second, across the whole system:
2a6dbc
+
2a6dbc
+.. code-block:: c
2a6dbc
+
2a6dbc
+    global fn_calls;
2a6dbc
+
2a6dbc
+    probe python.function.entry
2a6dbc
+    {
2a6dbc
+      fn_calls[pid(), filename, funcname, lineno] += 1;
2a6dbc
+    }
2a6dbc
+
2a6dbc
+    probe timer.ms(1000) {
2a6dbc
+        printf("\033[2J\033[1;1H") /* clear screen */
2a6dbc
+        printf("%6s %80s %6s %30s %6s\n",
2a6dbc
+               "PID", "FILENAME", "LINE", "FUNCTION", "CALLS")
2a6dbc
+        foreach ([pid, filename, funcname, lineno] in fn_calls- limit 20) {
2a6dbc
+            printf("%6d %80s %6d %30s %6d\n",
2a6dbc
+                pid, filename, lineno, funcname,
2a6dbc
+                fn_calls[pid, filename, funcname, lineno]);
2a6dbc
+        }
2a6dbc
+        delete fn_calls;
2a6dbc
+    }
2a6dbc
+
2a6dbc
diff -up Python-3.3.0rc2/Lib/test/test_systemtap.py.systemtap Python-3.3.0rc2/Lib/test/test_systemtap.py
2a6dbc
--- Python-3.3.0rc2/Lib/test/test_systemtap.py.systemtap	2012-09-10 09:17:21.117511779 -0400
2a6dbc
+++ Python-3.3.0rc2/Lib/test/test_systemtap.py	2012-09-10 09:17:21.117511779 -0400
2a6dbc
@@ -0,0 +1,234 @@
2a6dbc
+# Verify that systemtap static probes work
2a6dbc
+#
2a6dbc
+import subprocess
2a6dbc
+import sys
2a6dbc
+import sysconfig
2a6dbc
+import os
2a6dbc
+import unittest
2a6dbc
+
2a6dbc
+from test.support import run_unittest, TESTFN, unlink
2a6dbc
+
2a6dbc
+if '--with-systemtap' not in sysconfig.get_config_var('CONFIG_ARGS'):
2a6dbc
+    raise unittest.SkipTest("Python was not configured --with-systemtap")
2a6dbc
+
2a6dbc
+try:
2a6dbc
+    _, stap_version = subprocess.Popen(["stap", "-V"],
2a6dbc
+                                       stdout=subprocess.PIPE,
2a6dbc
+                                       stderr=subprocess.PIPE,
2a6dbc
+                                       ).communicate()
2a6dbc
+except OSError:
2a6dbc
+    # This is what "no stap" looks like.  There may, however, be other
2a6dbc
+    # errors that manifest this way too.
2a6dbc
+    raise unittest.SkipTest("Couldn't find stap on the path")
2a6dbc
+
2a6dbc
+def invoke_systemtap_script(script, cmd):
2a6dbc
+    # Start a child process, probing with the given systemtap script
2a6dbc
+    # (passed as stdin to the "stap" tool)
2a6dbc
+    # The script should be a bytes instance
2a6dbc
+    # Return (stdout, stderr) pair
2a6dbc
+
2a6dbc
+    p = subprocess.Popen(["stap", "-", '-vv', '-c', cmd],
2a6dbc
+                         stdin=subprocess.PIPE,
2a6dbc
+                         stdout=subprocess.PIPE,
2a6dbc
+                         stderr=subprocess.PIPE)
2a6dbc
+    out, err = p.communicate(input=script)
2a6dbc
+    return out, err
2a6dbc
+
2a6dbc
+# Verify that stap can run a simple "hello world"-style script
2a6dbc
+# This can fail for various reasons:
2a6dbc
+# - missing kernel headers
2a6dbc
+# - permissions (a non-root user needs to be in the "stapdev" group)
2a6dbc
+TRIVIAL_STAP_SCRIPT = b'probe begin { println("hello world") exit () }'
2a6dbc
+
2a6dbc
+out, err = invoke_systemtap_script(TRIVIAL_STAP_SCRIPT, 'true')
2a6dbc
+if out != b'hello world\n':
2a6dbc
+    raise unittest.SkipTest("Test systemtap script did not run; stderr was: %s" % err)
2a6dbc
+
2a6dbc
+# We don't expect stderr to be empty, since we're invoking stap with "-vv": stap
2a6dbc
+# will (we hope) generate debugging output on stderr.
2a6dbc
+
2a6dbc
+def invoke_python_under_systemtap(script, pythoncode=None, pythonfile=None):
2a6dbc
+    # Start a child python process, probing with the given systemtap script
2a6dbc
+    # (passed as stdin to the "stap" tool)
2a6dbc
+    # The script should be a bytes instance
2a6dbc
+    # Return (stdout, stderr) pair
2a6dbc
+
2a6dbc
+    if pythonfile:
2a6dbc
+        pythoncmd = '%s %s' % (sys.executable, pythonfile)
2a6dbc
+    else:
2a6dbc
+        pythoncmd = '%s -c %r' % (sys.executable, pythoncode)
2a6dbc
+
2a6dbc
+    # The process tree of a stap invocation of a command goes through
2a6dbc
+    # something like this:
2a6dbc
+    #    stap ->fork/exec(staprun; exec stapio ->f/e(-c cmd); exec staprun -r)
2a6dbc
+    # and this trip through setuid leads to LD_LIBRARY_PATH being dropped,
2a6dbc
+    # which would lead to an --enable-shared build of python failing to
2a6dbc
+    # find its libpython, with an error like:
2a6dbc
+    #    error while loading shared libraries: libpython3.3dm.so.1.0: cannot
2a6dbc
+    #    open shared object file: No such file or directory
2a6dbc
+    # Hence we need to jump through some hoops to expose LD_LIBRARY_PATH to
2a6dbc
+    # the invoked python process:
2a6dbc
+    LD_LIBRARY_PATH = os.environ.get('LD_LIBRARY_PATH', '')
2a6dbc
+    if LD_LIBRARY_PATH:
2a6dbc
+        pythoncmd = 'env LD_LIBRARY_PATH=%s ' % LD_LIBRARY_PATH + pythoncmd
2a6dbc
+
2a6dbc
+    return invoke_systemtap_script(script, pythoncmd)
2a6dbc
+
2a6dbc
+# When using the static markers, we need to supply the prefix of a systemtap
2a6dbc
+# dotted probe point that contains the marker.
2a6dbc
+# See http://sourceware.org/systemtap/langref/Probe_points.html
2a6dbc
+#
2a6dbc
+# We need to determine if this is a shared-library build
2a6dbc
+#
2a6dbc
+# Note that sysconfig can get this wrong; see:
2a6dbc
+#   http://bugs.python.org/issue14774
2a6dbc
+#
2a6dbc
+if '--enable-shared' in sysconfig.get_config_var('CONFIG_ARGS'):
2a6dbc
+    # For a shared-library build, the markers are in library(INSTSONAME):
2a6dbc
+    INSTSONAME = sysconfig.get_config_var('INSTSONAME')
2a6dbc
+    probe_prefix = 'process("%s").library("%s")' % (sys.executable, INSTSONAME)
2a6dbc
+else:
2a6dbc
+    # For a non-shared-library build, we can simply use sys.executable:
2a6dbc
+    probe_prefix = 'process("%s")' % sys.executable
2a6dbc
+
2a6dbc
+# The following script ought to generate lots of lines showing recursive
2a6dbc
+# function entry and return, of the form:
2a6dbc
+#     11408 python(8274):        => __contains__ in Lib/_abcoll.py:362
2a6dbc
+#     11414 python(8274):         => __getitem__ in Lib/os.py:425
2a6dbc
+#     11418 python(8274):          => encode in Lib/os.py:490
2a6dbc
+#     11424 python(8274):          <= encode in Lib/os.py:493
2a6dbc
+#     11428 python(8274):         <= __getitem__ in Lib/os.py:426
2a6dbc
+#     11433 python(8274):        <= __contains__ in Lib/_abcoll.py:366
2a6dbc
+# where the columns are:
2a6dbc
+#  - time in microseconds since start of script
2a6dbc
+#  - name of executable
2a6dbc
+#  - PID of process
2a6dbc
+#  and the remainder indicates the call/return hierarchy
2a6dbc
+
2a6dbc
+hierarchy_script = ('''
2a6dbc
+probe %s.mark("function__entry") {
2a6dbc
+    filename = user_string($arg1);
2a6dbc
+    funcname = user_string($arg2);
2a6dbc
+    lineno = $arg3;
2a6dbc
+
2a6dbc
+    printf("%%s => %%s in %%s:%%d\\n", thread_indent(1), funcname, filename, lineno);
2a6dbc
+}
2a6dbc
+
2a6dbc
+probe %s.mark("function__return") {
2a6dbc
+    filename = user_string($arg1);
2a6dbc
+    funcname = user_string($arg2);
2a6dbc
+    lineno = $arg3;
2a6dbc
+
2a6dbc
+    printf("%%s <= %%s in %%s:%%d\\n", thread_indent(-1), funcname, filename, lineno);
2a6dbc
+}
2a6dbc
+''' % (probe_prefix, probe_prefix)).encode('utf-8')
2a6dbc
+
2a6dbc
+
2a6dbc
+class ErrorDumper:
2a6dbc
+    # A context manager that dumps the captured stdout/stderr (bytes) of a
2a6dbc
+    # stap invocation if an exception is raised inside the "with" block
2a6dbc
+    def __init__(self, out, err):
2a6dbc
+        self.out = out
2a6dbc
+        self.err = err
2a6dbc
+
2a6dbc
+    def __enter__(self):
2a6dbc
+        pass
2a6dbc
+
2a6dbc
+    def __exit__(self, type_, value, traceback):
2a6dbc
+        if type_:
2a6dbc
+            # an exception is being raised: dump *this* run's output, leniently
2a6dbc
+            print('stdout: %s' % self.out.decode(errors='replace'))
2a6dbc
+            print('stderr: %s' % self.err.decode(errors='replace'))
2a6dbc
+
2a6dbc
+class SystemtapTests(unittest.TestCase):
2a6dbc
+
2a6dbc
+    def test_invoking_python(self):
2a6dbc
+        # Ensure that we can invoke python under stap, with a trivial stap
2a6dbc
+        # script:
2a6dbc
+        out, err = invoke_python_under_systemtap(
2a6dbc
+            b'probe begin { println("hello from stap") exit () }',
2a6dbc
+            pythoncode="print('hello from python')")
2a6dbc
+        with ErrorDumper(out, err):
2a6dbc
+            self.assertIn(b'hello from stap', out)
2a6dbc
+            self.assertIn(b'hello from python', out)
2a6dbc
+
2a6dbc
+    def test_function_entry(self):
2a6dbc
+        # Ensure that the function__entry static marker works
2a6dbc
+        out, err = invoke_python_under_systemtap(hierarchy_script)
2a6dbc
+        # stdout ought to contain various lines showing recursive function
2a6dbc
+        # entry and return (see above)
2a6dbc
+
2a6dbc
+        # Uncomment this for debugging purposes:
2a6dbc
+        # print(out.decode('utf-8'))
2a6dbc
+
2a6dbc
+        #   Executing the cmdline-supplied "pass":
2a6dbc
+        #      0 python(8274): => <module> in <string>:1
2a6dbc
+        #      5 python(8274): <= <module> in <string>:1
2a6dbc
+        with ErrorDumper(out, err):
2a6dbc
+            self.assertIn(b'=> <module> in <string>:1', out,
2a6dbc
+                          msg="stdout: %s\nstderr: %s\n" % (out, err))
2a6dbc
+
2a6dbc
+    def test_function_encoding(self):
2a6dbc
+        # Ensure that function names containing non-Latin 1 code
2a6dbc
+        # points are handled:
2a6dbc
+        pythonfile = TESTFN
2a6dbc
+        try:
2a6dbc
+            unlink(pythonfile)
2a6dbc
+            f = open(pythonfile, "wb")
2a6dbc
+            f.write("""
2a6dbc
+# Sample script with non-ASCII filename, for use by test_systemtap.py
2a6dbc
+# Implicitly UTF-8
2a6dbc
+
2a6dbc
+def 文字化け():
2a6dbc
+    '''Function with non-ASCII identifier; I believe this reads "mojibake"'''
2a6dbc
+    print("hello world!")
2a6dbc
+
2a6dbc
+文字化け()
2a6dbc
+""".encode('utf-8'))
2a6dbc
+            f.close()
2a6dbc
+
2a6dbc
+            out, err = invoke_python_under_systemtap(hierarchy_script,
2a6dbc
+                                                     pythonfile=pythonfile)
2a6dbc
+            out_utf8 = out.decode('utf-8')
2a6dbc
+            with ErrorDumper(out, err):
2a6dbc
+                self.assertIn('=> <module> in %s:5' % pythonfile, out_utf8)
2a6dbc
+                self.assertIn(' => 文字化け in %s:5' % pythonfile, out_utf8)
2a6dbc
+                self.assertIn(' <= 文字化け in %s:7' % pythonfile, out_utf8)
2a6dbc
+                self.assertIn('<= <module> in %s:9' % pythonfile, out_utf8)
2a6dbc
+        finally:
2a6dbc
+            unlink(pythonfile)
2a6dbc
+
2a6dbc
+    @unittest.skipIf(sys.getfilesystemencoding() == 'ascii',
2a6dbc
+                     'the test filename is not encodable with ASCII')
2a6dbc
+    def test_filename_encoding(self):
2a6dbc
+        # Ensure that script names containing non-Latin 1 code
2a6dbc
+        # points are handled:
2a6dbc
+        pythonfile = TESTFN + '_☠.py'
2a6dbc
+        try:
2a6dbc
+            unlink(pythonfile)
2a6dbc
+            f = open(pythonfile, "wb")
2a6dbc
+            f.write("""
2a6dbc
+def foo():
2a6dbc
+    '''Function with non-ASCII identifier; I believe this reads "mojibake"'''
2a6dbc
+    print("hello world!")
2a6dbc
+
2a6dbc
+foo()
2a6dbc
+""".encode('utf-8'))
2a6dbc
+            f.close()
2a6dbc
+
2a6dbc
+            out, err = invoke_python_under_systemtap(hierarchy_script,
2a6dbc
+                                                     pythonfile=pythonfile)
2a6dbc
+            out_utf8 = out.decode('utf-8')
2a6dbc
+            with ErrorDumper(out, err):
2a6dbc
+                self.assertIn('=> <module> in %s:2' % pythonfile, out_utf8)
2a6dbc
+                self.assertIn(' => foo in %s:2' % pythonfile, out_utf8)
2a6dbc
+                self.assertIn(' <= foo in %s:4' % pythonfile, out_utf8)
2a6dbc
+                self.assertIn('<= <module> in %s:6' % pythonfile, out_utf8)
2a6dbc
+        finally:
2a6dbc
+            unlink(pythonfile)
2a6dbc
+
2a6dbc
+def test_main():
2a6dbc
+    run_unittest(SystemtapTests)
2a6dbc
+
2a6dbc
+if __name__ == "__main__":
2a6dbc
+    test_main()
2a6dbc
diff -up Python-3.3.0rc2/Makefile.pre.in.systemtap Python-3.3.0rc2/Makefile.pre.in
2a6dbc
--- Python-3.3.0rc2/Makefile.pre.in.systemtap	2012-09-09 05:11:05.000000000 -0400
2a6dbc
+++ Python-3.3.0rc2/Makefile.pre.in	2012-09-10 09:19:51.195501518 -0400
2a6dbc
@@ -363,6 +363,7 @@ PYTHON_OBJS=	\
2a6dbc
 		Python/formatter_unicode.o \
2a6dbc
 		Python/fileutils.o \
2a6dbc
 		Python/$(DYNLOADFILE) \
2a6dbc
+		@SYSTEMTAPOBJS@ \
2a6dbc
 		$(LIBOBJS) \
2a6dbc
 		$(MACHDEP_OBJS) \
2a6dbc
 		$(THREADOBJ)
2a6dbc
@@ -713,7 +714,8 @@ Objects/setobject.o: $(srcdir)/Objects/s
2a6dbc
 $(OPCODETARGETS_H): $(OPCODETARGETGEN_FILES)
2a6dbc
 	$(OPCODETARGETGEN) $(OPCODETARGETS_H)
2a6dbc
 
2a6dbc
-Python/ceval.o: $(OPCODETARGETS_H) $(srcdir)/Python/ceval_gil.h
2a6dbc
+Python/ceval.o: $(OPCODETARGETS_H) $(srcdir)/Python/ceval_gil.h \
2a6dbc
+			$(srcdir)/Python/ceval_systemtap.h @SYSTEMTAPDEPS@
2a6dbc
 
2a6dbc
 Python/frozen.o: Python/importlib.h Python/importlib_external.h
2a6dbc
 
2a6dbc
@@ -724,6 +726,13 @@ Objects/typeobject.o: $(srcdir)/Objects/
2a6dbc
 Objects/typeslots.inc: $(srcdir)/Include/typeslots.h $(srcdir)/Objects/typeslots.py
2a6dbc
 	$(PYTHON) $(srcdir)/Objects/typeslots.py < $(srcdir)/Include/typeslots.h > Objects/typeslots.inc
2a6dbc
 
2a6dbc
+# Only needed with --with-systemtap; not a public header:
2a6dbc
+$(srcdir)/Python/pysystemtap.h: $(srcdir)/Python/pysystemtap.d
2a6dbc
+	dtrace -o $@ $(DFLAGS) -C -h -s $(srcdir)/Python/pysystemtap.d
2a6dbc
+
2a6dbc
+Python/pysystemtap.o: $(srcdir)/Python/pysystemtap.d Python/ceval.o
2a6dbc
+	dtrace -o $@ $(DFLAGS) -C -G -s $(srcdir)/Python/pysystemtap.d Python/ceval.o
2a6dbc
+
2a6dbc
 ############################################################################
2a6dbc
 # Header files
2a6dbc
 
2a6dbc
@@ -1345,6 +1354,7 @@ clean: pycremoval
2a6dbc
 	-rm -f Lib/lib2to3/*Grammar*.pickle
2a6dbc
 	-rm -f Programs/_testembed Programs/_freeze_importlib
2a6dbc
 	-rm -rf build
2a6dbc
+	-rm -f $(srcdir)/Python/pysystemtap.h
2a6dbc
 
2a6dbc
 profile-removal:
2a6dbc
 	find . -name '*.gc??' -exec rm -f {} ';'
2a6dbc
diff -up Python-3.3.0rc2/pyconfig.h.in.systemtap Python-3.3.0rc2/pyconfig.h.in
2a6dbc
--- Python-3.3.0rc2/pyconfig.h.in.systemtap	2012-09-09 05:11:14.000000000 -0400
2a6dbc
+++ Python-3.3.0rc2/pyconfig.h.in	2012-09-10 09:17:21.120511781 -0400
2a6dbc
@@ -1306,6 +1306,9 @@
2a6dbc
 /* Define if you want to compile in Python-specific mallocs */
2a6dbc
 #undef WITH_PYMALLOC
2a6dbc
 
2a6dbc
+/* Define if you want to compile in SystemTap support */
2a6dbc
+#undef WITH_SYSTEMTAP
2a6dbc
+
2a6dbc
 /* Define if you want to compile in rudimentary thread support */
2a6dbc
 #undef WITH_THREAD
2a6dbc
 
2a6dbc
diff -up Python-3.3.0rc2/Python/ceval.c.systemtap Python-3.3.0rc2/Python/ceval.c
2a6dbc
--- Python-3.3.0rc2/Python/ceval.c.systemtap	2012-09-09 05:11:12.000000000 -0400
2a6dbc
+++ Python-3.3.0rc2/Python/ceval.c	2012-09-10 09:17:21.122511781 -0400
2a6dbc
@@ -18,6 +18,8 @@
2a6dbc
 
2a6dbc
 #include <ctype.h>
2a6dbc
 
2a6dbc
+#include "ceval_systemtap.h"
2a6dbc
+
2a6dbc
 #ifndef WITH_TSC
2a6dbc
 
2a6dbc
 #define READ_TIMESTAMP(var)
2a6dbc
@@ -1160,6 +1162,10 @@ PyEval_EvalFrameEx(PyFrameObject *f, int
2a6dbc
         }
2a6dbc
     }
2a6dbc
 
2a6dbc
+    if (PYTHON_FUNCTION_ENTRY_ENABLED()) {
2a6dbc
+        systemtap_function_entry(f);
2a6dbc
+    }
2a6dbc
+
2a6dbc
     co = f->f_code;
2a6dbc
     names = co->co_names;
2a6dbc
     consts = co->co_consts;
2a6dbc
@@ -3077,6 +3083,11 @@ fast_yield:
2a6dbc
 
2a6dbc
     /* pop frame */
2a6dbc
 exit_eval_frame:
2a6dbc
+
2a6dbc
+    if (PYTHON_FUNCTION_RETURN_ENABLED()) {
2a6dbc
+        systemtap_function_return(f);
2a6dbc
+    }
2a6dbc
+
2a6dbc
     Py_LeaveRecursiveCall();
2a6dbc
     f->f_executing = 0;
2a6dbc
     tstate->frame = f->f_back;
2a6dbc
diff -up Python-3.3.0rc2/Python/ceval_systemtap.h.systemtap Python-3.3.0rc2/Python/ceval_systemtap.h
2a6dbc
--- Python-3.3.0rc2/Python/ceval_systemtap.h.systemtap	2012-09-10 09:17:21.122511781 -0400
2a6dbc
+++ Python-3.3.0rc2/Python/ceval_systemtap.h	2012-09-10 09:17:21.122511781 -0400
2a6dbc
@@ -0,0 +1,86 @@
2a6dbc
+/*
2a6dbc
+  Support for SystemTap static markers  
2a6dbc
+*/
2a6dbc
+
2a6dbc
+#ifdef WITH_SYSTEMTAP
2a6dbc
+
2a6dbc
+#include "pysystemtap.h"
2a6dbc
+
2a6dbc
+/*
2a6dbc
+  A struct to hold all of the information gathered when one of the traceable
2a6dbc
+  markers is triggered
2a6dbc
+*/
2a6dbc
+struct frame_marker_info
2a6dbc
+{
2a6dbc
+    PyObject *filename_obj;
2a6dbc
+    PyObject *funcname_obj;
2a6dbc
+    const char *filename;
2a6dbc
+    const char *funcname;
2a6dbc
+    int lineno;
2a6dbc
+};
2a6dbc
+
2a6dbc
+static void
2a6dbc
+get_frame_marker_info(PyFrameObject *f, struct frame_marker_info *fmi)
2a6dbc
+{
2a6dbc
+    PyObject *ptype;
2a6dbc
+    PyObject *pvalue;
2a6dbc
+    PyObject *ptraceback;
2a6dbc
+
2a6dbc
+    PyErr_Fetch(&ptype, &pvalue, &ptraceback);
2a6dbc
+
2a6dbc
+    fmi->filename_obj = PyUnicode_EncodeFSDefault(f->f_code->co_filename);
2a6dbc
+    if (fmi->filename_obj) {
2a6dbc
+        fmi->filename = PyBytes_AsString(fmi->filename_obj);
2a6dbc
+    } else {
2a6dbc
+        fmi->filename = NULL;
2a6dbc
+    }
2a6dbc
+
2a6dbc
+    fmi->funcname_obj = PyUnicode_AsUTF8String(f->f_code->co_name);
2a6dbc
+    if (fmi->funcname_obj) {
2a6dbc
+        fmi->funcname = PyBytes_AsString(fmi->funcname_obj);
2a6dbc
+    } else {
2a6dbc
+        fmi->funcname = NULL;
2a6dbc
+    }
2a6dbc
+
2a6dbc
+    fmi->lineno = PyCode_Addr2Line(f->f_code, f->f_lasti);
2a6dbc
+
2a6dbc
+    PyErr_Restore(ptype, pvalue, ptraceback);
2a6dbc
+
2a6dbc
+}
2a6dbc
+
2a6dbc
+static void
2a6dbc
+release_frame_marker_info(struct frame_marker_info *fmi)
2a6dbc
+{
2a6dbc
+    Py_XDECREF(fmi->filename_obj);
2a6dbc
+    Py_XDECREF(fmi->funcname_obj);
2a6dbc
+}
2a6dbc
+
2a6dbc
+static void
2a6dbc
+systemtap_function_entry(PyFrameObject *f)
2a6dbc
+{
2a6dbc
+    struct frame_marker_info fmi;
2a6dbc
+    get_frame_marker_info(f, &fmi);
2a6dbc
+    PYTHON_FUNCTION_ENTRY(fmi.filename, fmi.funcname, fmi.lineno, f);
2a6dbc
+    release_frame_marker_info(&fmi);
2a6dbc
+}
2a6dbc
+
2a6dbc
+static void
2a6dbc
+systemtap_function_return(PyFrameObject *f)
2a6dbc
+{
2a6dbc
+    struct frame_marker_info fmi;
2a6dbc
+    get_frame_marker_info(f, &fmi);
2a6dbc
+    PYTHON_FUNCTION_RETURN(fmi.filename, fmi.funcname, fmi.lineno, f);
2a6dbc
+    release_frame_marker_info(&fmi);
2a6dbc
+}
2a6dbc
+
2a6dbc
+#else /* #ifdef WITH_SYSTEMTAP */
2a6dbc
+
2a6dbc
+/*
2a6dbc
+  When configured --without-systemtap, everything compiles away to nothing:
2a6dbc
+*/
2a6dbc
+#define PYTHON_FUNCTION_ENTRY_ENABLED() 0
2a6dbc
+#define PYTHON_FUNCTION_RETURN_ENABLED() 0
2a6dbc
+#define systemtap_function_entry(f)
2a6dbc
+#define systemtap_function_return(f)
2a6dbc
+
2a6dbc
+#endif
2a6dbc
diff -up Python-3.3.0rc2/Python/pysystemtap.d.systemtap Python-3.3.0rc2/Python/pysystemtap.d
2a6dbc
--- Python-3.3.0rc2/Python/pysystemtap.d.systemtap	2012-09-10 09:17:21.122511781 -0400
2a6dbc
+++ Python-3.3.0rc2/Python/pysystemtap.d	2012-09-10 09:17:21.122511781 -0400
2a6dbc
@@ -0,0 +1,4 @@
2a6dbc
+provider python {
2a6dbc
+    probe function__entry(const char *, const char *, int, PyFrameObject *);
2a6dbc
+    probe function__return(const char *, const char *, int, PyFrameObject *);
2a6dbc
+};