Blob Blame History Raw
commit 14ffcd934e1c5099b471f4e73da32d1b32bac7e6
Author: Andreas Gerstmayr <agerstmayr@redhat.com>
Date:   Mon Dec 13 20:10:40 2021 +0100

    pmdabcc: sync bcc PMDA modules with upstream bcc tools

diff --git a/src/pmdas/bcc/modules/execsnoop.bpf b/src/pmdas/bcc/modules/execsnoop.bpf
index f69200773..aa755b3a1 100644
--- a/src/pmdas/bcc/modules/execsnoop.bpf
+++ b/src/pmdas/bcc/modules/execsnoop.bpf
@@ -4,40 +4,57 @@
 #include <uapi/linux/ptrace.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
+
 #define ARGSIZE  128
+
 enum event_type {
     EVENT_ARG,
     EVENT_RET,
 };
+
 struct data_t {
     u32 pid;  // PID as in the userspace term (i.e. task->tgid in kernel)
     u32 ppid; // Parent PID as in the userspace term (i.e task->real_parent->tgid in kernel)
+    u32 uid;
     char comm[TASK_COMM_LEN];
     enum event_type type;
     char argv[ARGSIZE];
     int retval;
 };
+
 BPF_PERF_OUTPUT(events);
+
 static int __submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
 {
-    bpf_probe_read(data->argv, sizeof(data->argv), ptr);
+    bpf_probe_read_user(data->argv, sizeof(data->argv), ptr);
     events.perf_submit(ctx, data, sizeof(struct data_t));
     return 1;
 }
+
 static int submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
 {
     const char *argp = NULL;
-    bpf_probe_read(&argp, sizeof(argp), ptr);
+    bpf_probe_read_user(&argp, sizeof(argp), ptr);
     if (argp) {
         return __submit_arg(ctx, (void *)(argp), data);
     }
     return 0;
 }
+
 int syscall__execve(struct pt_regs *ctx,
     const char __user *filename,
     const char __user *const __user *__argv,
     const char __user *const __user *__envp)
 {
+
+    u32 uid = bpf_get_current_uid_gid() & 0xffffffff;
+
+    UID_FILTER
+
+    if (container_should_be_filtered()) {
+        return 0;
+    }
+
     // create data here and pass to submit_arg to save stack space (#555)
     struct data_t data = {};
     struct task_struct *task;
@@ -52,25 +69,37 @@ int syscall__execve(struct pt_regs *ctx,
 
     bpf_get_current_comm(&data.comm, sizeof(data.comm));
     data.type = EVENT_ARG;
+
     __submit_arg(ctx, (void *)filename, &data);
+
     // skip first arg, as we submitted filename
     #pragma unroll
     for (int i = 1; i < MAXARG; i++) {
         if (submit_arg(ctx, (void *)&__argv[i], &data) == 0)
              goto out;
     }
+
     // handle truncated argument list
     char ellipsis[] = "...";
     __submit_arg(ctx, (void *)ellipsis, &data);
 out:
     return 0;
 }
+
 int do_ret_sys_execve(struct pt_regs *ctx)
 {
+    if (container_should_be_filtered()) {
+        return 0;
+    }
+
     struct data_t data = {};
     struct task_struct *task;
 
+    u32 uid = bpf_get_current_uid_gid() & 0xffffffff;
+    UID_FILTER
+
     data.pid = bpf_get_current_pid_tgid() >> 32;
+    data.uid = uid;
 
     task = (struct task_struct *)bpf_get_current_task();
     // Some kernels, like Ubuntu 4.13.0-generic, return 0
@@ -82,5 +111,6 @@ int do_ret_sys_execve(struct pt_regs *ctx)
     data.type = EVENT_RET;
     data.retval = PT_REGS_RC(ctx);
     events.perf_submit(ctx, &data, sizeof(data));
+
     return 0;
 }
diff --git a/src/pmdas/bcc/modules/execsnoop.python b/src/pmdas/bcc/modules/execsnoop.python
index 54382fa9b..1127cc471 100644
--- a/src/pmdas/bcc/modules/execsnoop.python
+++ b/src/pmdas/bcc/modules/execsnoop.python
@@ -44,20 +44,6 @@ MODULE = 'execsnoop'
 BASENS = 'proc.exec.'
 units_none = pmUnits(0, 0, 0, 0, 0, 0)
 
-TASK_COMM_LEN = 16      # linux/sched.h
-ARGSIZE = 128           # should match #define in execsnoop.bpf
-
-class Data(ct.Structure):
-    """ execsnoop data struct """
-    _fields_ = [
-        ("pid", ct.c_uint),
-        ("ppid", ct.c_uint),
-        ("comm", ct.c_char * TASK_COMM_LEN),
-        ("type", ct.c_int),
-        ("argv", ct.c_char * ARGSIZE),
-        ("retval", ct.c_int),
-    ]
-
 class EventType(object):
     """ Event type """
     EVENT_ARG = 0
@@ -137,7 +123,7 @@ class PCPBCCModule(PCPBCCBase):
 
     def handle_event(self, _cpu, data, _size):
         """ Event handler """
-        event = ct.cast(data, ct.POINTER(Data)).contents
+        event = self.bpf["events"].event(data)
         skip = False
 
         if event.type == EventType.EVENT_ARG:
@@ -145,9 +131,9 @@ class PCPBCCModule(PCPBCCBase):
         elif event.type == EventType.EVENT_RET:
             if event.retval != 0 and not self.include_failed:
                 skip = True
-            if self.command and not re.search(self.command, event.comm):
+            if self.command and not re.search(bytes(self.command), event.comm):
                 skip = True
-            if self.args and not re.search(self.args, b" ".join(self.argv_cache[event.pid])):
+            if self.args and not re.search(bytes(self.args), b" ".join(self.argv_cache[event.pid])):
                 skip = True
 
             if not skip:
@@ -177,10 +163,14 @@ class PCPBCCModule(PCPBCCBase):
 
                 self.bpf_text = self.bpf_text.replace("MAXARG", str(self.max_args))
 
+            bpf_text = self.bpf_text
+            bpf_text = bpf_text.replace('UID_FILTER', '')
+            bpf_text = bpf_text.replace('container_should_be_filtered()', '0')
+
             if self.debug:
-                self.log("BPF to be compiled:\n" + self.bpf_text.strip())
+                self.log("BPF to be compiled:\n" + bpf_text.strip())
 
-            self.bpf = BPF(text=self.bpf_text)
+            self.bpf = BPF(text=bpf_text)
             execve_fnname = self.get_syscall_fnname("execve")
             self.bpf.attach_kprobe(event=execve_fnname, fn_name="syscall__execve")
             self.bpf.attach_kretprobe(event=execve_fnname, fn_name="do_ret_sys_execve")
diff --git a/src/pmdas/bcc/modules/pcpbcc.python b/src/pmdas/bcc/modules/pcpbcc.python
index 0555dc33f..62783b7fc 100644
--- a/src/pmdas/bcc/modules/pcpbcc.python
+++ b/src/pmdas/bcc/modules/pcpbcc.python
@@ -14,6 +14,7 @@
 """ PCP BCC PMDA module base class """
 
 import re
+import platform
 import ctypes as ct
 from os import kill, listdir, path
 from collections import OrderedDict
@@ -348,6 +349,16 @@ class PCPBCCBase(object):
         """ Returns BCC version as an int tuple (for comparisons) """
         return tuple(map(int, PCPBCCBase.bcc_version().split('.')))
 
+    @staticmethod
+    def kernel_version():
+        """Returns the kernel version"""
+        version_str = platform.release()
+        m = re.match(r'^(\d+)\.(\d+)\.(\d+)', version_str)
+        if m:
+            return tuple(map(int, m.groups()))
+        else:
+            return (0, 0, 0)
+
     def perf_buffer_poller(self):
         """ BPF poller """
         try:
diff --git a/src/pmdas/bcc/modules/runqlat.python b/src/pmdas/bcc/modules/runqlat.python
index 27007c7e5..1c6c6b4b0 100644
--- a/src/pmdas/bcc/modules/runqlat.python
+++ b/src/pmdas/bcc/modules/runqlat.python
@@ -30,7 +30,11 @@ from modules.pcpbcc import PCPBCCBase
 #
 # BPF program
 #
-bpf_src = "modules/runqlat.bpf"
+is_support_raw_tp = BPF.support_raw_tracepoint()
+if is_support_raw_tp:
+    bpf_src = "modules/runqlat_tp.bpf"
+else:
+    bpf_src = "modules/runqlat_kp.bpf"
 
 #
 # PCP BCC PMDA constants
@@ -59,6 +63,7 @@ class PCPBCCModule(PCPBCCBase):
                 self.proc_filter = self.config.get(MODULE, opt)
                 self.update_pids(self.get_proc_info(self.proc_filter))
 
+        self.log("Using BPF source file %s." % bpf_src)
         self.log("Initialized.")
 
     def metrics(self):
@@ -89,7 +94,23 @@ class PCPBCCModule(PCPBCCBase):
                 with open(path.dirname(__file__) + '/../' + bpf_src) as src:
                     self.bpf_text = src.read()
 
+                # BPF.kernel_struct_has_field requires BCC v0.23.0
+                # use kernel version check as alternative
+                # pylint: disable=no-member
+                if (
+                    hasattr(BPF, "kernel_struct_has_field")
+                    and BPF.kernel_struct_has_field(b"task_struct", b"__state") == 1
+                ) or self.kernel_version() >= (5, 14, 0):
+                    self.bpf_text = self.bpf_text.replace('STATE_FIELD', '__state')
+                else:
+                    self.bpf_text = self.bpf_text.replace('STATE_FIELD', 'state')
+
                 self.bpf_text = self.bpf_text.replace("FILTER", "PID_CHECK")
+                self.bpf_text = self.bpf_text.replace('FACTOR', 'delta /= 1000;')
+
+                self.bpf_text = self.bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);')
+                self.bpf_text = self.bpf_text.replace('STORE',
+                    'dist.increment(bpf_log2l(delta));')
 
             if not self.pids and self.proc_filter and self.proc_refresh:
                 self.log("No process to attach found, activation postponed.")
@@ -102,9 +123,11 @@ class PCPBCCModule(PCPBCCBase):
 
             self.reset_cache()
             self.bpf = BPF(text=bpf_text)
-            self.bpf.attach_kprobe(event="ttwu_do_wakeup", fn_name="trace_ttwu_do_wakeup")
-            self.bpf.attach_kprobe(event="wake_up_new_task", fn_name="trace_wake_up_new_task")
-            self.bpf.attach_kprobe(event_re=r"^finish_task_switch$|^finish_task_switch\.isra\.\d$", fn_name="trace_run")
+            if not is_support_raw_tp:
+                self.bpf.attach_kprobe(event="ttwu_do_wakeup", fn_name="trace_ttwu_do_wakeup")
+                self.bpf.attach_kprobe(event="wake_up_new_task", fn_name="trace_wake_up_new_task")
+                self.bpf.attach_kprobe(event_re=r"^finish_task_switch$|^finish_task_switch\.isra\.\d$",
+                                       fn_name="trace_run")
             self.log("Compiled.")
         except Exception as error: # pylint: disable=broad-except
             self.bpf = None
diff --git a/src/pmdas/bcc/modules/runqlat.bpf b/src/pmdas/bcc/modules/runqlat_kp.bpf
similarity index 54%
rename from src/pmdas/bcc/modules/runqlat.bpf
rename to src/pmdas/bcc/modules/runqlat_kp.bpf
index a3664a035..dd643d600 100644
--- a/src/pmdas/bcc/modules/runqlat.bpf
+++ b/src/pmdas/bcc/modules/runqlat_kp.bpf
@@ -5,6 +5,7 @@
 #include <linux/sched.h>
 #include <linux/nsproxy.h>
 #include <linux/pid_namespace.h>
+#include <linux/init_task.h>
 
 typedef struct pid_key {
     u64 id;    // work around
@@ -17,7 +18,7 @@ typedef struct pidns_key {
 } pidns_key_t;
 
 BPF_HASH(start, u32);
-BPF_HISTOGRAM(dist);
+STORAGE
 
 struct rq;
 
@@ -31,6 +32,45 @@ static int trace_enqueue(u32 tgid, u32 pid)
     return 0;
 }
 
+static __always_inline unsigned int pid_namespace(struct task_struct *task)
+{
+
+/* pids[] was removed from task_struct since commit 2c4704756cab7cfa031ada4dab361562f0e357c0
+ * Using the macro INIT_PID_LINK as a conditional judgment.
+ */
+#ifdef INIT_PID_LINK
+    struct pid_link pids;
+    unsigned int level;
+    struct upid upid;
+    struct ns_common ns;
+
+    /*  get the pid namespace by following task_active_pid_ns(),
+     *  pid->numbers[pid->level].ns
+     */
+    bpf_probe_read_kernel(&pids, sizeof(pids), &task->pids[PIDTYPE_PID]);
+    bpf_probe_read_kernel(&level, sizeof(level), &pids.pid->level);
+    bpf_probe_read_kernel(&upid, sizeof(upid), &pids.pid->numbers[level]);
+    bpf_probe_read_kernel(&ns, sizeof(ns), &upid.ns->ns);
+
+    return ns.inum;
+#else
+    struct pid *pid;
+    unsigned int level;
+    struct upid upid;
+    struct ns_common ns;
+
+    /*  get the pid namespace by following task_active_pid_ns(),
+     *  pid->numbers[pid->level].ns
+     */
+    bpf_probe_read_kernel(&pid, sizeof(pid), &task->thread_pid);
+    bpf_probe_read_kernel(&level, sizeof(level), &pid->level);
+    bpf_probe_read_kernel(&upid, sizeof(upid), &pid->numbers[level]);
+    bpf_probe_read_kernel(&ns, sizeof(ns), &upid.ns->ns);
+
+    return ns.inum;
+#endif
+}
+
 int trace_wake_up_new_task(struct pt_regs *ctx, struct task_struct *p)
 {
     return trace_enqueue(p->tgid, p->pid);
@@ -48,7 +88,7 @@ int trace_run(struct pt_regs *ctx, struct task_struct *prev)
     u32 pid, tgid;
 
     // ivcsw: treat like an enqueue event and store timestamp
-    if (prev->state == TASK_RUNNING) {
+    if (prev->STATE_FIELD == TASK_RUNNING) {
         tgid = prev->tgid;
         pid = prev->pid;
         if (!(FILTER || pid == 0)) {
@@ -69,10 +109,10 @@ int trace_run(struct pt_regs *ctx, struct task_struct *prev)
         return 0;   // missed enqueue
     }
     delta = bpf_ktime_get_ns() - *tsp;
-    delta /= 1000;
+    FACTOR
 
     // store as histogram
-    dist.increment(bpf_log2l(delta));
+    STORE
 
     start.delete(&pid);
     return 0;
diff --git a/src/pmdas/bcc/modules/runqlat_tp.bpf b/src/pmdas/bcc/modules/runqlat_tp.bpf
new file mode 100644
index 000000000..f0e9ce69b
--- /dev/null
+++ b/src/pmdas/bcc/modules/runqlat_tp.bpf
@@ -0,0 +1,124 @@
+// Copyright 2016 Netflix, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License")
+
+#include <uapi/linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/nsproxy.h>
+#include <linux/pid_namespace.h>
+#include <linux/init_task.h>
+
+typedef struct pid_key {
+    u64 id;    // work around
+    u64 slot;
+} pid_key_t;
+
+typedef struct pidns_key {
+    u64 id;    // work around
+    u64 slot;
+} pidns_key_t;
+
+BPF_HASH(start, u32);
+STORAGE
+
+struct rq;
+
+// record enqueue timestamp
+static int trace_enqueue(u32 tgid, u32 pid)
+{
+    if (FILTER || pid == 0)
+        return 0;
+    u64 ts = bpf_ktime_get_ns();
+    start.update(&pid, &ts);
+    return 0;
+}
+
+static __always_inline unsigned int pid_namespace(struct task_struct *task)
+{
+
+/* pids[] was removed from task_struct since commit 2c4704756cab7cfa031ada4dab361562f0e357c0
+ * Using the macro INIT_PID_LINK as a conditional judgment.
+ */
+#ifdef INIT_PID_LINK
+    struct pid_link pids;
+    unsigned int level;
+    struct upid upid;
+    struct ns_common ns;
+
+    /*  get the pid namespace by following task_active_pid_ns(),
+     *  pid->numbers[pid->level].ns
+     */
+    bpf_probe_read_kernel(&pids, sizeof(pids), &task->pids[PIDTYPE_PID]);
+    bpf_probe_read_kernel(&level, sizeof(level), &pids.pid->level);
+    bpf_probe_read_kernel(&upid, sizeof(upid), &pids.pid->numbers[level]);
+    bpf_probe_read_kernel(&ns, sizeof(ns), &upid.ns->ns);
+
+    return ns.inum;
+#else
+    struct pid *pid;
+    unsigned int level;
+    struct upid upid;
+    struct ns_common ns;
+
+    /*  get the pid namespace by following task_active_pid_ns(),
+     *  pid->numbers[pid->level].ns
+     */
+    bpf_probe_read_kernel(&pid, sizeof(pid), &task->thread_pid);
+    bpf_probe_read_kernel(&level, sizeof(level), &pid->level);
+    bpf_probe_read_kernel(&upid, sizeof(upid), &pid->numbers[level]);
+    bpf_probe_read_kernel(&ns, sizeof(ns), &upid.ns->ns);
+
+    return ns.inum;
+#endif
+}
+
+RAW_TRACEPOINT_PROBE(sched_wakeup)
+{
+    // TP_PROTO(struct task_struct *p)
+    struct task_struct *p = (struct task_struct *)ctx->args[0];
+    return trace_enqueue(p->tgid, p->pid);
+}
+
+RAW_TRACEPOINT_PROBE(sched_wakeup_new)
+{
+    // TP_PROTO(struct task_struct *p)
+    struct task_struct *p = (struct task_struct *)ctx->args[0];
+    return trace_enqueue(p->tgid, p->pid);
+}
+
+RAW_TRACEPOINT_PROBE(sched_switch)
+{
+    // TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next)
+    struct task_struct *prev = (struct task_struct *)ctx->args[1];
+    struct task_struct *next = (struct task_struct *)ctx->args[2];
+    u32 pid, tgid;
+
+    // ivcsw: treat like an enqueue event and store timestamp
+    if (prev->STATE_FIELD == TASK_RUNNING) {
+        tgid = prev->tgid;
+        pid = prev->pid;
+        if (!(FILTER || pid == 0)) {
+            u64 ts = bpf_ktime_get_ns();
+            start.update(&pid, &ts);
+        }
+    }
+
+    tgid = next->tgid;
+    pid = next->pid;
+    if (FILTER || pid == 0)
+        return 0;
+    u64 *tsp, delta;
+
+    // fetch timestamp and calculate delta
+    tsp = start.lookup(&pid);
+    if (tsp == 0) {
+        return 0;   // missed enqueue
+    }
+    delta = bpf_ktime_get_ns() - *tsp;
+    FACTOR
+
+    // store as histogram
+    STORE
+
+    start.delete(&pid);
+    return 0;
+}
diff --git a/src/pmdas/bcc/modules/tcplife.python b/src/pmdas/bcc/modules/tcplife.python
index 0c6f17c36..02c693a6a 100644
--- a/src/pmdas/bcc/modules/tcplife.python
+++ b/src/pmdas/bcc/modules/tcplife.python
@@ -37,16 +37,11 @@ from modules.pcpbcc import PCPBCCBase
 #
 # BPF program
 #
-bpf_src = "modules/tcplife.bpf"
-# Compat with kernel < 4.16, bcc < 0.6
-TRACEFS = "/sys/kernel/debug/tracing"
-bpf_src_old_tp = "modules/tcplife_old_tp.bpf"
-bpf_src_old_kb = "modules/tcplife_old_kb.bpf"
-if not path.exists(TRACEFS + "/events/sock/inet_sock_set_state"):
-    if path.exists(TRACEFS + "/events/tcp/tcp_set_state"):
-        bpf_src = bpf_src_old_tp
-    else:
-        bpf_src = bpf_src_old_kb
+if BPF.tracepoint_exists("sock", "inet_sock_set_state"):
+    bpf_src = "modules/tcplife_tp.bpf"
+else:
+    bpf_src = "modules/tcplife_kp.bpf"
+
 
 #
 # PCP BCC PMDA constants
@@ -57,35 +52,6 @@ units_bytes = pmUnits(1, 0, 0, PM_SPACE_BYTE, 0, 0)
 units_usecs = pmUnits(0, 1, 0, 0, PM_TIME_USEC, 0)
 units_none = pmUnits(0, 0, 0, 0, 0, 0)
 
-TASK_COMM_LEN = 16      # linux/sched.h
-
-class Data_ipv4(ct.Structure):
-    """ IPv4 data struct """
-    _fields_ = [
-        ("ts_us", ct.c_ulonglong),
-        ("pid", ct.c_ulonglong),
-        ("saddr", ct.c_ulonglong),
-        ("daddr", ct.c_ulonglong),
-        ("ports", ct.c_ulonglong),
-        ("rx_b", ct.c_ulonglong),
-        ("tx_b", ct.c_ulonglong),
-        ("span_us", ct.c_ulonglong),
-        ("task", ct.c_char * TASK_COMM_LEN)
-    ]
-
-class Data_ipv6(ct.Structure):
-    """ IPv6 data struct """
-    _fields_ = [
-        ("ts_us", ct.c_ulonglong),
-        ("pid", ct.c_ulonglong),
-        ("saddr", (ct.c_ulonglong * 2)),
-        ("daddr", (ct.c_ulonglong * 2)),
-        ("ports", ct.c_ulonglong),
-        ("rx_b", ct.c_ulonglong),
-        ("tx_b", ct.c_ulonglong),
-        ("span_us", ct.c_ulonglong),
-        ("task", ct.c_char * TASK_COMM_LEN)
-    ]
 
 #
 # PCP BCC Module
@@ -129,24 +95,22 @@ class PCPBCCModule(PCPBCCBase):
         self.lock = Lock()
         self.thread = None
 
-        # Compat with kernel < 4.16
         self.log("Using BPF source file %s." % bpf_src)
 
         # Exit hard if impossible to continue
-        if self.bcc_version() == "0.6.1" and bpf_src == bpf_src_old_kb:
-            raise RuntimeError("BCC 0.6.1 bug makes it incompatible with this module "
-                               "on kernel < 4.15.")
+        if self.bcc_version_tuple() < (0, 6, 1):
+            raise RuntimeError("BCC 0.6.1+ is required for this module.")
 
         self.log("Initialized.")
 
     def handle_ip_event(self, data, version):
         """ IP event handler """
         if version == 4:
-            event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
+            event = self.bpf["ipv4_events"].event(data)
             laddr = inet_ntop(AF_INET, pack("I", event.saddr))
             daddr = inet_ntop(AF_INET, pack("I", event.daddr))
         else:
-            event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
+            event = self.bpf["ipv6_events"].event(data)
             laddr = inet_ntop(AF_INET6, event.saddr)
             daddr = inet_ntop(AF_INET6, event.daddr)
 
@@ -205,31 +169,25 @@ class PCPBCCModule(PCPBCCBase):
             if not self.bpf_text:
                 with open(path.dirname(__file__) + '/../' + bpf_src) as src:
                     self.bpf_text = src.read()
-                # Compat with bcc < 0.6
-                self.log("Testing BCC compatilibility, possible errors below are safe to ignore.")
-                try:
-                    test_txt = self.bpf_text.replace("// NEW: ", "").replace("FILTER_PID", "")
-                    test_bpf = BPF(text=test_txt)
-                    test_bpf.cleanup()
-                    self.bpf_text = self.bpf_text.replace("// NEW: ", "")
-                except Exception: # pylint: disable=broad-except
-                    self.bpf_text = self.bpf_text.replace("// OLD: ", "")
-                self.log("Tested BCC compatilibility, possible errors above are safe to ignore.")
 
                 if self.dports:
                     filterp = " && ".join(["dport != %d" % port for port in self.dports])
                     filter_txt = "if (%s) { birth.delete(&sk); return 0; }" % filterp
-                    self.bpf_text = self.bpf_text.replace("//FILTER_DPORT", filter_txt)
+                    self.bpf_text = self.bpf_text.replace("FILTER_DPORT", filter_txt)
                 if self.lports:
                     filterp = " && ".join(["lport != %d" % port for port in self.lports])
                     filter_txt = "if (%s) { birth.delete(&sk); return 0; }" % filterp
-                    self.bpf_text = self.bpf_text.replace("//FILTER_LPORT", filter_txt)
+                    self.bpf_text = self.bpf_text.replace("FILTER_LPORT", filter_txt)
 
             if not self.pids and self.proc_filter and self.proc_refresh:
                 self.log("No process to attach found, activation postponed.")
                 return
 
             bpf_text = self.apply_pid_filter(self.bpf_text, self.pids, False)
+            bpf_text = bpf_text.replace('FILTER_PID', '')
+            bpf_text = bpf_text.replace('FILTER_DPORT', '')
+            bpf_text = bpf_text.replace('FILTER_LPORT', '')
+            bpf_text = bpf_text.replace('FILTER_FAMILY', '')
 
             if self.debug:
                 self.log("BPF to be compiled:\n" + bpf_text.strip())
diff --git a/src/pmdas/bcc/modules/tcplife_old_kb.bpf b/src/pmdas/bcc/modules/tcplife_kp.bpf
similarity index 81%
rename from src/pmdas/bcc/modules/tcplife_old_kb.bpf
rename to src/pmdas/bcc/modules/tcplife_kp.bpf
index eed01941a..5486c6a37 100644
--- a/src/pmdas/bcc/modules/tcplife_old_kb.bpf
+++ b/src/pmdas/bcc/modules/tcplife_kp.bpf
@@ -2,7 +2,6 @@
 // Licensed under the Apache License, Version 2.0 (the "License")
 
 #include <uapi/linux/ptrace.h>
-#define KBUILD_MODNAME "pcpbcctcplife"
 #include <linux/tcp.h>
 #include <net/sock.h>
 #include <bcc/proto.h>
@@ -11,11 +10,10 @@ BPF_HASH(birth, struct sock *, u64);
 
 // separate data structs for ipv4 and ipv6
 struct ipv4_data_t {
-    // XXX: switch some to u32's when supported
     u64 ts_us;
-    u64 pid;
-    u64 saddr;
-    u64 daddr;
+    u32 pid;
+    u32 saddr;
+    u32 daddr;
     u64 ports;
     u64 rx_b;
     u64 tx_b;
@@ -26,7 +24,7 @@ BPF_PERF_OUTPUT(ipv4_events);
 
 struct ipv6_data_t {
     u64 ts_us;
-    u64 pid;
+    u32 pid;
     unsigned __int128 saddr;
     unsigned __int128 daddr;
     u64 ports;
@@ -49,12 +47,12 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
 
     // lport is either used in a filter here, or later
     u16 lport = sk->__sk_common.skc_num;
-    //FILTER_LPORT
+    FILTER_LPORT
 
     // dport is either used in a filter here, or later
     u16 dport = sk->__sk_common.skc_dport;
     dport = ntohs(dport);
-    //FILTER_DPORT
+    FILTER_DPORT
 
     /*
      * This tool includes PID and comm context. It's best effort, and may
@@ -74,6 +72,9 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
          * sets ESTABLISHED without a tcp_set_state() call. Until we know
          * that for sure, match all early states to increase chances a
          * timestamp is set.
+         * Note that this needs to be set before the PID filter later on,
+         * since the PID isn't reliable for these early stages, so we must
+         * save all timestamps and do the PID filter later when we can.
          */
         u64 ts = bpf_ktime_get_ns();
         birth.update(&sk, &ts);
@@ -101,7 +102,7 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
     delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
     birth.delete(&sk);
 
-    // fetch possible cached data
+    // fetch possible cached data, and filter
     struct id_t *mep;
     mep = whoami.lookup(&sk);
     if (mep != 0)
@@ -116,9 +117,13 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
 
     u16 family = sk->__sk_common.skc_family;
 
+    FILTER_FAMILY
+
     if (family == AF_INET) {
-        struct ipv4_data_t data4 = {.span_us = delta_us,
-            .rx_b = rx_b, .tx_b = tx_b};
+        struct ipv4_data_t data4 = {};
+        data4.span_us = delta_us;
+        data4.rx_b = rx_b;
+        data4.tx_b = tx_b;
         data4.ts_us = bpf_ktime_get_ns() / 1000;
         data4.saddr = sk->__sk_common.skc_rcv_saddr;
         data4.daddr = sk->__sk_common.skc_daddr;
@@ -128,17 +133,19 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
         if (mep == 0) {
             bpf_get_current_comm(&data4.task, sizeof(data4.task));
         } else {
-            bpf_probe_read(&data4.task, sizeof(data4.task), (void *)mep->task);
+            bpf_probe_read_kernel(&data4.task, sizeof(data4.task), (void *)mep->task);
         }
         ipv4_events.perf_submit(ctx, &data4, sizeof(data4));
 
     } else /* 6 */ {
-        struct ipv6_data_t data6 = {.span_us = delta_us,
-            .rx_b = rx_b, .tx_b = tx_b};
+        struct ipv6_data_t data6 = {};
+        data6.span_us = delta_us;
+        data6.rx_b = rx_b;
+        data6.tx_b = tx_b;
         data6.ts_us = bpf_ktime_get_ns() / 1000;
-        bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
+        bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr),
             sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
-        bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
+        bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
             sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
         // a workaround until data6 compiles with separate lport/dport
         data6.ports = dport + ((0ULL + lport) << 32);
@@ -146,7 +153,7 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
         if (mep == 0) {
             bpf_get_current_comm(&data6.task, sizeof(data6.task));
         } else {
-            bpf_probe_read(&data6.task, sizeof(data6.task), (void *)mep->task);
+            bpf_probe_read_kernel(&data6.task, sizeof(data6.task), (void *)mep->task);
         }
         ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
     }
diff --git a/src/pmdas/bcc/modules/tcplife_old_tp.bpf b/src/pmdas/bcc/modules/tcplife_old_tp.bpf
deleted file mode 100644
index a7c9c625c..000000000
--- a/src/pmdas/bcc/modules/tcplife_old_tp.bpf
+++ /dev/null
@@ -1,166 +0,0 @@
-// Copyright 2016 Netflix, Inc.
-// Licensed under the Apache License, Version 2.0 (the "License")
-
-#include <uapi/linux/ptrace.h>
-#define KBUILD_MODNAME "pcpbcctcplife"
-#include <linux/tcp.h>
-#include <net/sock.h>
-#include <bcc/proto.h>
-
-BPF_HASH(birth, struct sock *, u64);
-
-// separate data structs for ipv4 and ipv6
-struct ipv4_data_t {
-    // XXX: switch some to u32's when supported
-    u64 ts_us;
-    u64 pid;
-    u64 saddr;
-    u64 daddr;
-    u64 ports;
-    u64 rx_b;
-    u64 tx_b;
-    u64 span_us;
-    char task[TASK_COMM_LEN];
-};
-BPF_PERF_OUTPUT(ipv4_events);
-
-struct ipv6_data_t {
-    u64 ts_us;
-    u64 pid;
-    unsigned __int128 saddr;
-    unsigned __int128 daddr;
-    u64 ports;
-    u64 rx_b;
-    u64 tx_b;
-    u64 span_us;
-    char task[TASK_COMM_LEN];
-};
-BPF_PERF_OUTPUT(ipv6_events);
-
-struct id_t {
-    u32 pid;
-    char task[TASK_COMM_LEN];
-};
-BPF_HASH(whoami, struct sock *, struct id_t);
-
-TRACEPOINT_PROBE(tcp, tcp_set_state)
-{
-    u32 pid = bpf_get_current_pid_tgid() >> 32;
-    // sk is mostly used as a UUID, once for skc_family, and two tcp stats:
-    struct sock *sk = (struct sock *)args->skaddr;
-
-    // lport is either used in a filter here, or later
-    u16 lport = args->sport;
-    //FILTER_LPORT
-
-    // dport is either used in a filter here, or later
-    u16 dport = args->dport;
-    //FILTER_DPORT
-
-    /*
-     * This tool includes PID and comm context. It's best effort, and may
-     * be wrong in some situations. It currently works like this:
-     * - record timestamp on any state < TCP_FIN_WAIT1
-     * - cache task context on:
-     *       TCP_SYN_SENT: tracing from client
-     *       TCP_LAST_ACK: client-closed from server
-     * - do output on TCP_CLOSE:
-     *       fetch task context if cached, or use current task
-     */
-
-    // capture birth time
-    if (args->newstate < TCP_FIN_WAIT1) {
-        /*
-         * Matching just ESTABLISHED may be sufficient, provided no code-path
-         * sets ESTABLISHED without a tcp_set_state() call. Until we know
-         * that for sure, match all early states to increase chances a
-         * timestamp is set.
-         * Note that this needs to be set before the PID filter later on,
-         * since the PID isn't reliable for these early stages, so we must
-         * save all timestamps and do the PID filter later when we can.
-         */
-        u64 ts = bpf_ktime_get_ns();
-        birth.update(&sk, &ts);
-    }
-
-    // record PID & comm on SYN_SENT
-    if (args->newstate == TCP_SYN_SENT || args->newstate == TCP_LAST_ACK) {
-        // now we can PID filter, both here and a little later on for CLOSE
-        FILTER_PID
-        struct id_t me = {.pid = pid};
-        bpf_get_current_comm(&me.task, sizeof(me.task));
-        whoami.update(&sk, &me);
-    }
-
-    if (args->newstate != TCP_CLOSE)
-        return 0;
-
-    // calculate lifespan
-    u64 *tsp, delta_us;
-    tsp = birth.lookup(&sk);
-    if (tsp == 0) {
-        whoami.delete(&sk);     // may not exist
-        return 0;               // missed create
-    }
-    delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
-    birth.delete(&sk);
-
-    // fetch possible cached data, and filter
-    struct id_t *mep;
-    mep = whoami.lookup(&sk);
-    if (mep != 0)
-        pid = mep->pid;
-    FILTER_PID
-
-    // get throughput stats. see tcp_get_info().
-    u64 rx_b = 0, tx_b = 0, sport = 0;
-    struct tcp_sock *tp = (struct tcp_sock *)sk;
-    // OLD: bpf_probe_read(&rx_b, sizeof(rx_b), &tp->bytes_received);
-    // OLD: bpf_probe_read(&tx_b, sizeof(tx_b), &tp->bytes_acked);
-    // NEW: rx_b = tp->bytes_received;
-    // NEW: tx_b = tp->bytes_acked;
-
-    u16 family = 0;
-    // OLD: bpf_probe_read(&family, sizeof(family), &sk->__sk_common.skc_family);
-    // NEW: family = sk->__sk_common.skc_family;
-
-    if (family == AF_INET) {
-
-        struct ipv4_data_t data4 = {.span_us = delta_us,
-            .rx_b = rx_b, .tx_b = tx_b};
-        data4.ts_us = bpf_ktime_get_ns() / 1000;
-        bpf_probe_read(&data4.saddr, sizeof(u32), args->saddr);
-        bpf_probe_read(&data4.daddr, sizeof(u32), args->daddr);
-        // a workaround until data4 compiles with separate lport/dport
-        data4.ports = dport + ((0ULL + lport) << 32);
-        data4.pid = pid;
-
-        if (mep == 0) {
-            bpf_get_current_comm(&data4.task, sizeof(data4.task));
-        } else {
-            bpf_probe_read(&data4.task, sizeof(data4.task), (void *)mep->task);
-        }
-        ipv4_events.perf_submit(args, &data4, sizeof(data4));
-
-    } else /* 6 */ {
-        struct ipv6_data_t data6 = {.span_us = delta_us,
-            .rx_b = rx_b, .tx_b = tx_b};
-        data6.ts_us = bpf_ktime_get_ns() / 1000;
-        bpf_probe_read(&data6.saddr, sizeof(data6.saddr), args->saddr_v6);
-        bpf_probe_read(&data6.daddr, sizeof(data6.daddr), args->saddr_v6);
-        // a workaround until data6 compiles with separate lport/dport
-        data6.ports = dport + ((0ULL + lport) << 32);
-        data6.pid = pid;
-        if (mep == 0) {
-            bpf_get_current_comm(&data6.task, sizeof(data6.task));
-        } else {
-            bpf_probe_read(&data6.task, sizeof(data6.task), (void *)mep->task);
-        }
-        ipv6_events.perf_submit(args, &data6, sizeof(data6));
-    }
-
-    if (mep != 0)
-        whoami.delete(&sk);
-
-    return 0;
-}
diff --git a/src/pmdas/bcc/modules/tcplife.bpf b/src/pmdas/bcc/modules/tcplife_tp.bpf
similarity index 80%
rename from src/pmdas/bcc/modules/tcplife.bpf
rename to src/pmdas/bcc/modules/tcplife_tp.bpf
index 19ca8d740..2b16b98e7 100644
--- a/src/pmdas/bcc/modules/tcplife.bpf
+++ b/src/pmdas/bcc/modules/tcplife_tp.bpf
@@ -2,7 +2,6 @@
 // Licensed under the Apache License, Version 2.0 (the "License")
 
 #include <uapi/linux/ptrace.h>
-#define KBUILD_MODNAME "pcpbcctcplife"
 #include <linux/tcp.h>
 #include <net/sock.h>
 #include <bcc/proto.h>
@@ -11,11 +10,10 @@ BPF_HASH(birth, struct sock *, u64);
 
 // separate data structs for ipv4 and ipv6
 struct ipv4_data_t {
-    // XXX: switch some to u32's when supported
     u64 ts_us;
-    u64 pid;
-    u64 saddr;
-    u64 daddr;
+    u32 pid;
+    u32 saddr;
+    u32 daddr;
     u64 ports;
     u64 rx_b;
     u64 tx_b;
@@ -26,7 +24,7 @@ BPF_PERF_OUTPUT(ipv4_events);
 
 struct ipv6_data_t {
     u64 ts_us;
-    u64 pid;
+    u32 pid;
     unsigned __int128 saddr;
     unsigned __int128 daddr;
     u64 ports;
@@ -54,11 +52,11 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
 
     // lport is either used in a filter here, or later
     u16 lport = args->sport;
-    //FILTER_LPORT
+    FILTER_LPORT
 
     // dport is either used in a filter here, or later
     u16 dport = args->dport;
-    //FILTER_DPORT
+    FILTER_DPORT
 
     /*
      * This tool includes PID and comm context. It's best effort, and may
@@ -115,20 +113,23 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
         pid = mep->pid;
     FILTER_PID
 
+    u16 family = args->family;
+    FILTER_FAMILY
+
     // get throughput stats. see tcp_get_info().
     u64 rx_b = 0, tx_b = 0, sport = 0;
     struct tcp_sock *tp = (struct tcp_sock *)sk;
-    // OLD: bpf_probe_read(&rx_b, sizeof(rx_b), &tp->bytes_received);
-    // OLD: bpf_probe_read(&tx_b, sizeof(tx_b), &tp->bytes_acked);
-    // NEW: rx_b = tp->bytes_received;
-    // NEW: tx_b = tp->bytes_acked;
+    rx_b = tp->bytes_received;
+    tx_b = tp->bytes_acked;
 
     if (args->family == AF_INET) {
-        struct ipv4_data_t data4 = {.span_us = delta_us,
-            .rx_b = rx_b, .tx_b = tx_b};
+        struct ipv4_data_t data4 = {};
+        data4.span_us = delta_us;
+        data4.rx_b = rx_b;
+        data4.tx_b = tx_b;
         data4.ts_us = bpf_ktime_get_ns() / 1000;
-        bpf_probe_read(&data4.saddr, sizeof(u32), args->saddr);
-        bpf_probe_read(&data4.daddr, sizeof(u32), args->daddr);
+        __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
+        __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
         // a workaround until data4 compiles with separate lport/dport
         data4.ports = dport + ((0ULL + lport) << 32);
         data4.pid = pid;
@@ -136,23 +137,25 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
         if (mep == 0) {
             bpf_get_current_comm(&data4.task, sizeof(data4.task));
         } else {
-            bpf_probe_read(&data4.task, sizeof(data4.task), (void *)mep->task);
+            bpf_probe_read_kernel(&data4.task, sizeof(data4.task), (void *)mep->task);
         }
         ipv4_events.perf_submit(args, &data4, sizeof(data4));
 
     } else /* 6 */ {
-        struct ipv6_data_t data6 = {.span_us = delta_us,
-            .rx_b = rx_b, .tx_b = tx_b};
+        struct ipv6_data_t data6 = {};
+        data6.span_us = delta_us;
+        data6.rx_b = rx_b;
+        data6.tx_b = tx_b;
         data6.ts_us = bpf_ktime_get_ns() / 1000;
-        bpf_probe_read(&data6.saddr, sizeof(data6.saddr), args->saddr_v6);
-        bpf_probe_read(&data6.daddr, sizeof(data6.daddr), args->saddr_v6);
+        __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
+        __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
         // a workaround until data6 compiles with separate lport/dport
         data6.ports = dport + ((0ULL + lport) << 32);
         data6.pid = pid;
         if (mep == 0) {
             bpf_get_current_comm(&data6.task, sizeof(data6.task));
         } else {
-            bpf_probe_read(&data6.task, sizeof(data6.task), (void *)mep->task);
+            bpf_probe_read_kernel(&data6.task, sizeof(data6.task), (void *)mep->task);
         }
         ipv6_events.perf_submit(args, &data6, sizeof(data6));
     }
diff --git a/src/pmdas/bcc/modules/tcpperpid.python b/src/pmdas/bcc/modules/tcpperpid.python
index 3cb2cfcfd..0096929a6 100644
--- a/src/pmdas/bcc/modules/tcpperpid.python
+++ b/src/pmdas/bcc/modules/tcpperpid.python
@@ -32,16 +32,10 @@ from modules.pcpbcc import PCPBCCBase
 #
 # BPF program
 #
-bpf_src = "modules/tcplife.bpf"
-# Compat with kernel < 4.16, bcc < 0.6
-TRACEFS = "/sys/kernel/debug/tracing"
-bpf_src_old_tp = "modules/tcplife_old_tp.bpf"
-bpf_src_old_kb = "modules/tcplife_old_kb.bpf"
-if not path.exists(TRACEFS + "/events/sock/inet_sock_set_state"):
-    if path.exists(TRACEFS + "/events/tcp/tcp_set_state"):
-        bpf_src = bpf_src_old_tp
-    else:
-        bpf_src = bpf_src_old_kb
+if BPF.tracepoint_exists("sock", "inet_sock_set_state"):
+    bpf_src = "modules/tcplife_tp.bpf"
+else:
+    bpf_src = "modules/tcplife_kp.bpf"
 
 # Alternative, "high resolution" BPF
 bpf_highres = "modules/tcptop.bpf"
@@ -53,36 +47,6 @@ MODULE = 'tcpperpid'
 BASENS = 'proc.io.net.total.'
 units_bytes = pmUnits(1, 0, 0, PM_SPACE_BYTE, 0, 0)
 
-TASK_COMM_LEN = 16      # linux/sched.h
-
-class Data_ipv4(ct.Structure):
-    """ IPv4 data struct """
-    _fields_ = [
-        ("ts_us", ct.c_ulonglong),
-        ("pid", ct.c_ulonglong),
-        ("saddr", ct.c_ulonglong),
-        ("daddr", ct.c_ulonglong),
-        ("ports", ct.c_ulonglong),
-        ("rx_b", ct.c_ulonglong),
-        ("tx_b", ct.c_ulonglong),
-        ("span_us", ct.c_ulonglong),
-        ("task", ct.c_char * TASK_COMM_LEN)
-    ]
-
-class Data_ipv6(ct.Structure):
-    """ IPv6 data struct """
-    _fields_ = [
-        ("ts_us", ct.c_ulonglong),
-        ("pid", ct.c_ulonglong),
-        ("saddr", (ct.c_ulonglong * 2)),
-        ("daddr", (ct.c_ulonglong * 2)),
-        ("ports", ct.c_ulonglong),
-        ("rx_b", ct.c_ulonglong),
-        ("tx_b", ct.c_ulonglong),
-        ("span_us", ct.c_ulonglong),
-        ("task", ct.c_char * TASK_COMM_LEN)
-    ]
-
 #
 # PCP BCC Module
 #
@@ -133,15 +97,14 @@ class PCPBCCModule(PCPBCCBase):
         self.log("Using BPF source file %s." % src)
 
         # Exit hard if impossible to continue
-        if self.bcc_version() == "0.6.1" and src == bpf_src_old_kb and not self.highres:
-            raise RuntimeError("BCC 0.6.1 bug makes it incompatible with this module "
-                               "on kernel < 4.15 in non-highres mode.")
+        if self.bcc_version_tuple() < (0, 6, 1) and not self.highres:
+            raise RuntimeError("BCC 0.6.1+ is required for this module in non-highres mode.")
 
         self.log("Initialized.")
 
     def handle_ipv4_event(self, _cpu, data, _size):
         """ IPv4 event handler """
-        event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
+        event = self.bpf["ipv4_events"].event(data)
         pid = str(event.pid).zfill(6)
         self.lock.acquire()
         if pid not in self.ipv4_stats:
@@ -153,7 +116,7 @@ class PCPBCCModule(PCPBCCBase):
 
     def handle_ipv6_event(self, _cpu, data, _size):
         """ IPv6 event handler """
-        event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
+        event = self.bpf["ipv6_events"].event(data)
         pid = str(event.pid).zfill(6)
         self.lock.acquire()
         if pid not in self.ipv6_stats:
@@ -199,31 +162,25 @@ class PCPBCCModule(PCPBCCBase):
                     self.bpf_text = src.read()
                 if self.highres:
                     self.bpf_text = self.bpf_text.replace("FILTER", "FILTER_PID")
-                # Compat with bcc < 0.6
-                self.log("Testing BCC compatilibility, possible errors below are safe to ignore.")
-                try:
-                    test_txt = self.bpf_text.replace("// NEW: ", "").replace("FILTER_PID", "")
-                    test_bpf = BPF(text=test_txt)
-                    test_bpf.cleanup()
-                    self.bpf_text = self.bpf_text.replace("// NEW: ", "")
-                except Exception: # pylint: disable=broad-except
-                    self.bpf_text = self.bpf_text.replace("// OLD: ", "")
-                self.log("Tested BCC compatilibility, possible errors above are safe to ignore.")
 
                 if self.dports:
                     filterp = " && ".join(["dport != %d" % port for port in self.dports])
                     filter_txt = "if (%s) { birth.delete(&sk); return 0; }" % filterp
-                    self.bpf_text = self.bpf_text.replace("//FILTER_DPORT", filter_txt)
+                    self.bpf_text = self.bpf_text.replace("FILTER_DPORT", filter_txt)
                 if self.lports:
                     filterp = " && ".join(["lport != %d" % port for port in self.lports])
                     filter_txt = "if (%s) { birth.delete(&sk); return 0; }" % filterp
-                    self.bpf_text = self.bpf_text.replace("//FILTER_LPORT", filter_txt)
+                    self.bpf_text = self.bpf_text.replace("FILTER_LPORT", filter_txt)
 
             if not self.pids and self.proc_filter and self.proc_refresh:
                 self.log("No process to attach found, activation postponed.")
                 return
 
             bpf_text = self.apply_pid_filter(self.bpf_text, self.pids, False)
+            bpf_text = bpf_text.replace('FILTER_PID', '')
+            bpf_text = bpf_text.replace('FILTER_DPORT', '')
+            bpf_text = bpf_text.replace('FILTER_LPORT', '')
+            bpf_text = bpf_text.replace('FILTER_FAMILY', '')
 
             if self.debug:
                 self.log("BPF to be compiled:\n" + bpf_text.strip())
diff --git a/src/pmdas/bcc/modules/tcptop.bpf b/src/pmdas/bcc/modules/tcptop.bpf
index 349ee1529..c1fed7aef 100644
--- a/src/pmdas/bcc/modules/tcptop.bpf
+++ b/src/pmdas/bcc/modules/tcptop.bpf
@@ -4,6 +4,7 @@
 #include <uapi/linux/ptrace.h>
 #include <net/sock.h>
 #include <bcc/proto.h>
+
 struct ipv4_key_t {
     u32 pid;
     u32 saddr;
@@ -13,25 +14,32 @@ struct ipv4_key_t {
 };
 BPF_HASH(ipv4_send_bytes, struct ipv4_key_t);
 BPF_HASH(ipv4_recv_bytes, struct ipv4_key_t);
+
 struct ipv6_key_t {
+    unsigned __int128 saddr;
+    unsigned __int128 daddr;
     u32 pid;
-    // workaround until unsigned __int128 support:
-    u64 saddr0;
-    u64 saddr1;
-    u64 daddr0;
-    u64 daddr1;
     u16 lport;
     u16 dport;
+    u64 __pad__;
 };
 BPF_HASH(ipv6_send_bytes, struct ipv6_key_t);
 BPF_HASH(ipv6_recv_bytes, struct ipv6_key_t);
+
 int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk,
     struct msghdr *msg, size_t size)
 {
-    u32 pid = bpf_get_current_pid_tgid();
-    FILTER
+    if (container_should_be_filtered()) {
+        return 0;
+    }
+
+    u32 pid = bpf_get_current_pid_tgid() >> 32;
+    FILTER_PID
+
     u16 dport = 0, family = sk->__sk_common.skc_family;
-    u64 *val, zero = 0;
+
+    FILTER_FAMILY
+
     if (family == AF_INET) {
         struct ipv4_key_t ipv4_key = {.pid = pid};
         ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr;
@@ -39,31 +47,24 @@ int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk,
         ipv4_key.lport = sk->__sk_common.skc_num;
         dport = sk->__sk_common.skc_dport;
         ipv4_key.dport = ntohs(dport);
-        val = ipv4_send_bytes.lookup_or_init(&ipv4_key, &zero);
-        if (val) {
-            (*val) += size;
-        }
+        ipv4_send_bytes.increment(ipv4_key, size);
+
     } else if (family == AF_INET6) {
         struct ipv6_key_t ipv6_key = {.pid = pid};
-        bpf_probe_read(&ipv6_key.saddr0, sizeof(ipv6_key.saddr0),
-            &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0]);
-        bpf_probe_read(&ipv6_key.saddr1, sizeof(ipv6_key.saddr1),
-            &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2]);
-        bpf_probe_read(&ipv6_key.daddr0, sizeof(ipv6_key.daddr0),
-            &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0]);
-        bpf_probe_read(&ipv6_key.daddr1, sizeof(ipv6_key.daddr1),
-            &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2]);
+        bpf_probe_read_kernel(&ipv6_key.saddr, sizeof(ipv6_key.saddr),
+            &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
+        bpf_probe_read_kernel(&ipv6_key.daddr, sizeof(ipv6_key.daddr),
+            &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
         ipv6_key.lport = sk->__sk_common.skc_num;
         dport = sk->__sk_common.skc_dport;
         ipv6_key.dport = ntohs(dport);
-        val = ipv6_send_bytes.lookup_or_init(&ipv6_key, &zero);
-        if (val) {
-            (*val) += size;
-        }
+        ipv6_send_bytes.increment(ipv6_key, size);
     }
     // else drop
+
     return 0;
 }
+
 /*
  * tcp_recvmsg() would be obvious to trace, but is less suitable because:
  * - we'd need to trace both entry and return, to have both sock and size
@@ -72,12 +73,21 @@ int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk,
  */
 int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied)
 {
-    u32 pid = bpf_get_current_pid_tgid();
-    FILTER
+    if (container_should_be_filtered()) {
+        return 0;
+    }
+
+    u32 pid = bpf_get_current_pid_tgid() >> 32;
+    FILTER_PID
+
     u16 dport = 0, family = sk->__sk_common.skc_family;
     u64 *val, zero = 0;
+
     if (copied <= 0)
         return 0;
+
+    FILTER_FAMILY
+
     if (family == AF_INET) {
         struct ipv4_key_t ipv4_key = {.pid = pid};
         ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr;
@@ -85,28 +95,20 @@ int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied)
         ipv4_key.lport = sk->__sk_common.skc_num;
         dport = sk->__sk_common.skc_dport;
         ipv4_key.dport = ntohs(dport);
-        val = ipv4_recv_bytes.lookup_or_init(&ipv4_key, &zero);
-        if (val) {
-            (*val) += copied;
-        }
+        ipv4_recv_bytes.increment(ipv4_key, copied);
+
     } else if (family == AF_INET6) {
         struct ipv6_key_t ipv6_key = {.pid = pid};
-        bpf_probe_read(&ipv6_key.saddr0, sizeof(ipv6_key.saddr0),
-            &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0]);
-        bpf_probe_read(&ipv6_key.saddr1, sizeof(ipv6_key.saddr1),
-            &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2]);
-        bpf_probe_read(&ipv6_key.daddr0, sizeof(ipv6_key.daddr0),
-            &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0]);
-        bpf_probe_read(&ipv6_key.daddr1, sizeof(ipv6_key.daddr1),
-            &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2]);
+        bpf_probe_read_kernel(&ipv6_key.saddr, sizeof(ipv6_key.saddr),
+            &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
+        bpf_probe_read_kernel(&ipv6_key.daddr, sizeof(ipv6_key.daddr),
+            &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
         ipv6_key.lport = sk->__sk_common.skc_num;
         dport = sk->__sk_common.skc_dport;
         ipv6_key.dport = ntohs(dport);
-        val = ipv6_recv_bytes.lookup_or_init(&ipv6_key, &zero);
-        if (val) {
-            (*val) += copied;
-        }
+        ipv6_recv_bytes.increment(ipv6_key, copied);
     }
     // else drop
+
     return 0;
 }
diff --git a/src/pmdas/bcc/modules/tcptop.python b/src/pmdas/bcc/modules/tcptop.python
index 45063dff3..db1c1da15 100644
--- a/src/pmdas/bcc/modules/tcptop.python
+++ b/src/pmdas/bcc/modules/tcptop.python
@@ -120,13 +120,14 @@ class PCPBCCModule(PCPBCCBase):
                 with open(path.dirname(__file__) + '/../' + bpf_src) as src:
                     self.bpf_text = src.read()
 
-                self.bpf_text = self.bpf_text.replace("FILTER", "FILTER_PID")
-
             if not self.pids and self.proc_filter and self.proc_refresh:
                 self.log("No process to attach found, activation postponed.")
                 return
 
             bpf_text = self.apply_pid_filter(self.bpf_text, self.pids, False)
+            bpf_text = bpf_text.replace('FILTER_PID', '')
+            bpf_text = bpf_text.replace('FILTER_FAMILY', '')
+            bpf_text = bpf_text.replace('container_should_be_filtered()', '0')
 
             if self.debug:
                 self.log("BPF to be compiled:\n" + bpf_text.strip())
@@ -155,21 +156,31 @@ class PCPBCCModule(PCPBCCBase):
 
     @staticmethod
     def ipv4_table_to_dict(table):
-        """ Build hashable dict from IPv4 BPF table """
-        return {TCPSessionKey(pid=k.pid,
-                              laddr=inet_ntop(AF_INET, pack("I", k.saddr)),
-                              lport=k.lport,
-                              daddr=inet_ntop(AF_INET, pack("I", k.daddr)),
-                              dport=k.dport):v.value for k, v in table.items()}
+        """Build hashable dict from IPv4 BPF table"""
+        return {
+            TCPSessionKey(
+                pid=k.pid,
+                laddr=inet_ntop(AF_INET, pack("I", k.saddr)),
+                lport=k.lport,
+                daddr=inet_ntop(AF_INET, pack("I", k.daddr)),
+                dport=k.dport,
+            ): v.value
+            for k, v in table.items()
+        }
 
     @staticmethod
     def ipv6_table_to_dict(table):
-        """ Build hashable dict from IPv6 BPF table """
-        return {TCPSessionKey(pid=k.pid,
-                              laddr=inet_ntop(AF_INET6, pack("QQ", k.saddr0, k.saddr1)),
-                              lport=k.lport,
-                              daddr=inet_ntop(AF_INET6, pack("QQ", k.daddr0, k.daddr1)),
-                              dport=k.dport):v.value for k, v in table.items()}
+        """Build hashable dict from IPv6 BPF table"""
+        return {
+            TCPSessionKey(
+                pid=k.pid,
+                laddr=inet_ntop(AF_INET6, k.saddr),
+                lport=k.lport,
+                daddr=inet_ntop(AF_INET6, k.daddr),
+                dport=k.dport,
+            ): v.value
+            for k, v in table.items()
+        }
 
     def refresh_stats(self):
         """ Refresh statistics from BPF table """
commit d45ce8e85035cc95ba897cd19967fad6d5d741be (cherry-picked)
Author: Andreas Gerstmayr <agerstmayr@redhat.com>
Date:   Wed Dec 15 08:03:40 2021 +0100

    qa: update qa/1118 to add new log output of runqlat bcc module

diff --git a/qa/1118 b/qa/1118
index 4123495b5..bcaec0a0d 100755
--- a/qa/1118
+++ b/qa/1118
@@ -21,12 +21,19 @@ _label_filter()
     grep '"0-1"' | grep '"statistic":"histogram"' | grep '"lower_bound":0' | grep 'upper_bound":1' > /dev/null && echo 'OK'
 }
 
+_install_filter()
+{
+    sed \
+    -e "s/Using BPF source file .\+/Using BPF source file X/g" \
+    #end
+}
+
 _prepare_pmda bcc
 trap "_pmdabcc_cleanup; exit \$status" 0 1 2 3 15
 _stop_auto_restart pmcd
 
 # real QA test starts here
-cat <<EOF | _pmdabcc_install
+cat <<EOF | _pmdabcc_install | _install_filter
 # Installed by PCP QA test $seq on `date`
 [pmda]
 modules = runqlat
diff --git a/qa/1118.out b/qa/1118.out
index 16a9fa4b4..e74f97964 100644
--- a/qa/1118.out
+++ b/qa/1118.out
@@ -9,6 +9,7 @@ Info: runqlat
 Info: Modules configured.
 Info: Initializing modules:
 Info: runqlat
+Info: runqlat: Using BPF source file X
 Info: runqlat: Initialized.
 Info: Modules initialized.
 Info: Registering metrics:
@@ -25,6 +26,7 @@ Info: runqlat
 Info: Modules configured.
 Info: Initializing modules:
 Info: runqlat
+Info: runqlat: Using BPF source file X
 Info: runqlat: Initialized.
 Info: Modules initialized.
 Info: Registering metrics: