From 4890ab4739860e70799b3e4e66e11841a91abd78 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Nov 09 2021 09:55:05 +0000 Subject: import systemtap-4.5-3.el8 --- diff --git a/.gitignore b/.gitignore index b8f7ea7..7e2d8f3 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/systemtap-4.4.tar.gz +SOURCES/systemtap-4.5.tar.gz diff --git a/.systemtap.metadata b/.systemtap.metadata index accf03e..cd69235 100644 --- a/.systemtap.metadata +++ b/.systemtap.metadata @@ -1 +1 @@ -f126888adda90a0ec57f43f9db20fde68c8ef356 SOURCES/systemtap-4.4.tar.gz +c549d5fa7aaf6a8cef3371f5757d912d41eae934 SOURCES/systemtap-4.5.tar.gz diff --git a/SOURCES/rhbz1650594.patch b/SOURCES/rhbz1650594.patch deleted file mode 100644 index fd5afa9..0000000 --- a/SOURCES/rhbz1650594.patch +++ /dev/null @@ -1,55 +0,0 @@ -commit 734f5acf6c568c02254a33730c6f0fff016bdd09 -Author: Martin Cermak -Date: Wed Jan 20 22:09:49 2021 +0100 - - systemtap-service onboot: Skip updating the bootloader - - It shows up that just modifying the default initrd is good enough, - no need to call kernel-install or new-kernel-pkg. This speeds up - the systemtap-service onboot operation. - -diff --git a/initscript/systemtap.in b/initscript/systemtap.in -index 713f7a680..b1621ae2f 100755 ---- a/initscript/systemtap.in -+++ b/initscript/systemtap.in -@@ -46,8 +46,6 @@ STAPRUN=@bindir@/staprun - UNAME=/bin/uname - LSMOD=/sbin/lsmod - DRACUT=`which dracut` --NEWKERNELPKG=/sbin/new-kernel-pkg --KERNELINSTALL=/usr/bin/kernel-install - - # Not actually used directly, but needed by - # stap dracut module for inclusion in initramfs -@@ -839,10 +837,6 @@ backup_initramfs() { - - onboot () { - local s ret ss -- if [ ! -f "$NEWKERNELPKG" -a ! -f "$KERNELINSTALL" ]; then -- do_failure "Could not find $NEWKERNELPKG nor $KERNELINSTALL" -- return 1 -- fi - if [ ! -f "$DRACUT" ]; then - do_failure "$DRACUT not found" - return 1 -@@ -940,20 +934,6 @@ onboot () { - return 0 - fi - clog "done" -- # We're installing the initramfs in the default location, so user -- # expects the next boot to use it. Let's also update the bootloader. -- clog " Updating bootloader ... " -n -- if [ -x "$NEWKERNELPKG" ]; then -- logex $NEWKERNELPKG --initrdfile="$INITRAMFS" \ -- --update $KRELEASE -- else -- logex $KERNELINSTALL add $KRELEASE /boot/vmlinuz-$KRELEASE -- fi -- if [ $? -ne 0 ]; then -- do_failure "bootloader update exited with nonzero status" -- return 1 -- fi -- might_success "initramfs created and bootloader updated" - return 0 - } - diff --git a/SOURCES/rhbz1873492.patch b/SOURCES/rhbz1873492.patch deleted file mode 100644 index f843fbf..0000000 --- a/SOURCES/rhbz1873492.patch +++ /dev/null @@ -1,108 +0,0 @@ -commit ea5f10ba55fce68d1ed614ca33afdb38816f0830 -Author: Frank Ch. Eigler -Date: Mon Nov 16 18:54:11 2020 -0500 - - PR26665: mokutil output parsing tweaks - - We encountered secureboot keys in the wild that didn't live up - to the expectations of the current little state machine. Tweaked - regexps to accept Issuer: O= as well as Issuer: CN= lines. With - more verbosity, produces output on parsing process. - -diff --git a/session.cxx b/session.cxx -index b5a8044..0437ca4 100644 ---- a/session.cxx -+++ b/session.cxx -@@ -2859,6 +2859,9 @@ systemtap_session::get_mok_info() - // PR26665: but only Systemtap MOK keys; there may be others. - getline(out, line); - -+ if (verbose > 3) -+ clog << "MOK parse state: " << state << " line: " << line << endl; -+ - if (state == "SHA1") { // look for a new key fingerprint - if (! regexp_match(line, "^SHA1 Fingerprint: ([0-9a-f:]+)$", matches)) - { -@@ -2871,11 +2874,14 @@ systemtap_session::get_mok_info() - } - // else stay in SHA1 state - } else if (state == "Issuer") { // validate issuer -- if (! regexp_match(line, "^[ \t]*Issuer: O=(.*)$", matches)) { -+ if (! regexp_match(line, "^[ \t]*Issuer: [A-Z]*=(.*)$", matches)) { - if (verbose > 2) - clog << "Issuer found: " << matches[1] << endl; -- if (! regexp_match(matches[1], "Systemtap", matches)) -+ if (! regexp_match(matches[1], "Systemtap", matches)) { -+ if (verbose > 2) -+ clog << "Recognized Systemtap MOK fingerprint: " << fingerprint << endl; - mok_fingerprints.push_back(fingerprint); -+ } - state = "SHA1"; // start looking for another key - } - } else { // some other line in mokutil output ... there are plenty -commit 532eb9a1502026300a7f0b4bd287499101dd5803 -Author: Frank Ch. Eigler -Date: Tue Nov 17 16:34:59 2020 -0500 - - PR26665 detect rhel8 (4.18) era kernel_is_locked_down() as procfs trigger - - A different older kernel API needs to be probed for rhel8 era detection - of lockdown in effect. Added an (undocumented) $SYSTEMTAP_NOSIGN env - var to override automatic --use-server on lockdown, so that one can - inspect runtime/autoconf* operation locally, without stap-server. - -diff --git a/buildrun.cxx b/buildrun.cxx -index 9b4066d..9c8e648 100644 ---- a/buildrun.cxx -+++ b/buildrun.cxx -@@ -517,6 +517,7 @@ compile_pass (systemtap_session& s) - output_autoconf(s, o, cs, "autoconf-atomic_fetch_add_unless.c", - "STAPCONF_ATOMIC_FETCH_ADD_UNLESS", NULL); - output_autoconf(s, o, cs, "autoconf-lockdown-debugfs.c", "STAPCONF_LOCKDOWN_DEBUGFS", NULL); -+ output_autoconf(s, o, cs, "autoconf-lockdown-kernel.c", "STAPCONF_LOCKDOWN_KERNEL", NULL); - - // used by runtime/linux/netfilter.c - output_exportconf(s, o2, "nf_register_hook", "STAPCONF_NF_REGISTER_HOOK"); -diff --git a/runtime/linux/autoconf-lockdown-kernel.c b/runtime/linux/autoconf-lockdown-kernel.c -new file mode 100644 -index 0000000..90c2414 ---- /dev/null -+++ b/runtime/linux/autoconf-lockdown-kernel.c -@@ -0,0 +1,5 @@ -+#include -+ -+int foo(void) { -+ return kernel_is_locked_down("something"); -+} -diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c -index bb4a98b..5795533 100644 ---- a/runtime/transport/transport.c -+++ b/runtime/transport/transport.c -@@ -123,6 +123,12 @@ static int _stp_transport_fs_init(const char *module_name) - dbug_trans(1, "choosing procfs_p=1\n"); - } - #endif -+#ifdef STAPCONF_LOCKDOWN_KERNEL -+ if (!debugfs_p && kernel_is_locked_down ("debugfs")) { -+ procfs_p = 1; -+ dbug_trans(1, "choosing procfs_p=1\n"); -+ } -+#endif - if (!procfs_p) { - debugfs_p = 1; - dbug_trans(1, "choosing debugfs_p=1\n"); -diff --git a/session.cxx b/session.cxx -index 0437ca4..36a4053 100644 ---- a/session.cxx -+++ b/session.cxx -@@ -2804,7 +2804,9 @@ systemtap_session::modules_must_be_signed() - - if (getenv("SYSTEMTAP_SIGN")) - return true; -- -+ if (getenv("SYSTEMTAP_NOSIGN")) -+ return false; -+ - statm >> status; - if (status == 'Y') - return true; diff --git a/SOURCES/rhbz1898288.patch b/SOURCES/rhbz1898288.patch deleted file mode 100644 index c3f1653..0000000 --- a/SOURCES/rhbz1898288.patch +++ /dev/null @@ -1,63 +0,0 @@ -commit 34e62f15da5adf06361ac66489936d0ffa1cc430 -Author: Frank Ch. Eigler -Date: Tue Nov 10 22:13:53 2020 -0500 - - RHBZ1892179: handle exhausted stp_task_work structs - - In utrace_report_syscall_entry and _exit, there is a possibility of - dereferencing a NULL pointer, in case __stp_utrace_alloc_task_work - exhausts UTRACE_TASK_WORK_POOL_SIZE live elements. While OOM is - still a possibility, this patch handles it more gracefully. - -diff --git a/runtime/stp_utrace.c b/runtime/stp_utrace.c -index 47355de..e2880f1 100644 ---- a/runtime/stp_utrace.c -+++ b/runtime/stp_utrace.c -@@ -2337,11 +2337,11 @@ static void utrace_report_syscall_entry(void *cb_data __attribute__ ((unused)), - - /* Defer the report_syscall_entry work so it doesn't happen in atomic context: */ - work = __stp_utrace_alloc_task_work(utrace, NULL); -- __stp_utrace_save_regs(work, regs); - if (work == NULL) { - _stp_error("Unable to allocate space for task_work"); - return; - } -+ __stp_utrace_save_regs(work, regs); - stp_init_task_work(work, &utrace_syscall_entry_work); - rc = stp_task_work_add(task, work); - // stp_task_work_add() returns -ESRCH if the task has already -@@ -2444,11 +2444,11 @@ static void utrace_report_syscall_exit(void *cb_data __attribute__ ((unused)), - - /* Defer the report_syscall_exit work so it doesn't happen in atomic context: */ - work = __stp_utrace_alloc_task_work(utrace, NULL); -- __stp_utrace_save_regs(work, regs); - if (work == NULL) { - _stp_error("Unable to allocate space for task_work"); - return; - } -+ __stp_utrace_save_regs(work, regs); - stp_init_task_work(work, &utrace_syscall_exit_work); - rc = stp_task_work_add(task, work); - // stp_task_work_add() returns -ESRCH if the task has already - -commit 83cb271b390a1b36abd4c3aa69f89c466e99e253 -Author: Frank Ch. Eigler -Date: Fri Nov 13 12:36:07 2020 -0500 - - RHBZ1892179: double default UTRACE_TASK_WORKPOOL - - Some workloads were observed to exhaust the previous limit of 288. - -diff --git a/runtime/stp_utrace.c b/runtime/stp_utrace.c -index 46ba489..6022267 100644 ---- a/runtime/stp_utrace.c -+++ b/runtime/stp_utrace.c -@@ -141,7 +141,7 @@ struct __stp_utrace_task_work { /* NB: about 216 bytes, 18 per page: */ - TODO: UTRACE_TASK_WORK_POOL_SIZE can be specified on the Systemtap - command line. Experiment to find the best default value. */ - #ifndef UTRACE_TASK_WORK_POOL_SIZE --#define UTRACE_TASK_WORK_POOL_SIZE 288 -+#define UTRACE_TASK_WORK_POOL_SIZE 576 - #endif - static DECLARE_BITMAP(__stp_utrace_task_work_pool_bitmap, UTRACE_TASK_WORK_POOL_SIZE); - static struct __stp_utrace_task_work __stp_utrace_task_work_pool[UTRACE_TASK_WORK_POOL_SIZE]; diff --git a/SOURCES/rhbz1902696.patch b/SOURCES/rhbz1902696.patch deleted file mode 100644 index 71d3c0f..0000000 --- a/SOURCES/rhbz1902696.patch +++ /dev/null @@ -1,217 +0,0 @@ -commit e3d03db82853049f65f16dc40c03f3f7f617ffb5 -Author: Frank Ch. Eigler -Date: Sun Dec 13 21:05:23 2020 -0500 - - PR23512: fix staprun/stapio operation via less-than-root privileges - - Commit 7615cae790c899bc8a82841c75c8ea9c6fa54df3 for PR26665 introduced - a regression in handling stapusr/stapdev/stapsys gid invocation of - staprun/stapio. This patch simplifies the relevant code in - staprun/ctl.c, init_ctl_channel(), to rely on openat/etc. to populate - and use the relay_basedir_fd as much as possible. Also, we now avoid - unnecessary use of access(), which was checking against the wrong - (real rather than effective) uid/gid. - -diff --git a/staprun/ctl.c b/staprun/ctl.c -index 4be68af..da3417b 100644 ---- a/staprun/ctl.c -+++ b/staprun/ctl.c -@@ -14,111 +14,70 @@ - - #define CTL_CHANNEL_NAME ".cmd" - -+ -+#ifndef HAVE_OPENAT -+#error "need openat" -+#endif -+ -+ -+// This function does multiple things: -+// -+// 1) if needed, open the running module's directory (the one that -+// contains .ctl), stash fd in relay_basedir_fd; this will be -+// passed to stapio children via -F$fd for privilege passing -+// -+// 2) (re)open the running module's .ctl file, stash fd in the -+// control_channel global; this will be used all over the place. -+// -+// Return 0 on success. -+// -+// See also PR14245, PR26665, RHBZ1902696 = PR23512 -+// - int init_ctl_channel(const char *name, int verb) - { -- char buf[PATH_MAX] = ""; // the .ctl file name -- char buf2[PATH_MAX] = ""; // other tmp stuff -- struct statfs st; -- - (void) verb; -- if (0) goto out; /* just to defeat gcc warnings */ - -- /* Before trying to open the control channel, make sure it -- * isn't already open. */ -- close_ctl_channel(); -+ // Already got them both? -+ if (control_channel >= 0 && relay_basedir_fd >= 0) -+ return 0; - --#ifdef HAVE_OPENAT -- if (relay_basedir_fd >= 0) { -- strncpy(buf, CTL_CHANNEL_NAME, PATH_MAX - 1); -- control_channel = openat_cloexec(relay_basedir_fd, -- CTL_CHANNEL_NAME, O_RDWR, 0); -- dbug(2, "Opened %s (%d)\n", CTL_CHANNEL_NAME, control_channel); -+ // Need relay_basedir_fd .... ok try /sys/kernel/debug/systemtap/ -+ if (relay_basedir_fd < 0) { -+ char buf[PATH_MAX] = ""; -+ struct statfs st; - -- /* NB: Extra real-id access check as below */ -- if (faccessat(relay_basedir_fd, CTL_CHANNEL_NAME, R_OK|W_OK, 0) != 0){ -- close(control_channel); -- return -5; -- } -- if (control_channel >= 0) -- goto out; /* It's OK to bypass the [f]access[at] check below, -- since this would only occur the *second* time -- staprun tries this gig, or within unprivileged stapio. */ -+ if (sprintf_chk(buf, "/sys/kernel/debug/systemtap/%s", name)) -+ return -EINVAL; -+ -+ if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC) -+ relay_basedir_fd = open (buf, O_DIRECTORY | O_RDONLY); - } -- /* PR14245, NB: we fall through to /sys ... /proc searching, -- in case the relay_basedir_fd option wasn't given (i.e., for -- early in staprun), or if errors out for some reason. */ --#endif -- - -- // See if we have the .ctl file in debugfs -- if (sprintf_chk(buf2, "/sys/kernel/debug/systemtap/%s/%s", -- name, CTL_CHANNEL_NAME)) -- return -1; -- if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC && -- (access (buf2, W_OK)==0)) { -- /* PR14245: allow subsequent operations, and if -- necessary, staprun->stapio forks, to reuse an fd for -- directory lookups (even if some parent directories have -- perms 0700. */ -- strcpy(buf, buf2); // committed -+ // Still need relay_basedir_fd ... ok try /proc/systemtap/ -+ if (relay_basedir_fd < 0) { -+ char buf[PATH_MAX] = ""; - --#ifdef HAVE_OPENAT -- if (! sprintf_chk(buf2, "/sys/kernel/debug/systemtap/%s", name)) { -- relay_basedir_fd = open (buf2, O_DIRECTORY | O_RDONLY); -- } --#endif -- } -- -- // PR26665: try /proc/systemtap/... also -- // (STP_TRANSPORT_1 used to use this for other purposes.) -- if (sprintf_chk(buf2, "/proc/systemtap/%s/%s", -- name, CTL_CHANNEL_NAME)) -- return -1; -- if (relay_basedir_fd < 0 && (access(buf2, W_OK)==0)) { -- strcpy(buf, buf2); // committed -+ if (sprintf_chk(buf, "/proc/systemtap/%s", name)) -+ return -EINVAL; - --#ifdef HAVE_OPENAT -- if (! sprintf_chk(buf2, "/proc/systemtap/%s", name)) { -- relay_basedir_fd = open (buf2, O_DIRECTORY | O_RDONLY); -- } --#endif -+ relay_basedir_fd = open (buf, O_DIRECTORY | O_RDONLY); - } - -- /* At this point, we have buf, which is the full path to the .ctl file, -- and we may have a relay_basedir_fd, which is useful to pass across -- staprun->stapio fork/execs. */ -- -- control_channel = open_cloexec(buf, O_RDWR, 0); -- dbug(2, "Opened %s (%d)\n", buf, control_channel); -- -- /* NB: Even if open() succeeded with effective-UID permissions, we -- * need the access() check to make sure real-UID permissions are also -- * sufficient. When we run under the setuid staprun, effective and -- * real UID may not be the same. Specifically, we want to prevent -- * a local stapusr from trying to attach to a different stapusr's module. -- * -- * The access() is done *after* open() to avoid any TOCTOU-style race -- * condition. We believe it's probably safe either way, as the file -- * we're trying to access connot be modified by a typical user, but -- * better safe than sorry. -- */ --#ifdef HAVE_OPENAT -- if (control_channel >= 0 && relay_basedir_fd >= 0) { -- if (faccessat (relay_basedir_fd, CTL_CHANNEL_NAME, R_OK|W_OK, 0) == 0) -- goto out; -- /* else fall through */ -+ // Got relay_basedir_fd, need .ctl -+ if (relay_basedir_fd >= 0) { -+ // verify that the ctl file is accessible to our real uid/gid -+ if (faccessat(relay_basedir_fd, CTL_CHANNEL_NAME, R_OK|W_OK, 0) != 0) -+ return -EPERM; -+ -+ control_channel = openat_cloexec(relay_basedir_fd, -+ CTL_CHANNEL_NAME, O_RDWR, 0); - } --#endif -- if (control_channel >= 0 && access(buf, R_OK|W_OK) != 0) { -- close(control_channel); -- return -5; -- } - --out: -- if (control_channel < 0) { -+ // Fell through -+ if (relay_basedir_fd < 0 || control_channel < 0) { - err(_("Cannot attach to module %s control channel; not running?\n"), - name); -- return -3; -+ return -EINVAL; - } - return 0; - } - -commit 1120422c2822be9e00d8d11cab3fb381d2ce0cce -Author: Frank Ch. Eigler -Date: Sun Dec 13 21:19:15 2020 -0500 - - PR27067 <<< corrected bug# for previous commit -commit cd5b72a538a404011d27d86ff958355ac2c45b8d -Author: Frank Ch. Eigler -Date: Sun Jan 24 14:45:54 2021 -0500 - - PR27067: set procfs traceNN files' uid/gid too - - commit e3d03db828 neglected to include the proper calls to set the - procfs traceNN files to the correct uid/gid ownership. With those - files left as uid/gid=0/0, stapio running with a user with - stapusr/stapdev privileges couldn't fopenat() those files. Now they - can again. This problem became obvious after commit 4706ab3ca5c0, - which makes STAP_TRANS_PROCFS the default. - -diff --git a/runtime/transport/procfs.c b/runtime/transport/procfs.c -index 97a6e123a..69591a235 100644 ---- a/runtime/transport/procfs.c -+++ b/runtime/transport/procfs.c -@@ -336,12 +336,14 @@ __stp_procfs_relay_create_buf_file_callback(const char *filename, - if (parent != _stp_procfs_module_dir_path.dentry) - goto out; - -- pde = proc_create (filename, 0600, -+ pde = proc_create (filename, 0400, - _stp_procfs_module_dir, - & relay_procfs_operations); - if (pde == NULL) - goto out; - -+ proc_set_user(pde, KUIDT_INIT(_stp_uid), KGIDT_INIT(_stp_gid)); -+ - rc = snprintf(fullpath, sizeof(fullpath), "/proc/systemtap/%s/%s", - THIS_MODULE->name, filename); - diff --git a/SOURCES/rhbz1906662.patch b/SOURCES/rhbz1906662.patch deleted file mode 100644 index a2ab1ee..0000000 --- a/SOURCES/rhbz1906662.patch +++ /dev/null @@ -1,2831 +0,0 @@ -commit 374d37118ae1274077a425261ef1428151eb6d7c -Author: Sultan Alsawaf -Date: Tue Nov 10 10:03:34 2020 -0800 - - stp_utrace: disable IRQs when holding the bucket spin lock - - This lock can be acquired from inside an IRQ, leading to a deadlock: - - WARNING: inconsistent lock state - 4.14.35-1902.6.6.el7uek.x86_64.debug #2 Tainted: G OE - -------------------------------- - inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. - sh/15779 [HC1[1]:SC0[0]:HE0:SE1] takes: - (&(lock)->rlock#3){?.+.}, at: [] _stp_mempool_alloc+0x35/0xab [orxray_lj_lua_fgraph_XXXXXXX] - {HARDIRQ-ON-W} state was registered at: - lock_acquire+0xe0/0x238 - _raw_spin_lock+0x3d/0x7a - utrace_task_alloc+0xa4/0xe3 [orxray_lj_lua_fgraph_XXXXXXX] - utrace_attach_task+0x136/0x194 [orxray_lj_lua_fgraph_XXXXXXX] - __stp_utrace_attach+0x57/0x216 [orxray_lj_lua_fgraph_XXXXXXX] - stap_start_task_finder+0x12e/0x33f [orxray_lj_lua_fgraph_XXXXXXX] - systemtap_module_init+0x114d/0x11f0 [orxray_lj_lua_fgraph_XXXXXXX] - _stp_handle_start+0xea/0x1c5 [orxray_lj_lua_fgraph_XXXXXXX] - _stp_ctl_write_cmd+0x28d/0x2d1 [orxray_lj_lua_fgraph_XXXXXXX] - full_proxy_write+0x67/0xbb - __vfs_write+0x3a/0x170 - vfs_write+0xc7/0x1c0 - SyS_write+0x58/0xbf - do_syscall_64+0x7e/0x22c - entry_SYSCALL_64_after_hwframe+0x16e/0x0 - irq event stamp: 9454 - hardirqs last enabled at (9453): [] _raw_write_unlock_irqrestore+0x40/0x67 - hardirqs last disabled at (9454): [] apic_timer_interrupt+0x1c7/0x1d1 - softirqs last enabled at (9202): [] __do_softirq+0x361/0x4e5 - softirqs last disabled at (9195): [] irq_exit+0xf6/0x102 - - other info that might help us debug this: - Possible unsafe locking scenario: - - CPU0 - ---- - lock(&(lock)->rlock#3); - - lock(&(lock)->rlock#3); - - *** DEADLOCK *** - - no locks held by sh/15779. - - stack backtrace: - CPU: 16 PID: 15779 Comm: sh Tainted: G OE 4.14.35-1902.6.6.el7uek.x86_64.debug #2 - Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 - Call Trace: - - dump_stack+0x81/0xb6 - print_usage_bug+0x1fc/0x20d - ? check_usage_backwards+0x130/0x12b - mark_lock+0x1f8/0x27b - __lock_acquire+0x6e7/0x165a - ? sched_clock_local+0x18/0x81 - ? perf_swevent_hrtimer+0x136/0x151 - lock_acquire+0xe0/0x238 - ? _stp_mempool_alloc+0x35/0xab [orxray_lj_lua_fgraph_XXXXXXX] - _raw_spin_lock_irqsave+0x55/0x97 - ? _stp_mempool_alloc+0x35/0xab [orxray_lj_lua_fgraph_XXXXXXX] - _stp_mempool_alloc+0x35/0xab [orxray_lj_lua_fgraph_XXXXXXX] - _stp_ctl_get_buffer+0x69/0x215 [orxray_lj_lua_fgraph_XXXXXXX] - _stp_ctl_send+0x4e/0x169 [orxray_lj_lua_fgraph_XXXXXXX] - _stp_vlog+0xac/0x143 [orxray_lj_lua_fgraph_XXXXXXX] - ? _stp_utrace_probe_cb+0xa4/0xa4 [orxray_lj_lua_fgraph_XXXXXXX] - _stp_warn+0x6a/0x88 [orxray_lj_lua_fgraph_XXXXXXX] - function___global_warn__overload_0+0x60/0xac [orxray_lj_lua_fgraph_XXXXXXX] - probe_67+0xce/0x10e [orxray_lj_lua_fgraph_XXXXXXX] - _stp_hrtimer_notify_function+0x2db/0x55f [orxray_lj_lua_fgraph_XXXXXXX] - __hrtimer_run_queues+0x132/0x5c5 - hrtimer_interrupt+0xb7/0x1ca - smp_apic_timer_interrupt+0xa5/0x35a - apic_timer_interrupt+0x1cc/0x1d1 - - -diff --git a/runtime/stp_utrace.c b/runtime/stp_utrace.c -index e2880f1e4..46ba48923 100644 ---- a/runtime/stp_utrace.c -+++ b/runtime/stp_utrace.c -@@ -490,9 +490,9 @@ static int utrace_exit(void) - rcu_read_lock(); - stap_hlist_for_each_entry_rcu(utrace, node, &bucket->head, hlist) { - utrace->freed = true; -- stp_spin_lock(&bucket->lock); -+ stp_spin_lock_irqsave(&bucket->lock, flags); - hlist_del_rcu(&utrace->hlist); -- stp_spin_unlock(&bucket->lock); -+ stp_spin_unlock_irqrestore(&bucket->lock, flags); - - utrace_cleanup(utrace); - } -@@ -724,6 +724,7 @@ static struct utrace *utrace_task_alloc(struct utrace_bucket *bucket, - struct task_struct *task) - { - struct utrace *utrace; -+ unsigned long flags; - - utrace = kmem_cache_zalloc(utrace_cachep, STP_ALLOC_FLAGS); - if (unlikely(!utrace)) -@@ -739,9 +740,9 @@ static struct utrace *utrace_task_alloc(struct utrace_bucket *bucket, - atomic_set(&utrace->resume_work_added, 0); - atomic_set(&utrace->report_work_added, 0); - -- stp_spin_lock(&bucket->lock); -+ stp_spin_lock_irqsave(&bucket->lock, flags); - hlist_add_head_rcu(&utrace->hlist, &bucket->head); -- stp_spin_unlock(&bucket->lock); -+ stp_spin_unlock_irqrestore(&bucket->lock, flags); - return utrace; - } - -@@ -768,15 +769,17 @@ static struct utrace *utrace_task_alloc(struct utrace_bucket *bucket, - */ - static void utrace_free(struct utrace_bucket *bucket, struct utrace *utrace) - { -+ unsigned long flags; -+ - if (unlikely(!utrace)) - return; - - /* Remove this utrace from the mapping list of tasks to - * struct utrace. */ - utrace->freed = true; -- stp_spin_lock(&bucket->lock); -+ stp_spin_lock_irqsave(&bucket->lock, flags); - hlist_del_rcu(&utrace->hlist); -- stp_spin_unlock(&bucket->lock); -+ stp_spin_unlock_irqrestore(&bucket->lock, flags); - - /* Free the utrace struct. */ - #ifdef STP_TF_DEBUG -commit 6a092f5ae824d4ce972c10b8681b9272e2fd67f3 -Author: Sultan Alsawaf -Date: Tue Nov 17 11:03:53 2020 -0800 - - task_finder: call _stp_vma_done() upon error to fix memory leak - - The memory allocated inside stap_initialize_vma_map() is not freed upon - error when the task finder is started because a call to _stp_vma_done() - in the error path is missing. Add it to fix the leak. - -diff --git a/task_finder.cxx b/task_finder.cxx -index d08d44a75..7c45e728b 100644 ---- a/task_finder.cxx -+++ b/task_finder.cxx -@@ -66,6 +66,7 @@ task_finder_derived_probe_group::emit_module_init (systemtap_session& s) - - s.op->newline() << "if (rc) {"; - s.op->newline(1) << "stap_stop_task_finder();"; -+ s.op->newline() << "_stp_vma_done();"; - s.op->newline(-1) << "}"; - s.op->newline(-1) << "}"; - } -commit 3c4f82ca024df4f8e213f7c77418493262d4a4d7 -Author: Sultan Alsawaf -Date: Tue Nov 24 10:50:10 2020 -0800 - - runtime_context: factor out RCU usage using a rw lock - - We can factor out the RCU insanity in here by just adding in a rw lock - and using that to synchronize _stp_runtime_contexts_free() with any code - that has the runtime context held. - -diff --git a/runtime/linux/runtime_context.h b/runtime/linux/runtime_context.h -index 41fecba81..18566957a 100644 ---- a/runtime/linux/runtime_context.h -+++ b/runtime/linux/runtime_context.h -@@ -11,15 +11,14 @@ - #ifndef _LINUX_RUNTIME_CONTEXT_H_ - #define _LINUX_RUNTIME_CONTEXT_H_ - --#ifndef __rcu --#define __rcu --#endif -- --static struct context __rcu *contexts[NR_CPUS] = { NULL }; -+/* Can't use STP_DEFINE_RWLOCK() or this might be replaced with a spin lock */ -+static DEFINE_RWLOCK(_stp_context_lock); -+static DEFINE_PER_CPU(struct context *, contexts); -+static atomic_t _stp_context_stop = ATOMIC_INIT(0); - - static int _stp_runtime_contexts_alloc(void) - { -- int cpu; -+ unsigned int cpu; - - for_each_possible_cpu(cpu) { - /* Module init, so in user context, safe to use -@@ -31,91 +30,67 @@ static int _stp_runtime_contexts_alloc(void) - (unsigned long) sizeof (struct context)); - return -ENOMEM; - } -- rcu_assign_pointer(contexts[cpu], c); -+ per_cpu(contexts, cpu) = c; - } - return 0; - } - - /* We should be free of all probes by this time, but for example the timer for - * _stp_ctl_work_callback may still be running and looking for contexts. We -- * use RCU-sched synchronization to be sure its safe to free them. */ -+ * use _stp_context_stop and a write lock to be sure its safe to free them. */ - static void _stp_runtime_contexts_free(void) - { -- // Note that 'free_contexts' is static because it is -- // (probably) too big to fit on a kernel function's stack. -- static struct context *free_contexts[NR_CPUS] = { NULL }; -- int cpu; -+ unsigned long flags; -+ unsigned int cpu; - -- /* First, save all the pointers. */ -- rcu_read_lock_sched(); -- for_each_possible_cpu(cpu) { -- free_contexts[cpu] = rcu_dereference_sched(contexts[cpu]); -- } -- rcu_read_unlock_sched(); -+ /* Sync to make sure existing readers are done */ -+ atomic_set(&_stp_context_stop, 1); -+ write_lock_irqsave(&_stp_context_lock, flags); -+ write_unlock_irqrestore(&_stp_context_lock, flags); - -- /* Now clear all pointers to prevent new readers. */ -- for_each_possible_cpu(cpu) { -- rcu_assign_pointer(contexts[cpu], NULL); -- } -- -- /* Sync to make sure existing readers are done. */ -- stp_synchronize_sched(); -- -- /* Now we can actually free the contexts. */ -- for_each_possible_cpu(cpu) { -- struct context *c = free_contexts[cpu]; -- if (c != NULL) { -- free_contexts[cpu] = NULL; -- _stp_vfree(c); -- } -- } -+ /* Now we can actually free the contexts */ -+ for_each_possible_cpu(cpu) -+ _stp_vfree(per_cpu(contexts, cpu)); - } - - static inline struct context * _stp_runtime_get_context(void) - { -- // RHBZ1788662 rcu operations are rejected in idle-cpu contexts -- // in effect: skip probe if it's in rcu-idle state --#if defined(STAPCONF_RCU_IS_WATCHING) || LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0) // linux commit #5c173eb8 -- if (! rcu_is_watching()) -- return 0; --#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0) // linux commit #9b2e4f18 -- if (! rcu_is_cpu_idle()) -- return 0; --#else -- ; // XXX older kernels didn't put tracepoints in idle-cpu --#endif -- return rcu_dereference_sched(contexts[smp_processor_id()]); -+ if (atomic_read(&_stp_context_stop)) -+ return NULL; -+ -+ return per_cpu(contexts, smp_processor_id()); - } - - static struct context * _stp_runtime_entryfn_get_context(void) -+ __acquires(&_stp_context_lock) - { - struct context* __restrict__ c = NULL; -- preempt_disable (); -+ -+ if (!read_trylock(&_stp_context_lock)) -+ return NULL; -+ - c = _stp_runtime_get_context(); - if (c != NULL) { -- if (atomic_inc_return(&c->busy) == 1) { -- // NB: Notice we're not re-enabling preemption -+ if (!atomic_cmpxchg(&c->busy, 0, 1)) { -+ // NB: Notice we're not releasing _stp_context_lock - // here. We exepect the calling code to call - // _stp_runtime_entryfn_get_context() and - // _stp_runtime_entryfn_put_context() as a - // pair. - return c; - } -- atomic_dec(&c->busy); - } -- preempt_enable_no_resched(); -+ read_unlock(&_stp_context_lock); - return NULL; - } - - static inline void _stp_runtime_entryfn_put_context(struct context *c) -+ __releases(&_stp_context_lock) - { - if (c) { -- if (c == _stp_runtime_get_context()) -- atomic_dec(&c->busy); -- /* else, warn about bad state? */ -- preempt_enable_no_resched(); -+ atomic_set(&c->busy, 0); -+ read_unlock(&_stp_context_lock); - } -- return; - } - - static void _stp_runtime_context_wait(void) -@@ -130,9 +105,13 @@ static void _stp_runtime_context_wait(void) - int i; - - holdon = 0; -- rcu_read_lock_sched(); -+ read_lock(&_stp_context_lock); -+ if (atomic_read(&_stp_context_stop)) { -+ read_unlock(&_stp_context_lock); -+ break; -+ } - for_each_possible_cpu(i) { -- struct context *c = rcu_dereference_sched(contexts[i]); -+ struct context *c = per_cpu(contexts, i); - if (c != NULL - && atomic_read (& c->busy)) { - holdon = 1; -@@ -146,7 +125,7 @@ static void _stp_runtime_context_wait(void) - } - } - } -- rcu_read_unlock_sched(); -+ read_unlock(&_stp_context_lock); - - /* - * Just in case things are really really stuck, a -commit bb25d64f7b6c98ef2fc8b711f19bd6271866d727 -Author: Sultan Alsawaf -Date: Tue Dec 1 09:54:07 2020 -0800 - - runtime_context: synchronize _stp_context_stop more strictly - - We're only reading _stp_context_stop while the read lock is held, so we - can move the modification of it to inside the write lock to ensure - strict memory ordering. As such, it no longer needs to be an atomic_t - variable. - - We also don't need to disable IRQs when holding the write lock because - only read_trylock is used from IRQ context, not read_lock, so there's no - possibility of a deadlock occurring. - -diff --git a/runtime/linux/runtime_context.h b/runtime/linux/runtime_context.h -index 18566957a..e716e6d39 100644 ---- a/runtime/linux/runtime_context.h -+++ b/runtime/linux/runtime_context.h -@@ -14,7 +14,7 @@ - /* Can't use STP_DEFINE_RWLOCK() or this might be replaced with a spin lock */ - static DEFINE_RWLOCK(_stp_context_lock); - static DEFINE_PER_CPU(struct context *, contexts); --static atomic_t _stp_context_stop = ATOMIC_INIT(0); -+static bool _stp_context_stop; - - static int _stp_runtime_contexts_alloc(void) - { -@@ -40,13 +40,12 @@ static int _stp_runtime_contexts_alloc(void) - * use _stp_context_stop and a write lock to be sure its safe to free them. */ - static void _stp_runtime_contexts_free(void) - { -- unsigned long flags; - unsigned int cpu; - - /* Sync to make sure existing readers are done */ -- atomic_set(&_stp_context_stop, 1); -- write_lock_irqsave(&_stp_context_lock, flags); -- write_unlock_irqrestore(&_stp_context_lock, flags); -+ write_lock(&_stp_context_lock); -+ _stp_context_stop = true; -+ write_unlock(&_stp_context_lock); - - /* Now we can actually free the contexts */ - for_each_possible_cpu(cpu) -@@ -55,7 +54,7 @@ static void _stp_runtime_contexts_free(void) - - static inline struct context * _stp_runtime_get_context(void) - { -- if (atomic_read(&_stp_context_stop)) -+ if (_stp_context_stop) - return NULL; - - return per_cpu(contexts, smp_processor_id()); -@@ -106,7 +105,7 @@ static void _stp_runtime_context_wait(void) - - holdon = 0; - read_lock(&_stp_context_lock); -- if (atomic_read(&_stp_context_stop)) { -+ if (_stp_context_stop) { - read_unlock(&_stp_context_lock); - break; - } -commit 0cc239e6f0fff79cb584fc857d3220402558db37 -Author: Sultan Alsawaf -Date: Tue Dec 1 18:47:04 2020 -0800 - - runtime_context: replace _stp_context_lock with an atomic variable - - We can't use any lock primitives here, such as spin locks or rw locks, - because lock_acquire() has tracepoints inside of it. This can cause a - deadlock, so we have to roll our own synchronization mechanism using an - atomic variable. - -diff --git a/runtime/linux/runtime_context.h b/runtime/linux/runtime_context.h -index e716e6d39..7dd240e1a 100644 ---- a/runtime/linux/runtime_context.h -+++ b/runtime/linux/runtime_context.h -@@ -11,10 +11,9 @@ - #ifndef _LINUX_RUNTIME_CONTEXT_H_ - #define _LINUX_RUNTIME_CONTEXT_H_ - --/* Can't use STP_DEFINE_RWLOCK() or this might be replaced with a spin lock */ --static DEFINE_RWLOCK(_stp_context_lock); -+/* Can't use a lock primitive for this because lock_acquire() has tracepoints */ -+static atomic_t _stp_contexts_busy_ctr = ATOMIC_INIT(0); - static DEFINE_PER_CPU(struct context *, contexts); --static bool _stp_context_stop; - - static int _stp_runtime_contexts_alloc(void) - { -@@ -37,15 +36,14 @@ static int _stp_runtime_contexts_alloc(void) - - /* We should be free of all probes by this time, but for example the timer for - * _stp_ctl_work_callback may still be running and looking for contexts. We -- * use _stp_context_stop and a write lock to be sure its safe to free them. */ -+ * use _stp_contexts_busy_ctr to be sure its safe to free them. */ - static void _stp_runtime_contexts_free(void) - { - unsigned int cpu; - - /* Sync to make sure existing readers are done */ -- write_lock(&_stp_context_lock); -- _stp_context_stop = true; -- write_unlock(&_stp_context_lock); -+ while (atomic_cmpxchg(&_stp_contexts_busy_ctr, 0, INT_MAX)) -+ cpu_relax(); - - /* Now we can actually free the contexts */ - for_each_possible_cpu(cpu) -@@ -54,24 +52,20 @@ static void _stp_runtime_contexts_free(void) - - static inline struct context * _stp_runtime_get_context(void) - { -- if (_stp_context_stop) -- return NULL; -- - return per_cpu(contexts, smp_processor_id()); - } - - static struct context * _stp_runtime_entryfn_get_context(void) -- __acquires(&_stp_context_lock) - { - struct context* __restrict__ c = NULL; - -- if (!read_trylock(&_stp_context_lock)) -+ if (!atomic_add_unless(&_stp_contexts_busy_ctr, 1, INT_MAX)) - return NULL; - - c = _stp_runtime_get_context(); - if (c != NULL) { - if (!atomic_cmpxchg(&c->busy, 0, 1)) { -- // NB: Notice we're not releasing _stp_context_lock -+ // NB: Notice we're not releasing _stp_contexts_busy_ctr - // here. We exepect the calling code to call - // _stp_runtime_entryfn_get_context() and - // _stp_runtime_entryfn_put_context() as a -@@ -79,16 +73,15 @@ static struct context * _stp_runtime_entryfn_get_context(void) - return c; - } - } -- read_unlock(&_stp_context_lock); -+ atomic_dec(&_stp_contexts_busy_ctr); - return NULL; - } - - static inline void _stp_runtime_entryfn_put_context(struct context *c) -- __releases(&_stp_context_lock) - { - if (c) { - atomic_set(&c->busy, 0); -- read_unlock(&_stp_context_lock); -+ atomic_dec(&_stp_contexts_busy_ctr); - } - } - -@@ -104,11 +97,9 @@ static void _stp_runtime_context_wait(void) - int i; - - holdon = 0; -- read_lock(&_stp_context_lock); -- if (_stp_context_stop) { -- read_unlock(&_stp_context_lock); -+ if (!atomic_add_unless(&_stp_contexts_busy_ctr, 1, INT_MAX)) - break; -- } -+ - for_each_possible_cpu(i) { - struct context *c = per_cpu(contexts, i); - if (c != NULL -@@ -124,7 +115,7 @@ static void _stp_runtime_context_wait(void) - } - } - } -- read_unlock(&_stp_context_lock); -+ atomic_dec(&_stp_contexts_busy_ctr); - - /* - * Just in case things are really really stuck, a -commit fbab0ea35e6af0d6599c6de3708b24008bf03ae6 -Author: Sultan Alsawaf -Date: Wed Dec 2 11:07:11 2020 -0800 - - runtime_context: disable preempt while holding the context - - After the context lock was converted to an atomic in the previous - commit, the preempt disable logic disappeared. Add it back. - -diff --git a/runtime/linux/runtime_context.h b/runtime/linux/runtime_context.h -index 7dd240e1a..7a1532e54 100644 ---- a/runtime/linux/runtime_context.h -+++ b/runtime/linux/runtime_context.h -@@ -34,6 +34,24 @@ static int _stp_runtime_contexts_alloc(void) - return 0; - } - -+static bool _stp_runtime_context_trylock(void) -+{ -+ bool locked; -+ -+ preempt_disable(); -+ locked = atomic_add_unless(&_stp_contexts_busy_ctr, 1, INT_MAX); -+ if (!locked) -+ preempt_enable_no_resched(); -+ -+ return locked; -+} -+ -+static void _stp_runtime_context_unlock(void) -+{ -+ atomic_dec(&_stp_contexts_busy_ctr); -+ preempt_enable_no_resched(); -+} -+ - /* We should be free of all probes by this time, but for example the timer for - * _stp_ctl_work_callback may still be running and looking for contexts. We - * use _stp_contexts_busy_ctr to be sure its safe to free them. */ -@@ -59,7 +77,7 @@ static struct context * _stp_runtime_entryfn_get_context(void) - { - struct context* __restrict__ c = NULL; - -- if (!atomic_add_unless(&_stp_contexts_busy_ctr, 1, INT_MAX)) -+ if (!_stp_runtime_context_trylock()) - return NULL; - - c = _stp_runtime_get_context(); -@@ -73,7 +91,7 @@ static struct context * _stp_runtime_entryfn_get_context(void) - return c; - } - } -- atomic_dec(&_stp_contexts_busy_ctr); -+ _stp_runtime_context_unlock(); - return NULL; - } - -@@ -81,7 +99,7 @@ static inline void _stp_runtime_entryfn_put_context(struct context *c) - { - if (c) { - atomic_set(&c->busy, 0); -- atomic_dec(&_stp_contexts_busy_ctr); -+ _stp_runtime_context_unlock(); - } - } - -@@ -97,7 +115,7 @@ static void _stp_runtime_context_wait(void) - int i; - - holdon = 0; -- if (!atomic_add_unless(&_stp_contexts_busy_ctr, 1, INT_MAX)) -+ if (!_stp_runtime_context_trylock()) - break; - - for_each_possible_cpu(i) { -@@ -115,7 +133,7 @@ static void _stp_runtime_context_wait(void) - } - } - } -- atomic_dec(&_stp_contexts_busy_ctr); -+ _stp_runtime_context_unlock(); - - /* - * Just in case things are really really stuck, a -commit aedc044d5d38cb2fa6144d0a3345d06847862f1b -Author: Sultan Alsawaf -Date: Wed Dec 2 11:27:47 2020 -0800 - - task_finder_vma: add kfree_rcu() compat for old kernels - - Newer RHEL 6 kernels have kfree_rcu(), but older ones do not. Using - kfree_rcu() is beneficial because it lets the RCU subsystem know that - the queued RCU callback is low-priority, and can be deferred, hence why - we don't replace kfree_rcu() with call_rcu() outright. Luckily, - kfree_rcu() is a macro so we can just #ifdef with it. - -diff --git a/runtime/task_finder_vma.c b/runtime/task_finder_vma.c -index 7f0f6ed56..dc77a80f5 100644 ---- a/runtime/task_finder_vma.c -+++ b/runtime/task_finder_vma.c -@@ -87,6 +87,15 @@ __stp_tf_vma_new_entry(void) - return entry; - } - -+#ifndef kfree_rcu -+static void __stp_tf_vma_free_entry(struct rcu_head *rcu) -+{ -+ struct __stp_tf_vma_entry *entry = container_of(rcu, typeof(*entry), rcu); -+ -+ kfree(entry); -+} -+#endif -+ - // __stp_tf_vma_put_entry(): Put a specified number of references on the entry. - static void - __stp_tf_vma_put_entry(struct __stp_tf_vma_bucket *bucket, -@@ -106,7 +115,11 @@ __stp_tf_vma_put_entry(struct __stp_tf_vma_bucket *bucket, - hlist_del_rcu(&entry->hlist); - stp_spin_unlock_irqrestore(&bucket->lock, flags); - -+#ifdef kfree_rcu - kfree_rcu(entry, rcu); -+#else -+ call_rcu(&entry->rcu, __stp_tf_vma_free_entry); -+#endif - } - - // stap_initialize_vma_map(): Initialize the free list. Grabs the -commit 6a27888b118b7a94650a68aae028957cdd5fb5f5 -Author: Sultan Alsawaf -Date: Wed Dec 2 18:09:17 2020 -0800 - - REVERTME: tapset-timers: work around on-the-fly deadlocks caused by mutex_trylock - - The following deadlock exists due to tracepoints existing inside a lock - that is used both inside probe context and outside probe context: - #0 [ffff88017f6d7a08] kvm_wait at ffffffff81079f5a - #1 [ffff88017f6d7a30] __pv_queued_spin_lock_slowpath at ffffffff8114f51e - #2 [ffff88017f6d7a70] queued_spin_lock_slowpath at ffffffff810e842b - #3 [ffff88017f6d7a80] mutex_trylock at ffffffff81882b1b - #4 [ffff88017f6d7ab8] _stp_transport_trylock_relay_inode at ffffffffc0c599df [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #5 [ffff88017f6d7ad8] __stp_print_flush at ffffffffc09b6483 [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #6 [ffff88017f6d7b10] probe_7879 at ffffffffc0a98c85 [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #7 [ffff88017f6d7b38] enter_real_tracepoint_probe_1543 at ffffffffc0c3b757 [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #8 [ffff88017f6d7b70] enter_tracepoint_probe_1543 at ffffffffc09b117e [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #9 [ffff88017f6d7b80] lock_acquire at ffffffff811460ba - #10 [ffff88017f6d7be8] mutex_trylock at ffffffff81882a27 - #11 [ffff88017f6d7c20] _stp_transport_trylock_relay_inode at ffffffffc0c599df [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #12 [ffff88017f6d7c40] __stp_print_flush at ffffffffc09b6483 [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #13 [ffff88017f6d7c78] _stp_vlog at ffffffffc09b8d32 [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #14 [ffff88017f6d7cd8] _stp_dbug at ffffffffc09ba43b [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #15 [ffff88017f6d7d38] systemtap_module_refresh at ffffffffc09ba51d [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #16 [ffff88017f6d7d50] module_refresher at ffffffffc09ba53e [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #17 [ffff88017f6d7d60] process_one_work at ffffffff810da9cc - #18 [ffff88017f6d7de8] worker_thread at ffffffff810dafe6 - #19 [ffff88017f6d7e48] kthread at ffffffff810e44cf - #20 [ffff88017f6d7f50] ret_from_fork_nospec_begin at ffffffff818958dd - - Note the deadlock due to _stp_transport_trylock_relay_inode recursing - onto itself via mutex_trylock. - - This is a temporary fix for the issue until a proper patch is made to - remove the mutex_trylock from __stp_print_flush. This should be reverted - when that patch lands (it will have something to do with bulkmode). - -diff --git a/tapset-timers.cxx b/tapset-timers.cxx -index 10da17cda..503498c85 100644 ---- a/tapset-timers.cxx -+++ b/tapset-timers.cxx -@@ -391,11 +391,11 @@ hrtimer_derived_probe_group::emit_module_refresh (systemtap_session& s) - s.op->newline(+1) << "struct stap_hrtimer_probe* stp = &stap_hrtimer_probes[i];"; - // timer disabled, but condition says enabled? - s.op->newline( 0) << "if (!stp->enabled && stp->probe->cond_enabled) {"; -- s.op->newline(+1) << "dbug_otf(\"enabling (hrtimer) pidx %zu\\n\", stp->probe->index);"; -+ s.op->newline(+1) << "//dbug_otf(\"enabling (hrtimer) pidx %zu\\n\", stp->probe->index);"; - s.op->newline( 0) << "_stp_hrtimer_start(stp);"; - // timer enabled, but condition says disabled? - s.op->newline(-1) << "} else if (stp->enabled && !stp->probe->cond_enabled) {"; -- s.op->newline(+1) << "dbug_otf(\"disabling (hrtimer) pidx %zu\\n\", stp->probe->index);"; -+ s.op->newline(+1) << "//dbug_otf(\"disabling (hrtimer) pidx %zu\\n\", stp->probe->index);"; - s.op->newline( 0) << "_stp_hrtimer_cancel(stp);"; - s.op->newline(-1) << "}"; - s.op->newline( 0) << "stp->enabled = stp->probe->cond_enabled;"; -commit 7187dcf39412fcb25c432d318be8e49a6051f055 -Author: Sultan Alsawaf -Date: Thu Dec 3 12:57:34 2020 -0800 - - runtime: fix print races in IRQ context and during print cleanup - - Prints can race when there's a print called from IRQ context or a print - called while print cleanup takes place, which can lead to garbled print - messages, out-of-bounds memory accesses, and memory use-after-free. This - is one example of racy modification of the print buffer len in IRQ - context which caused a panic due to an out-of-bounds memory access: - - BUG: unable to handle kernel paging request at ffffe8ffff621000 - IP: [] _stp_vsprint_memory+0x83/0x950 [stap_2c44636dfda18135ca3012a752599da6_13_533] - PGD 174b90067 PUD 174b8f067 PMD 174b93067 PTE 0 - Oops: 0002 [#1] SMP - CPU: 12 PID: 3468 Comm: cat Kdump: loaded Tainted: G OE ------------ 3.10.0-1127.19.1.el7.x86_64.debug #1 - Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014 - task: ffff88001f4f0000 ti: ffff88004ea5c000 task.ti: ffff88004ea5c000 - RIP: 0010:[] [] _stp_vsprint_memory+0x83/0x950 [stap_2c44636dfda18135ca3012a752599da6_13_533] - RSP: 0018:ffff88004ea5f9a8 EFLAGS: 00010082 - RAX: ffffe8ffff621001 RBX: ffffe8ffff620ff2 RCX: fffffffffffffffe - RDX: 000000000000006e RSI: ffffffffffffffff RDI: ffffc90002c23730 - RBP: ffff88004ea5fa28 R08: 00000000ffffffff R09: 0000000000000073 - R10: ffffc90002c243d7 R11: 0000000000000001 R12: ffffc90002c2373f - R13: ffffe8ffff621004 R14: 0000000000000012 R15: 00000000fffffffe - FS: 00007f8a9b1d4740(0000) GS:ffff880179e00000(0000) knlGS:0000000000000000 - CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 - CR2: ffffe8ffff621000 CR3: 00000000b3e3c000 CR4: 0000000000360fe0 - DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 - DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 - Call Trace: - [] ? sched_clock+0x9/0x10 - [] ? lock_release_holdtime.part.30+0xf/0x1a0 - [] function___global_trace__overload_0+0x5b0/0x1220 [stap_2c44636dfda18135ca3012a752599da6_13_533] - [] ? stp_lock_probe+0x53/0xe0 [stap_2c44636dfda18135ca3012a752599da6_13_533] - [] ? kretprobe_trampoline_holder+0x9/0x9 - [] probe_7118+0x82/0xe0 [stap_2c44636dfda18135ca3012a752599da6_13_533] - [] enter_kretprobe_common+0x256/0x490 [stap_2c44636dfda18135ca3012a752599da6_13_533] - [] ? proc_sys_open+0x51/0x60 - [] enter_kretprobe_probe+0x10/0x20 [stap_2c44636dfda18135ca3012a752599da6_13_533] - [] trampoline_handler+0x148/0x220 - [] ? proc_sys_open+0x51/0x60 - [] kretprobe_trampoline+0x25/0x57 - [] ? proc_sys_open+0x51/0x60 - [] kretprobe_trampoline_holder+0x9/0x9 - [] ? security_inode_permission+0x22/0x30 - [] ? sysctl_head_finish+0x50/0x50 - [] vfs_open+0x5d/0xb0 - [] ? may_open+0x5a/0x120 - [] do_last+0x285/0x15b0 - [] ? link_path_walk+0x27e/0x8c0 - [] path_openat+0xd0/0x5d0 - [] ? kvm_clock_read+0x33/0x40 - [] do_filp_open+0x4d/0xb0 - [] ? _raw_spin_unlock+0x27/0x40 - [] ? __alloc_fd+0xfb/0x270 - [] do_sys_open+0x124/0x220 - [] SyS_open+0x1e/0x20 - [] kretprobe_trampoline_holder+0x9/0x9 - - This patch resolves the IRQ print races by disabling IRQs on the local - CPU when accessing said CPU's print buffer, and resolves the cleanup - races with a lock. We also protect against data corruption and panics - from prints inside NMIs now by checking if the current CPU was accessing - the log buffer when an NMI fired; in this case, the NMI's prints will be - dropped, as there is no way to safely service them without creating a - dedicated log buffer for them. This is achieved by forbidding reentrancy - with respect to _stp_print_trylock_irqsave() when the runtime context - isn't held. Reentrancy is otherwise allowed when the runtime context is - held because the runtime context provides reentrancy protection. - -diff --git a/runtime/linux/io.c b/runtime/linux/io.c -index 74a032c52..122708e2a 100644 ---- a/runtime/linux/io.c -+++ b/runtime/linux/io.c -@@ -20,9 +20,6 @@ - - #define WARN_STRING "WARNING: " - #define ERR_STRING "ERROR: " --#if (STP_LOG_BUF_LEN < 10) /* sizeof(WARN_STRING) */ --#error "STP_LOG_BUF_LEN is too short" --#endif - - enum code { INFO=0, WARN, ERROR, DBUG }; - -@@ -31,25 +28,37 @@ static void _stp_vlog (enum code type, const char *func, int line, const char *f - - static void _stp_vlog (enum code type, const char *func, int line, const char *fmt, va_list args) - { -+ struct _stp_log *log; -+ unsigned long flags; -+ size_t bytes_avail; - int num; -- char *buf = per_cpu_ptr(Stp_lbuf, get_cpu()); -+ char *buf; - int start = 0; - -+ if (!_stp_print_trylock_irqsave(&flags)) -+ return; -+ -+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); -+ bytes_avail = STP_BUFFER_SIZE - log->len; -+ if (unlikely(!bytes_avail)) -+ goto err_unlock; -+ -+ buf = &log->buf[log->len]; - if (type == DBUG) { -- start = _stp_snprintf(buf, STP_LOG_BUF_LEN, "%s:%d: ", func, line); -+ start = _stp_snprintf(buf, bytes_avail, "%s:%d: ", func, line); - } else if (type == WARN) { -- /* This strcpy() is OK, since we know STP_LOG_BUF_LEN -- * is > sizeof(WARN_STRING). */ -- strcpy (buf, WARN_STRING); -- start = sizeof(WARN_STRING) - 1; -+ strncpy(buf, WARN_STRING, bytes_avail); -+ start = min(bytes_avail, sizeof(WARN_STRING) - 1); - } else if (type == ERROR) { -- /* This strcpy() is OK, since we know STP_LOG_BUF_LEN -- * is > sizeof(ERR_STRING) (which is < sizeof(WARN_STRING). */ -- strcpy (buf, ERR_STRING); -- start = sizeof(ERR_STRING) - 1; -+ strncpy(buf, ERR_STRING, bytes_avail); -+ start = min(bytes_avail, sizeof(ERR_STRING) - 1); - } - -- num = vscnprintf (buf + start, STP_LOG_BUF_LEN - start - 1, fmt, args); -+ bytes_avail -= start; -+ if (unlikely(!bytes_avail)) -+ goto err_unlock; -+ -+ num = vscnprintf(buf + start, bytes_avail - 1, fmt, args); - if (num + start) { - if (buf[num + start - 1] != '\n') { - buf[num + start] = '\n'; -@@ -66,12 +75,13 @@ static void _stp_vlog (enum code type, const char *func, int line, const char *f - if (type != DBUG) { - _stp_ctl_send(STP_OOB_DATA, buf, start + num + 1); - } else { -- _stp_print(buf); -- _stp_print_flush(); -+ log->len += start + num; -+ __stp_print_flush(log); - } - #endif - } -- put_cpu(); -+err_unlock: -+ _stp_print_unlock_irqrestore(&flags); - } - - /** Prints warning. -diff --git a/runtime/linux/print.c b/runtime/linux/print.c -index 777bca8b0..2aa2f1c8d 100644 ---- a/runtime/linux/print.c -+++ b/runtime/linux/print.c -@@ -35,84 +35,179 @@ - * @{ - */ - --typedef struct __stp_pbuf { -- uint32_t len; /* bytes used in the buffer */ -+struct _stp_log { -+ unsigned int len; /* Bytes used in the buffer */ - char buf[STP_BUFFER_SIZE]; --} _stp_pbuf; -+ atomic_t reentrancy_lock; -+}; -+#include "print_flush.c" - --static void *Stp_pbuf = NULL; -+static struct _stp_log *_stp_log_pcpu; -+ -+/* -+ * An atomic counter is used to synchronize every possible print buffer usage -+ * with the _stp_print_cleanup() function. The cleanup function sets the counter -+ * to INT_MAX after waiting for everything using the print buffer to finish. We -+ * cannot use a lock primitive to implement this because lock_acquire() contains -+ * tracepoints and print statements are used both inside and outside of probes. -+ * If the lock were only used inside probes, the runtime context would protect -+ * us from recursing into the lock_acquire() tracepoints and deadlocking. We -+ * instead use _stp_print_ctr as if it were a read-write lock. -+ */ -+static atomic_t _stp_print_ctr = ATOMIC_INIT(0); - --/** private buffer for _stp_vlog() */ --#ifndef STP_LOG_BUF_LEN --#define STP_LOG_BUF_LEN 256 --#endif -+/* -+ * This disables IRQs to make per-CPU print buffer accesses atomic. There is a -+ * reentrancy protection mechanism specifically for NMIs, since they can violate -+ * our atomic guarantee. Reentrancy is otherwise allowed within code sections -+ * that have the runtime context held (via _stp_runtime_entryfn_get_context()). -+ */ -+static bool _stp_print_trylock_irqsave(unsigned long *flags) -+{ -+ bool context_held = false; -+ struct _stp_log *log; -+ -+ local_irq_save(*flags); -+ if (!atomic_add_unless(&_stp_print_ctr, 1, INT_MAX)) -+ goto irq_restore; -+ -+ /* -+ * Check the per-CPU reentrancy lock for contention, unless the runtime -+ * context is already held, in which case we already have reentrancy -+ * protection. Otherwise, if the reentrancy lock is contented, that -+ * means we're either inside an NMI that fired while the current CPU was -+ * accessing the log buffer, or something is trying to nest calls to -+ * _stp_print_trylock_irqsave(). Our only choice is to reject the log -+ * access attempt in this case because log buffer corruption and panics -+ * could ensue if we're inside an NMI. -+ */ -+ if (_stp_runtime_context_trylock()) { -+ struct context *c = _stp_runtime_get_context(); -+ context_held = c && atomic_read(&c->busy); -+ _stp_runtime_context_unlock(); -+ } - --typedef char _stp_lbuf[STP_LOG_BUF_LEN]; --static void *Stp_lbuf = NULL; -+ /* Fall back onto the reentrancy lock if the context isn't held */ -+ if (!context_held) { -+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); -+ if (atomic_cmpxchg(&log->reentrancy_lock, 0, 1)) -+ goto print_unlock; -+ } -+ -+ return true; -+ -+print_unlock: -+ atomic_dec(&_stp_print_ctr); -+irq_restore: -+ local_irq_restore(*flags); -+ return false; -+} -+ -+static void _stp_print_unlock_irqrestore(unsigned long *flags) -+{ -+ bool context_held = false; -+ struct _stp_log *log; -+ -+ if (_stp_runtime_context_trylock()) { -+ struct context *c = _stp_runtime_get_context(); -+ context_held = c && atomic_read(&c->busy); -+ _stp_runtime_context_unlock(); -+ } -+ -+ if (!context_held) { -+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); -+ atomic_set(&log->reentrancy_lock, 0); -+ } -+ -+ atomic_dec(&_stp_print_ctr); -+ local_irq_restore(*flags); -+} - - /* create percpu print and io buffers */ - static int _stp_print_init (void) - { -- Stp_pbuf = _stp_alloc_percpu(sizeof(_stp_pbuf)); -- if (unlikely(Stp_pbuf == 0)) -- return -1; -- -- /* now initialize IO buffer used in io.c */ -- Stp_lbuf = _stp_alloc_percpu(sizeof(_stp_lbuf)); -- if (unlikely(Stp_lbuf == 0)) { -- _stp_free_percpu(Stp_pbuf); -- return -1; -+ unsigned int cpu; -+ -+ _stp_log_pcpu = _stp_alloc_percpu(sizeof(*_stp_log_pcpu)); -+ if (!_stp_log_pcpu) -+ return -ENOMEM; -+ -+ for_each_possible_cpu(cpu) { -+ struct _stp_log *log = per_cpu_ptr(_stp_log_pcpu, cpu); -+ -+ log->reentrancy_lock = (atomic_t)ATOMIC_INIT(0); - } - return 0; - } - - static void _stp_print_cleanup (void) - { -- if (Stp_pbuf) -- _stp_free_percpu(Stp_pbuf); -- if (Stp_lbuf) -- _stp_free_percpu(Stp_lbuf); --} -+ unsigned int cpu; - --#include "print_flush.c" -+ /* Wait for the loggers to finish modifying the print buffers */ -+ while (atomic_cmpxchg(&_stp_print_ctr, 0, INT_MAX)) -+ cpu_relax(); -+ -+ for_each_possible_cpu(cpu) { -+ struct _stp_log *log = per_cpu_ptr(_stp_log_pcpu, cpu); -+ -+ /* -+ * Flush anything that could be left in the print buffer. It is -+ * safe to do this without any kind of synchronization mechanism -+ * because nothing is using this print buffer anymore. -+ */ -+ __stp_print_flush(log); -+ } -+ -+ _stp_free_percpu(_stp_log_pcpu); -+} - - static inline void _stp_print_flush(void) - { -- stp_print_flush(per_cpu_ptr(Stp_pbuf, smp_processor_id())); -+ struct _stp_log *log; -+ unsigned long flags; -+ -+ if (!_stp_print_trylock_irqsave(&flags)) -+ return; -+ -+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); -+ __stp_print_flush(log); -+ _stp_print_unlock_irqrestore(&flags); - } -+ - #ifndef STP_MAXBINARYARGS - #define STP_MAXBINARYARGS 127 - #endif - - --/** Reserves space in the output buffer for direct I/O. -+/** Reserves space in the output buffer for direct I/O. Must be called with -+ * _stp_print_trylock_irqsave() held. - */ - static void * _stp_reserve_bytes (int numbytes) - { -- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); -- int size = STP_BUFFER_SIZE - pb->len; -- void * ret; -+ struct _stp_log *log; -+ char *ret; - - if (unlikely(numbytes == 0 || numbytes > STP_BUFFER_SIZE)) - return NULL; - -- if (unlikely(numbytes > size)) -- _stp_print_flush(); -+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); -+ if (unlikely(numbytes > (STP_BUFFER_SIZE - log->len))) -+ __stp_print_flush(log); - -- ret = pb->buf + pb->len; -- pb->len += numbytes; -+ ret = &log->buf[log->len]; -+ log->len += numbytes; - return ret; - } - - - static void _stp_unreserve_bytes (int numbytes) - { -- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); -- -- if (unlikely(numbytes == 0 || numbytes > pb->len)) -- return; -+ struct _stp_log *log; - -- pb->len -= numbytes; -+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); -+ if (numbytes <= log->len) -+ log->len -= numbytes; - } - - /** Write 64-bit args directly into the output stream. -@@ -123,22 +218,25 @@ static void _stp_unreserve_bytes (int numbytes) - */ - static void _stp_print_binary (int num, ...) - { -+ unsigned long flags; - va_list vargs; - int i; - int64_t *args; -- -+ - if (unlikely(num > STP_MAXBINARYARGS)) - num = STP_MAXBINARYARGS; - -- args = _stp_reserve_bytes(num * sizeof(int64_t)); -+ if (!_stp_print_trylock_irqsave(&flags)) -+ return; - -- if (likely(args != NULL)) { -+ args = _stp_reserve_bytes(num * sizeof(int64_t)); -+ if (args) { - va_start(vargs, num); -- for (i = 0; i < num; i++) { -+ for (i = 0; i < num; i++) - args[i] = va_arg(vargs, int64_t); -- } - va_end(vargs); - } -+ _stp_print_unlock_irqrestore(&flags); - } - - /** Print into the print buffer. -@@ -149,6 +247,7 @@ static void _stp_print_binary (int num, ...) - static void _stp_printf (const char *fmt, ...) - { - va_list args; -+ - va_start(args, fmt); - _stp_vsnprintf(NULL, 0, fmt, args); - va_end(args); -@@ -160,37 +259,36 @@ static void _stp_printf (const char *fmt, ...) - - static void _stp_print (const char *str) - { -- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); -- char *end = pb->buf + STP_BUFFER_SIZE; -- char *ptr = pb->buf + pb->len; -- char *instr = (char *)str; -- -- while (ptr < end && *instr) -- *ptr++ = *instr++; -- -- /* Did loop terminate due to lack of buffer space? */ -- if (unlikely(*instr)) { -- /* Don't break strings across subbufs. */ -- /* Restart after flushing. */ -- _stp_print_flush(); -- end = pb->buf + STP_BUFFER_SIZE; -- ptr = pb->buf + pb->len; -- instr = (char *)str; -- while (ptr < end && *instr) -- *ptr++ = *instr++; -+ struct _stp_log *log; -+ unsigned long flags; -+ -+ if (!_stp_print_trylock_irqsave(&flags)) -+ return; -+ -+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); -+ while (1) { -+ while (log->len < STP_BUFFER_SIZE && *str) -+ log->buf[log->len++] = *str++; -+ if (likely(!*str)) -+ break; -+ __stp_print_flush(log); - } -- pb->len = ptr - pb->buf; -+ _stp_print_unlock_irqrestore(&flags); - } - - static void _stp_print_char (const char c) - { -- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); -- int size = STP_BUFFER_SIZE - pb->len; -- if (unlikely(1 >= size)) -- _stp_print_flush(); -- -- pb->buf[pb->len] = c; -- pb->len ++; -+ struct _stp_log *log; -+ unsigned long flags; -+ -+ if (!_stp_print_trylock_irqsave(&flags)) -+ return; -+ -+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); -+ if (unlikely(log->len == STP_BUFFER_SIZE)) -+ __stp_print_flush(log); -+ log->buf[log->len++] = c; -+ _stp_print_unlock_irqrestore(&flags); - } - - static void _stp_print_kernel_info(char *sname, char *vstr, int ctx, int num_probes) -diff --git a/runtime/print.h b/runtime/print.h -index ede71f033..ffdea594d 100644 ---- a/runtime/print.h -+++ b/runtime/print.h -@@ -10,6 +10,9 @@ - #ifndef _STP_PRINT_H_ - #define _STP_PRINT_H_ - -+/* The lock must be held with IRQs disabled to do any printing */ -+static bool _stp_print_trylock_irqsave(unsigned long *flags); -+static void _stp_print_unlock_irqrestore(unsigned long *flags); - static int _stp_print_init(void); - static void _stp_print_cleanup(void); - static void *_stp_reserve_bytes(int numbytes); -diff --git a/runtime/print_flush.c b/runtime/print_flush.c -index cf40a2645..acd6a32d9 100644 ---- a/runtime/print_flush.c -+++ b/runtime/print_flush.c -@@ -13,40 +13,31 @@ - * is filled, or this is called. This MUST be called before returning - * from a probe or accumulated output in the print buffer will be lost. - * -- * @note Preemption must be disabled to use this. -+ * @note Interrupts must be disabled to use this. - */ - --static STP_DEFINE_SPINLOCK(_stp_print_lock); -- --void stp_print_flush(_stp_pbuf *pb) -+static void __stp_print_flush(struct _stp_log *log) - { -- size_t len = pb->len; -+ size_t len = log->len; - void *entry = NULL; - - /* check to see if there is anything in the buffer */ - if (likely(len == 0)) - return; - -- pb->len = 0; -- -- if (unlikely(_stp_transport_get_state() != STP_TRANSPORT_RUNNING)) -- return; -+ log->len = 0; - - dbug_trans(1, "len = %zu\n", len); - - #ifdef STP_BULKMODE - #ifdef NO_PERCPU_HEADERS - { -- struct context* __restrict__ c = NULL; -- char *bufp = pb->buf; -+ char *bufp = log->buf; - int inode_locked; - -- c = _stp_runtime_entryfn_get_context(); -- - if (!(inode_locked = _stp_transport_trylock_relay_inode())) { - atomic_inc (&_stp_transport_failures); - #ifndef STP_TRANSPORT_RISKY -- _stp_runtime_entryfn_put_context(c); - return; - #endif - } -@@ -70,26 +61,20 @@ void stp_print_flush(_stp_pbuf *pb) - - if (inode_locked) - _stp_transport_unlock_relay_inode(); -- -- _stp_runtime_entryfn_put_context(c); - } - - #else /* !NO_PERCPU_HEADERS */ - - { -- struct context* __restrict__ c = NULL; -- char *bufp = pb->buf; -+ char *bufp = log->buf; - struct _stp_trace t = { .sequence = _stp_seq_inc(), - .pdu_len = len}; - size_t bytes_reserved; - int inode_locked; - -- c = _stp_runtime_entryfn_get_context(); -- - if (!(inode_locked = _stp_transport_trylock_relay_inode())) { - atomic_inc (&_stp_transport_failures); - #ifndef STP_TRANSPORT_RISKY -- _stp_runtime_entryfn_put_context(c); - return; - #endif - } -@@ -124,48 +109,24 @@ void stp_print_flush(_stp_pbuf *pb) - - if (inode_locked) - _stp_transport_unlock_relay_inode(); -- -- _stp_runtime_entryfn_put_context(c); - } - #endif /* !NO_PERCPU_HEADERS */ - - #else /* !STP_BULKMODE */ - - { -- unsigned long flags; -- struct context* __restrict__ c = NULL; -- char *bufp = pb->buf; -+ char *bufp = log->buf; - int inode_locked; - -- /* Prevent probe reentrancy on _stp_print_lock. -- * -- * Since stp_print_flush may be called from probe context, we -- * have to make sure that its lock, _stp_print_lock, can't -- * possibly be held outside probe context too. We ensure this -- * by grabbing the context here, so any probe triggered by this -- * region will appear reentrant and be skipped rather than -- * deadlock. Failure to get_context just means we're already -- * in a probe, which is fine. -- * -- * (see also _stp_ctl_send for a similar situation) -- * -- * A better solution would be to replace this -- * concurrency-control-laden effort with a lockless -- * algorithm. -- */ -- c = _stp_runtime_entryfn_get_context(); -- - if (!(inode_locked = _stp_transport_trylock_relay_inode())) { - atomic_inc (&_stp_transport_failures); - #ifndef STP_TRANSPORT_RISKY - dbug_trans(0, "discarding %zu bytes of data\n", len); -- _stp_runtime_entryfn_put_context(c); - return; - #endif - } - - dbug_trans(1, "calling _stp_data_write...\n"); -- stp_spin_lock_irqsave(&_stp_print_lock, flags); - while (len > 0) { - size_t bytes_reserved; - -@@ -182,12 +143,9 @@ void stp_print_flush(_stp_pbuf *pb) - break; - } - } -- stp_spin_unlock_irqrestore(&_stp_print_lock, flags); - - if (inode_locked) - _stp_transport_unlock_relay_inode(); -- -- _stp_runtime_entryfn_put_context(c); - } - #endif /* !STP_BULKMODE */ - } -diff --git a/runtime/stack.c b/runtime/stack.c -index 241ccf793..da23d4395 100644 ---- a/runtime/stack.c -+++ b/runtime/stack.c -@@ -690,13 +690,20 @@ static void _stp_stack_kernel_sprint(char *str, int size, struct context* c, - * then call _stp_stack_print, - * then copy the result into the output string - * and clear the print buffer. */ -- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); -- _stp_print_flush(); -+ struct _stp_log *log; -+ unsigned long flags; - -- _stp_stack_kernel_print(c, sym_flags); -+ if (!_stp_print_trylock_irqsave(&flags)) { -+ *str = '\0'; -+ return; -+ } - -- strlcpy(str, pb->buf, size < (int)pb->len ? size : (int)pb->len); -- pb->len = 0; -+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); -+ __stp_print_flush(log); -+ _stp_stack_kernel_print(c, sym_flags); -+ strlcpy(str, log->buf, min_t(int, size, log->len)); -+ log->len = 0; -+ _stp_print_unlock_irqrestore(&flags); - } - - static void _stp_stack_user_sprint(char *str, int size, struct context* c, -@@ -707,13 +714,20 @@ static void _stp_stack_user_sprint(char *str, int size, struct context* c, - * then call _stp_stack_print, - * then copy the result into the output string - * and clear the print buffer. */ -- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); -- _stp_print_flush(); -+ struct _stp_log *log; -+ unsigned long flags; - -- _stp_stack_user_print(c, sym_flags); -+ if (!_stp_print_trylock_irqsave(&flags)) { -+ *str = '\0'; -+ return; -+ } - -- strlcpy(str, pb->buf, size < (int)pb->len ? size : (int)pb->len); -- pb->len = 0; -+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); -+ __stp_print_flush(log); -+ _stp_stack_user_print(c, sym_flags); -+ strlcpy(str, log->buf, min_t(int, size, log->len)); -+ log->len = 0; -+ _stp_print_unlock_irqrestore(&flags); - } - - #endif /* _STACK_C_ */ -diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c -index 57955334b..44e69b68c 100644 ---- a/runtime/transport/transport.c -+++ b/runtime/transport/transport.c -@@ -540,8 +540,8 @@ static void _stp_transport_close(void) - current->pid); - _stp_cleanup_and_exit(0); - _stp_unregister_ctl_channel(); -+ _stp_print_cleanup(); /* Requires the transport, so free this first */ - _stp_transport_fs_close(); -- _stp_print_cleanup(); /* free print buffers */ - _stp_mem_debug_done(); - - dbug_trans(1, "---- CLOSED ----\n"); -diff --git a/runtime/vsprintf.c b/runtime/vsprintf.c -index 28fd18f16..417d9f7f3 100644 ---- a/runtime/vsprintf.c -+++ b/runtime/vsprintf.c -@@ -542,6 +542,8 @@ _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) - number of chars for from string */ - int qualifier; /* 'h', 'l', or 'L' for integer fields */ - int num_bytes = 0; -+ unsigned long irqflags = 0; -+ bool got_print_lock = false; - - /* Reject out-of-range values early */ - if (unlikely((int) size < 0)) -@@ -724,11 +726,14 @@ _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) - num_bytes = STP_BUFFER_SIZE; - } - -+ if (!_stp_print_trylock_irqsave(&irqflags)) -+ return 0; - str = (char*)_stp_reserve_bytes(num_bytes); - if (str == NULL) { - _stp_error("Couldn't reserve any print buffer space\n"); -- return 0; -+ goto err_unlock; - } -+ got_print_lock = true; - size = num_bytes; - end = str + size - 1; - -@@ -820,8 +825,10 @@ _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) - field_width, precision, - *fmt, flags); - if (unlikely(str == NULL)) { -- if (num_bytes > 0) -+ if (num_bytes > 0) { - _stp_unreserve_bytes(num_bytes); -+ goto err_unlock; -+ } - return 0; - } - continue; -@@ -923,7 +930,14 @@ _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) - /* don't write out a null byte if the buf size is zero */ - *end = '\0'; - } -+ -+ if (got_print_lock) -+ _stp_print_unlock_irqrestore(&irqflags); - return str-buf; -+ -+err_unlock: -+ _stp_print_unlock_irqrestore(&irqflags); -+ return 0; - } - - #endif /* _VSPRINTF_C_ */ -diff --git a/translate.cxx b/translate.cxx -index 53f1d0725..f0195486c 100644 ---- a/translate.cxx -+++ b/translate.cxx -@@ -1354,6 +1354,9 @@ c_unparser::emit_compiled_printfs () - o->newline() << "unsigned long ptr_value;"; - o->newline() << "int num_bytes;"; - -+ if (print_to_stream) -+ o->newline() << "unsigned long irqflags;"; -+ - o->newline() << "(void) width;"; - o->newline() << "(void) precision;"; - o->newline() << "(void) ptr_value;"; -@@ -1452,7 +1455,9 @@ c_unparser::emit_compiled_printfs () - } - - o->newline() << "num_bytes = clamp(num_bytes, 0, STP_BUFFER_SIZE);"; -- o->newline() << "str = (char*)_stp_reserve_bytes(num_bytes);"; -+ o->newline() << "if (!_stp_print_trylock_irqsave(&irqflags))"; -+ o->newline(1) << "return;"; -+ o->newline(-1) << "str = (char*)_stp_reserve_bytes(num_bytes);"; - o->newline() << "end = str ? str + num_bytes - 1 : 0;"; - } - else // !print_to_stream -@@ -1547,8 +1552,14 @@ c_unparser::emit_compiled_printfs () - o->newline() << "if (unlikely(str == NULL)) {"; - o->indent(1); - if (print_to_stream) -+ { - o->newline() << "_stp_unreserve_bytes(num_bytes);"; -- o->newline() << "return;"; -+ o->newline() << "goto err_unlock;"; -+ } -+ else -+ { -+ o->newline() << "return;"; -+ } - o->newline(-1) << "}"; - break; - -@@ -1575,6 +1586,11 @@ c_unparser::emit_compiled_printfs () - - o->newline(-1) << "}"; - -+ if (print_to_stream) -+ { -+ o->newline(-1) << "err_unlock:"; -+ o->newline(1) << "_stp_print_unlock_irqrestore(&irqflags);"; -+ } - o->newline(-1) << "}"; - } - o->newline() << "#endif // STP_LEGACY_PRINT"; -commit e8c7a2067ec7fc6315ee9bc34a010ec5f0369c5c -Author: Frank Ch. Eigler -Date: Fri Dec 4 19:33:22 2020 -0500 - - testsuite pr14536.stp: toughen - - This test case stresses nesting of heavy duty processing (backtrace - printing) within kernel interrupt processing paths. It seems to - sometimes trigger problems - so let's make the test harder to make - latent problems show up more likely. Instead of quitting after the - first irq_* function hit, stick around for 10 seconds. - -diff --git a/testsuite/systemtap.base/pr14546.stp b/testsuite/systemtap.base/pr14546.stp -index 3e59a6f3a..e4c205a8f 100644 ---- a/testsuite/systemtap.base/pr14546.stp -+++ b/testsuite/systemtap.base/pr14546.stp -@@ -2,5 +2,6 @@ probe kernel.function("irq_*").call { - x = 10; y = 10; z = 10; w = 10 - $1 - assert(!(x != 10 || y != 10 || z != 10 || w != 10), "memory got corrupted by " . @1) -- exit() - } -+ -+probe timer.s(10) { exit () } -commit cd6399e621646856824ea96b11605a0f52011272 -Author: Frank Ch. Eigler -Date: Fri Dec 4 21:33:21 2020 -0500 - - dyninst transport: add _stp_print_*lock_irq* stubs - - Recent code on the transport/linux side needs a few new (stub) - functions and type decls. - -diff --git a/runtime/dyninst/print.c b/runtime/dyninst/print.c -index 9d91224ba..c78def272 100644 ---- a/runtime/dyninst/print.c -+++ b/runtime/dyninst/print.c -@@ -107,4 +107,18 @@ static void _stp_print_char (const char c) - } - } - -+ -+/* no-op stub synchronization */ -+static bool _stp_print_trylock_irqsave(unsigned long *flags) -+{ -+ (void) flags; -+ return true; -+} -+ -+static void _stp_print_unlock_irqrestore(unsigned long *flags) -+{ -+ (void) flags; -+} -+ -+ - #endif /* _STAPDYN_PRINT_C_ */ -diff --git a/runtime/dyninst/runtime_defines.h b/runtime/dyninst/runtime_defines.h -index 5c3dec519..d00c76a21 100644 ---- a/runtime/dyninst/runtime_defines.h -+++ b/runtime/dyninst/runtime_defines.h -@@ -7,3 +7,6 @@ - #define STAPCONF_PAGEFAULT_DISABLE 1 - #define pagefault_disable() - #define pagefault_enable() -+ -+typedef int bool; -+enum { false=0, true=1 }; -commit fd93cf71df80f7bb5aae707ea5a5875727a85770 -Author: Sultan Alsawaf -Date: Wed Dec 9 12:55:10 2020 -0800 - - PR26844: fix off-by-one error when copying printed backtraces - - Since log->buf isn't null-terminated, log->len represents the total - number of bytes present in the log buffer for copying. The use of - strlcpy() here with log->len as its size results in log->len - 1 bytes - being copied, with the log->len'nth byte of the output buffer being set - to zero to terminate the string. Use memcpy() instead to remedy this, - while ensuring that the output buffer has space for null termination, - since the output buffer needs to be terminated. - -diff --git a/runtime/stack.c b/runtime/stack.c -index da23d4395..85883d6c4 100644 ---- a/runtime/stack.c -+++ b/runtime/stack.c -@@ -692,6 +692,7 @@ static void _stp_stack_kernel_sprint(char *str, int size, struct context* c, - * and clear the print buffer. */ - struct _stp_log *log; - unsigned long flags; -+ int bytes; - - if (!_stp_print_trylock_irqsave(&flags)) { - *str = '\0'; -@@ -701,7 +702,9 @@ static void _stp_stack_kernel_sprint(char *str, int size, struct context* c, - log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); - __stp_print_flush(log); - _stp_stack_kernel_print(c, sym_flags); -- strlcpy(str, log->buf, min_t(int, size, log->len)); -+ bytes = min_t(int, size - 1, log->len); -+ memcpy(str, log->buf, bytes); -+ str[bytes] = '\0'; - log->len = 0; - _stp_print_unlock_irqrestore(&flags); - } -@@ -716,6 +719,7 @@ static void _stp_stack_user_sprint(char *str, int size, struct context* c, - * and clear the print buffer. */ - struct _stp_log *log; - unsigned long flags; -+ int bytes; - - if (!_stp_print_trylock_irqsave(&flags)) { - *str = '\0'; -@@ -725,7 +729,9 @@ static void _stp_stack_user_sprint(char *str, int size, struct context* c, - log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); - __stp_print_flush(log); - _stp_stack_user_print(c, sym_flags); -- strlcpy(str, log->buf, min_t(int, size, log->len)); -+ bytes = min_t(int, size - 1, log->len); -+ memcpy(str, log->buf, bytes); -+ str[bytes] = '\0'; - log->len = 0; - _stp_print_unlock_irqrestore(&flags); - } -commit 8819e2a04596deb2fe427d261bebcaf3d2620dfb -Author: Sultan Alsawaf -Date: Wed Dec 9 17:22:20 2020 -0800 - - always use per-cpu bulkmode relayfs files to communicate with userspace - - Using a mutex_trylock() in __stp_print_flush() leads to a lot of havoc, - for numerous. Firstly, since __stp_print_flush() can be called from IRQ - context, holding the inode mutex from here would make the mutex owner - become nonsense, since mutex locks can only be held in contexts backed - by the scheduler. Secondly, the mutex_trylock implementation has a - spin_lock() inside of it that leads to two issues: IRQs aren't disabled - when acquiring this spin_lock(), so using it from IRQ context can lead - to a deadlock, and since spin locks can have tracepoints via - lock_acquire(), the spin_lock() can recurse on itself inside a stap - probe and deadlock, like so: - - #0 [ffff88017f6d7a08] kvm_wait at ffffffff81079f5a - #1 [ffff88017f6d7a30] __pv_queued_spin_lock_slowpath at ffffffff8114f51e - #2 [ffff88017f6d7a70] queued_spin_lock_slowpath at ffffffff810e842b - #3 [ffff88017f6d7a80] mutex_trylock at ffffffff81882b1b - #4 [ffff88017f6d7ab8] _stp_transport_trylock_relay_inode at ffffffffc0c599df [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #5 [ffff88017f6d7ad8] __stp_print_flush at ffffffffc09b6483 [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #6 [ffff88017f6d7b10] probe_7879 at ffffffffc0a98c85 [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #7 [ffff88017f6d7b38] enter_real_tracepoint_probe_1543 at ffffffffc0c3b757 [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #8 [ffff88017f6d7b70] enter_tracepoint_probe_1543 at ffffffffc09b117e [stap_47650d3377d05db0ab7cbbaa25765809__11657] - #9 [ffff88017f6d7b80] lock_acquire at ffffffff811460ba - - The reason the mutex_trylock() was needed in the first place was because - staprun doesn't properly use the relayfs API when reading buffers in - non-bulk mode. It tries to read all CPUs' buffers from a single thread, - when it should be reading each CPU's buffer from a thread running on - said CPU in order to utilize relayfs' synchronization guarantees, which - are made by disabling IRQs on the local CPU when a buffer is modified. - - This change makes staprun always use per-CPU threads to read print - buffers so that we don't need the mutex_trylock() in the print flush - routine, which resolves a wide variety of serious bugs. - - We also need to adjust the transport sub-buffer count to accommodate for - frequent print flushing. The sub-buffer size is now reduced to match the - log buffer size, which is 8192 by default, and the number of sub-buffers - is increased to 256. This uses exactly the same amount of memory as - before. - -diff --git a/runtime/print_flush.c b/runtime/print_flush.c -index acd6a32d9..f4d72d30f 100644 ---- a/runtime/print_flush.c -+++ b/runtime/print_flush.c -@@ -18,6 +18,7 @@ - - static void __stp_print_flush(struct _stp_log *log) - { -+ char *bufp = log->buf; - size_t len = log->len; - void *entry = NULL; - -@@ -26,126 +27,20 @@ static void __stp_print_flush(struct _stp_log *log) - return; - - log->len = 0; -- - dbug_trans(1, "len = %zu\n", len); -- --#ifdef STP_BULKMODE --#ifdef NO_PERCPU_HEADERS -- { -- char *bufp = log->buf; -- int inode_locked; -- -- if (!(inode_locked = _stp_transport_trylock_relay_inode())) { -- atomic_inc (&_stp_transport_failures); --#ifndef STP_TRANSPORT_RISKY -- return; --#endif -- } -- -- while (len > 0) { -- size_t bytes_reserved; -- -- bytes_reserved = _stp_data_write_reserve(len, &entry); -- if (likely(entry && bytes_reserved > 0)) { -- memcpy(_stp_data_entry_data(entry), bufp, -- bytes_reserved); -- _stp_data_write_commit(entry); -- bufp += bytes_reserved; -- len -= bytes_reserved; -- } -- else { -- atomic_inc(&_stp_transport_failures); -- break; -- } -- } -- -- if (inode_locked) -- _stp_transport_unlock_relay_inode(); -- } -- --#else /* !NO_PERCPU_HEADERS */ -- -- { -- char *bufp = log->buf; -- struct _stp_trace t = { .sequence = _stp_seq_inc(), -- .pdu_len = len}; -+ do { - size_t bytes_reserved; -- int inode_locked; - -- if (!(inode_locked = _stp_transport_trylock_relay_inode())) { -- atomic_inc (&_stp_transport_failures); --#ifndef STP_TRANSPORT_RISKY -- return; --#endif -- } -- -- bytes_reserved = _stp_data_write_reserve(sizeof(struct _stp_trace), &entry); -- if (likely(entry && bytes_reserved > 0)) { -- /* prevent unaligned access by using memcpy() */ -- memcpy(_stp_data_entry_data(entry), &t, sizeof(t)); -+ bytes_reserved = _stp_data_write_reserve(len, &entry); -+ if (likely(entry && bytes_reserved)) { -+ memcpy(_stp_data_entry_data(entry), bufp, -+ bytes_reserved); - _stp_data_write_commit(entry); -- } -- else { -+ bufp += bytes_reserved; -+ len -= bytes_reserved; -+ } else { - atomic_inc(&_stp_transport_failures); -- goto done; -+ break; - } -- -- while (len > 0) { -- bytes_reserved = _stp_data_write_reserve(len, &entry); -- if (likely(entry && bytes_reserved > 0)) { -- memcpy(_stp_data_entry_data(entry), bufp, -- bytes_reserved); -- _stp_data_write_commit(entry); -- bufp += bytes_reserved; -- len -= bytes_reserved; -- } -- else { -- atomic_inc(&_stp_transport_failures); -- break; -- } -- } -- -- done: -- -- if (inode_locked) -- _stp_transport_unlock_relay_inode(); -- } --#endif /* !NO_PERCPU_HEADERS */ -- --#else /* !STP_BULKMODE */ -- -- { -- char *bufp = log->buf; -- int inode_locked; -- -- if (!(inode_locked = _stp_transport_trylock_relay_inode())) { -- atomic_inc (&_stp_transport_failures); --#ifndef STP_TRANSPORT_RISKY -- dbug_trans(0, "discarding %zu bytes of data\n", len); -- return; --#endif -- } -- -- dbug_trans(1, "calling _stp_data_write...\n"); -- while (len > 0) { -- size_t bytes_reserved; -- -- bytes_reserved = _stp_data_write_reserve(len, &entry); -- if (likely(entry && bytes_reserved > 0)) { -- memcpy(_stp_data_entry_data(entry), bufp, -- bytes_reserved); -- _stp_data_write_commit(entry); -- bufp += bytes_reserved; -- len -= bytes_reserved; -- } -- else { -- atomic_inc(&_stp_transport_failures); -- break; -- } -- } -- -- if (inode_locked) -- _stp_transport_unlock_relay_inode(); -- } --#endif /* !STP_BULKMODE */ -+ } while (len > 0); - } -diff --git a/runtime/transport/relay_v2.c b/runtime/transport/relay_v2.c -index ff621f71d..2ba5eea7d 100644 ---- a/runtime/transport/relay_v2.c -+++ b/runtime/transport/relay_v2.c -@@ -67,7 +67,7 @@ static size_t __stp_relay_switch_subbuf(struct rchan_buf *buf, size_t length) - return 0; - - if (unlikely(length > buf->chan->subbuf_size)) -- goto toobig; -+ length = buf->chan->subbuf_size; - - if (buf->offset != buf->chan->subbuf_size + 1) { - buf->prev_padding = buf->chan->subbuf_size - buf->offset; -@@ -98,14 +98,7 @@ static size_t __stp_relay_switch_subbuf(struct rchan_buf *buf, size_t length) - buf->data = new; - buf->padding[new_subbuf] = 0; - -- if (unlikely(length + buf->offset > buf->chan->subbuf_size)) -- goto toobig; -- - return length; -- --toobig: -- buf->chan->last_toobig = length; -- return 0; - } - - static void __stp_relay_wakeup_readers(struct rchan_buf *buf) -@@ -117,24 +110,17 @@ static void __stp_relay_wakeup_readers(struct rchan_buf *buf) - - static void __stp_relay_wakeup_timer(stp_timer_callback_parameter_t unused) - { --#ifdef STP_BULKMODE - int i; --#endif - - if (atomic_read(&_stp_relay_data.wakeup)) { - struct rchan_buf *buf; - - atomic_set(&_stp_relay_data.wakeup, 0); --#ifdef STP_BULKMODE - for_each_possible_cpu(i) { - buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, - i); - __stp_relay_wakeup_readers(buf); - } --#else -- buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, 0); -- __stp_relay_wakeup_readers(buf); --#endif - } - - if (atomic_read(&_stp_relay_data.transport_state) == STP_TRANSPORT_RUNNING) -@@ -235,55 +221,8 @@ static void _stp_transport_data_fs_stop(void) - atomic_set (&_stp_relay_data.transport_state, STP_TRANSPORT_STOPPED); - del_timer_sync(&_stp_relay_data.timer); - dbug_trans(0, "flushing...\n"); -- if (_stp_relay_data.rchan) { -- struct rchan_buf *buf; -- -- /* NB we cannot call relay_flush() directly here since -- * we need to do inode locking ourselves. -- */ -- --#ifdef STP_BULKMODE -- unsigned int i; -- struct rchan *rchan = _stp_relay_data.rchan; -- -- for_each_possible_cpu(i) { -- buf = _stp_get_rchan_subbuf(rchan->buf, i); -- if (buf) { -- struct inode *inode = buf->dentry->d_inode; -- -- /* NB we are in the syscall context which -- * allows sleeping. The following inode -- * locking might sleep. See PR26131. */ -- _stp_lock_inode(inode); -- -- /* NB we intentionally avoids calling -- * our own __stp_relay_switch_subbuf() -- * since here we can sleep. */ -- relay_switch_subbuf(buf, 0); -- -- _stp_unlock_inode(inode); -- } -- } --#else /* !STP_BULKMODE */ -- buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, 0); -- -- if (buf != NULL) { -- struct inode *inode = buf->dentry->d_inode; -- -- /* NB we are in the syscall context which allows -- * sleeping. The following inode locking might -- * sleep. See PR26131. */ -- _stp_lock_inode(inode); -- -- /* NB we intentionally avoids calling -- * our own __stp_relay_switch_subbuf() -- * since here we can sleep. */ -- relay_switch_subbuf(buf, 0); -- -- _stp_unlock_inode(inode); -- } --#endif -- } -+ if (_stp_relay_data.rchan) -+ relay_flush(_stp_relay_data.rchan); - } - } - -@@ -308,9 +247,7 @@ static int _stp_transport_data_fs_init(void) - - /* Create "trace" file. */ - npages = _stp_subbuf_size * _stp_nsubbufs; --#ifdef STP_BULKMODE - npages *= num_online_cpus(); --#endif - npages >>= PAGE_SHIFT; - si_meminfo(&si); - #define MB(i) (unsigned long)((i) >> (20 - PAGE_SHIFT)) -@@ -347,9 +284,7 @@ static int _stp_transport_data_fs_init(void) - { - u64 relay_mem; - relay_mem = _stp_subbuf_size * _stp_nsubbufs; --#ifdef STP_BULKMODE - relay_mem *= num_online_cpus(); --#endif - _stp_allocated_net_memory += relay_mem; - _stp_allocated_memory += relay_mem; - } -@@ -386,12 +321,7 @@ _stp_data_write_reserve(size_t size_request, void **entry) - return -EINVAL; - - buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, --#ifdef STP_BULKMODE -- smp_processor_id() --#else -- 0 --#endif -- ); -+ smp_processor_id()); - if (unlikely(buf->offset + size_request > buf->chan->subbuf_size)) { - size_request = __stp_relay_switch_subbuf(buf, size_request); - if (!size_request) -@@ -411,65 +341,10 @@ static unsigned char *_stp_data_entry_data(void *entry) - - static int _stp_data_write_commit(void *entry) - { -- /* Nothing to do here. */ -- return 0; --} -- --static noinline int _stp_transport_trylock_relay_inode(void) --{ -- unsigned i; - struct rchan_buf *buf; -- struct inode *inode; --#ifdef DEBUG_TRANS -- cycles_t begin; --#endif - - buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, --#ifdef STP_BULKMODE -- smp_processor_id() --#else -- 0 --#endif -- ); -- if (buf == NULL) -- return 0; -- -- inode = buf->dentry->d_inode; -- --#ifdef DEBUG_TRANS -- begin = get_cycles(); --#endif -- -- /* NB this bounded spinlock is needed for stream mode. it is observed -- * that almost all of the iterations needed are less than 50K iterations -- * or about 300K cycles. -- */ -- for (i = 0; i < 50 * 1000; i++) { -- if (_stp_trylock_inode(inode)) { -- dbug_trans(3, "got inode lock: i=%u: cycles: %llu", i, -- get_cycles() - begin); -- return 1; -- } -- } -- -- dbug_trans(0, "failed to get inode lock: i=%u: cycles: %llu", i, -- get_cycles() - begin); -+ smp_processor_id()); -+ __stp_relay_switch_subbuf(buf, 0); - return 0; - } -- --static void _stp_transport_unlock_relay_inode(void) --{ -- struct rchan_buf *buf; -- -- buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, --#ifdef STP_BULKMODE -- smp_processor_id() --#else -- 0 --#endif -- ); -- if (buf == NULL) -- return; -- -- _stp_unlock_inode(buf->dentry->d_inode); --} -diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c -index 96426eb7b..1be3e9485 100644 ---- a/runtime/transport/transport.c -+++ b/runtime/transport/transport.c -@@ -49,7 +49,6 @@ static int _stp_probes_started = 0; - * transport state flag is atomic. */ - static atomic_t _stp_transport_state = ATOMIC_INIT(_STP_TS_UNINITIALIZED); - --static inline int _stp_trylock_inode(struct inode *inode); - static inline void _stp_lock_inode(struct inode *inode); - static inline void _stp_unlock_inode(struct inode *inode); - -@@ -70,8 +69,8 @@ static inline void _stp_unlock_inode(struct inode *inode); - #include "procfs.c" - #include "control.c" - --static unsigned _stp_nsubbufs = 8; --static unsigned _stp_subbuf_size = 65536*4; -+static unsigned _stp_nsubbufs = 256; -+static unsigned _stp_subbuf_size = STP_BUFFER_SIZE; - - /* module parameters */ - static int _stp_bufsize; -@@ -643,23 +642,6 @@ err0: - return -1; - } - --/* returns 1 when the lock is successfully acquired, 0 otherwise. */ --static inline int _stp_trylock_inode(struct inode *inode) --{ --#ifdef STAPCONF_INODE_RWSEM -- return inode_trylock(inode); --#else --#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) -- return mutex_trylock(&inode->i_mutex); --#else -- /* NB down_trylock() uses a different convention where 0 means -- * the lock is successfully acquired. -- */ -- return !down_trylock(&inode->i_sem); --#endif --#endif --} -- - static inline void _stp_lock_inode(struct inode *inode) - { - #ifdef STAPCONF_INODE_RWSEM -diff --git a/runtime/transport/transport.h b/runtime/transport/transport.h -index 51723b7f5..cc09fc0ae 100644 ---- a/runtime/transport/transport.h -+++ b/runtime/transport/transport.h -@@ -98,24 +98,6 @@ enum _stp_transport_state { - */ - static enum _stp_transport_state _stp_transport_get_state(void); - --/* -- * _stp_transport_trylock_relay_inode -- * -- * This function locks the relay file inode to protect against relay readers -- * (i.e., staprun/stapio). -- * Returns whether the lock is successfully obtained. -- */ --static noinline int _stp_transport_trylock_relay_inode(void); -- --/* -- * _stp_transport_unlock_relay_inode -- * -- * This function releases the lock obtained by -- * _stp_transport_trylock_relay_inode. -- * should only call this when the lock is indeed obtained. -- */ --static void _stp_transport_unlock_relay_inode(void); -- - /* - * _stp_transport_data_fs_init - * -diff --git a/staprun/relay.c b/staprun/relay.c -index 2f5f2e06a..c76e76719 100644 ---- a/staprun/relay.c -+++ b/staprun/relay.c -@@ -131,6 +131,7 @@ static void *reader_thread(void *data) - sigset_t sigs; - off_t wsize = 0; - int fnum = 0; -+ cpu_set_t cpu_mask; - - sigemptyset(&sigs); - sigaddset(&sigs,SIGUSR2); -@@ -139,21 +140,18 @@ static void *reader_thread(void *data) - sigfillset(&sigs); - sigdelset(&sigs,SIGUSR2); - -- if (bulkmode) { -- cpu_set_t cpu_mask; -- CPU_ZERO(&cpu_mask); -- CPU_SET(cpu, &cpu_mask); -- if( sched_setaffinity( 0, sizeof(cpu_mask), &cpu_mask ) < 0 ) -- _perr("sched_setaffinity"); -+ CPU_ZERO(&cpu_mask); -+ CPU_SET(cpu, &cpu_mask); -+ if( sched_setaffinity( 0, sizeof(cpu_mask), &cpu_mask ) < 0 ) -+ _perr("sched_setaffinity"); - #ifdef NEED_PPOLL -- /* Without a real ppoll, there is a small race condition that could */ -- /* block ppoll(). So use a timeout to prevent that. */ -- timeout->tv_sec = 10; -- timeout->tv_nsec = 0; -+ /* Without a real ppoll, there is a small race condition that could */ -+ /* block ppoll(). So use a timeout to prevent that. */ -+ timeout->tv_sec = 10; -+ timeout->tv_nsec = 0; - #else -- timeout = NULL; -+ timeout = NULL; - #endif -- } - - if (reader_timeout_ms && timeout) { - timeout->tv_sec = reader_timeout_ms / 1000; -@@ -358,11 +356,6 @@ int init_relayfs(void) - _err("couldn't open %s.\n", buf); - return -1; - } -- if (ncpus > 1 && bulkmode == 0) { -- _err("ncpus=%d, bulkmode = %d\n", ncpus, bulkmode); -- _err("This is inconsistent! Please file a bug report. Exiting now.\n"); -- return -1; -- } - - /* PR7097 */ - if (load_only) -commit d86b64029598f69b47d9cf4295f30b7093f38cfc -Author: Sultan Alsawaf -Date: Wed Dec 9 17:22:27 2020 -0800 - - Revert "REVERTME: tapset-timers: work around on-the-fly deadlocks caused by mutex_trylock" - - This reverts commit 6a27888b118b7a94650a68aae028957cdd5fb5f5. - - No longer needed. As promised, we're reverting this. - -diff --git a/tapset-timers.cxx b/tapset-timers.cxx -index 503498c85..10da17cda 100644 ---- a/tapset-timers.cxx -+++ b/tapset-timers.cxx -@@ -391,11 +391,11 @@ hrtimer_derived_probe_group::emit_module_refresh (systemtap_session& s) - s.op->newline(+1) << "struct stap_hrtimer_probe* stp = &stap_hrtimer_probes[i];"; - // timer disabled, but condition says enabled? - s.op->newline( 0) << "if (!stp->enabled && stp->probe->cond_enabled) {"; -- s.op->newline(+1) << "//dbug_otf(\"enabling (hrtimer) pidx %zu\\n\", stp->probe->index);"; -+ s.op->newline(+1) << "dbug_otf(\"enabling (hrtimer) pidx %zu\\n\", stp->probe->index);"; - s.op->newline( 0) << "_stp_hrtimer_start(stp);"; - // timer enabled, but condition says disabled? - s.op->newline(-1) << "} else if (stp->enabled && !stp->probe->cond_enabled) {"; -- s.op->newline(+1) << "//dbug_otf(\"disabling (hrtimer) pidx %zu\\n\", stp->probe->index);"; -+ s.op->newline(+1) << "dbug_otf(\"disabling (hrtimer) pidx %zu\\n\", stp->probe->index);"; - s.op->newline( 0) << "_stp_hrtimer_cancel(stp);"; - s.op->newline(-1) << "}"; - s.op->newline( 0) << "stp->enabled = stp->probe->cond_enabled;"; -commit 3abe2c40b2dae499aff2e31beff121fbe43f7654 -Author: Sultan Alsawaf -Date: Fri Dec 11 12:31:25 2020 -0800 - - transport: set is_global to zero even when bulkmode is disabled - - This is needed now that we always want per-cpu logger threads. When - is_global is set to a non-zero value, relay won't create per-cpu log - files. - -diff --git a/runtime/transport/debugfs.c b/runtime/transport/debugfs.c -index 28a5bf89d..bd20281a0 100644 ---- a/runtime/transport/debugfs.c -+++ b/runtime/transport/debugfs.c -@@ -256,13 +256,8 @@ __stp_debugfs_relay_create_buf_file_callback(const char *filename, - * cause relay_open() to create a single global buffer rather - * than the default set of per-cpu buffers. - */ -- if (is_global) { --#ifdef STP_BULKMODE -+ if (is_global) - *is_global = 0; --#else -- *is_global = 1; --#endif -- } - - if (IS_ERR(file)) { - file = NULL; -diff --git a/runtime/transport/procfs.c b/runtime/transport/procfs.c -index 262409356..b0a5d5760 100644 ---- a/runtime/transport/procfs.c -+++ b/runtime/transport/procfs.c -@@ -328,13 +328,8 @@ __stp_procfs_relay_create_buf_file_callback(const char *filename, - unsigned i = 0; - struct inode* in; - -- if (is_global) { --#ifdef STP_BULKMODE -+ if (is_global) - *is_global = 0; --#else -- *is_global = 1; --#endif -- } - - if (parent != _stp_procfs_module_dir_path.dentry) - goto out; -commit a26bf7890196395d73ac193b23e271398731745d -Author: Frank Ch. Eigler -Date: Fri Dec 11 15:39:29 2020 -0500 - - relay transport: comment on STP_BULK message - - While we've eliminated any STP_BULKMODE effects from the way relayfs - files are used ("always bulkmode"), staprun/stapio still need to know - whether the user intended "stap -b" or not, so they can save files - stpd_cpu* files separately. - -diff --git a/runtime/transport/control.c b/runtime/transport/control.c -index 9343b3c28..d123bef2f 100644 ---- a/runtime/transport/control.c -+++ b/runtime/transport/control.c -@@ -88,6 +88,9 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz - break; - - case STP_BULK: -+ // NB: this signals the runtime to save separate -+ // per-cpu files; our kernel->userspace files are now -+ // always bulkmode (trace$N files). - #ifdef STP_BULKMODE - // no action needed - break; -commit b43eb4ed690bf2421978ed2896667e45e60c3400 -Author: Cosmin Tanislav -Date: Thu Dec 10 16:48:54 2020 -0500 - - bugfix: runtime: transport: handle more error cases in module init - - Signed-off-by: Sultan Alsawaf - -diff --git a/runtime/transport/relay_v2.c b/runtime/transport/relay_v2.c -index 2ba5eea7d..27729f4c8 100644 ---- a/runtime/transport/relay_v2.c -+++ b/runtime/transport/relay_v2.c -@@ -277,6 +277,7 @@ static int _stp_transport_data_fs_init(void) - #endif /* (RELAYFS_CHANNEL_VERSION < 7) */ - if (!_stp_relay_data.rchan) { - rc = -ENOENT; -+ errk("%s: relay_open() failed: %d\n", THIS_MODULE->name, rc); - goto err; - } - /* Increment _stp_allocated_memory and _stp_allocated_net_memory to account for buffers -diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c -index 1be3e9485..f005e14e2 100644 ---- a/runtime/transport/transport.c -+++ b/runtime/transport/transport.c -@@ -552,6 +552,8 @@ static void _stp_transport_close(void) - */ - static int _stp_transport_init(void) - { -+ int ret; -+ - dbug_trans(1, "transport_init\n"); - #ifdef STAPCONF_TASK_UID - _stp_uid = current->uid; -@@ -603,20 +605,28 @@ static int _stp_transport_init(void) - dbug_trans(1, "Using %d subbufs of size %d\n", _stp_nsubbufs, _stp_subbuf_size); - } - -- if (_stp_transport_fs_init(THIS_MODULE->name) != 0) -+ ret = _stp_transport_fs_init(THIS_MODULE->name); -+ if (ret) - goto err0; - - /* create control channel */ -- if (_stp_register_ctl_channel() < 0) -+ ret = _stp_register_ctl_channel(); -+ if (ret < 0) - goto err1; - - /* create print buffers */ -- if (_stp_print_init() < 0) -+ ret = _stp_print_init(); -+ if (ret < 0) { -+ errk("%s: can't create print buffers!", THIS_MODULE->name); - goto err2; -+ } - - /* set _stp_module_self dynamic info */ -- if (_stp_module_update_self() < 0) -+ ret = _stp_module_update_self(); -+ if (ret < 0) { -+ errk("%s: can't update dynamic info!", THIS_MODULE->name); - goto err3; -+ } - - /* start transport */ - _stp_transport_data_fs_start(); -@@ -639,7 +649,7 @@ err2: - err1: - _stp_transport_fs_close(); - err0: -- return -1; -+ return ret; - } - - static inline void _stp_lock_inode(struct inode *inode) -commit 341bf33f14062269c52bcebaa309518d9972ca00 -Author: Frank Ch. Eigler -Date: Fri Dec 11 18:06:36 2020 -0500 - - staprun: handle more and fewer cpus better - - NR_CPUS was a hard-coded minimum and maximum on the number of CPUs - worth of trace$N files staprun/stapio would open at startup. While a - constant is useful for array sizing (and so might as well be really - large), the actual iteration should be informed by get_nprocs_conf(3). - - This patch replaces NR_CPUS with MAX_NR_CPUS (now 1024, why not), and - limits open/thread iterations to the actual number of processors. It - even prints an error if a behemoth >1K-core machine comes into being. - -diff --git a/staprun/relay.c b/staprun/relay.c -index c76e76719..3eb8df34b 100644 ---- a/staprun/relay.c -+++ b/staprun/relay.c -@@ -12,16 +12,16 @@ - - #include "staprun.h" - --int out_fd[NR_CPUS]; -+int out_fd[MAX_NR_CPUS]; - int monitor_end = 0; --static pthread_t reader[NR_CPUS]; --static int relay_fd[NR_CPUS]; --static int avail_cpus[NR_CPUS]; --static int switch_file[NR_CPUS]; --static pthread_mutex_t mutex[NR_CPUS]; -+static pthread_t reader[MAX_NR_CPUS]; -+static int relay_fd[MAX_NR_CPUS]; -+static int avail_cpus[MAX_NR_CPUS]; -+static int switch_file[MAX_NR_CPUS]; -+static pthread_mutex_t mutex[MAX_NR_CPUS]; - static int bulkmode = 0; - static volatile int stop_threads = 0; --static time_t *time_backlog[NR_CPUS]; -+static time_t *time_backlog[MAX_NR_CPUS]; - static int backlog_order=0; - #define BACKLOG_MASK ((1 << backlog_order) - 1) - #define MONITORLINELENGTH 4096 -@@ -313,12 +313,19 @@ int init_relayfs(void) - if (send_request(STP_BULK, rqbuf, sizeof(rqbuf)) == 0) - bulkmode = 1; - -- /* Try to open a slew of per-cpu trace%d files. Per PR19241, we -- need to go through all potentially present CPUs up to NR_CPUS, that -- we hope is a reasonable limit. For !bulknode, "trace0" will be -- typically used. */ -+ /* Try to open a slew of per-cpu trace%d files. Per PR19241, -+ we need to go through all potentially present CPUs up to -+ get_nprocs_conf(), up to MAX_NR_CPUS (used for array -+ allocations). For !bulknode, "trace0" will be typically -+ used, prior to systemtap 4.5; after, all are used. */ - -- for (i = 0; i < NR_CPUS; i++) { -+ int nprocs = get_nprocs_conf(); -+ if (nprocs > MAX_NR_CPUS) { -+ err("Too many CPUs: get_nprocs_conf()=%d vs MAX_NR_CPUS=%d\n", nprocs, MAX_NR_CPUS); -+ return -1; -+ } -+ -+ for (i = 0; i < nprocs; i++) { - relay_fd[i] = -1; - - #ifdef HAVE_OPENAT -@@ -348,7 +355,8 @@ int init_relayfs(void) - } - } - ncpus = cpui; -- dbug(2, "ncpus=%d, bulkmode = %d\n", ncpus, bulkmode); -+ /* ncpus could be smaller than nprocs if some cpus are offline */ -+ dbug(2, "ncpus=%d, nprocs=%d, bulkmode=%d\n", ncpus, nprocs, bulkmode); - for (i = 0; i < ncpus; i++) - dbug(2, "cpui=%d, relayfd=%d\n", i, avail_cpus[i]); - -diff --git a/staprun/relay_old.c b/staprun/relay_old.c -index f0d2e918f..248e6059d 100644 ---- a/staprun/relay_old.c -+++ b/staprun/relay_old.c -@@ -14,12 +14,12 @@ - #include "staprun.h" - - /* temporary per-cpu output written here for relayfs, filebase0...N */ --static int relay_fd[NR_CPUS]; --static int proc_fd[NR_CPUS]; --static FILE *percpu_tmpfile[NR_CPUS]; --static char *relay_buffer[NR_CPUS]; --static pthread_t reader[NR_CPUS]; --static int switch_file[NR_CPUS]; -+static int relay_fd[MAX_NR_CPUS]; -+static int proc_fd[MAX_NR_CPUS]; -+static FILE *percpu_tmpfile[MAX_NR_CPUS]; -+static char *relay_buffer[MAX_NR_CPUS]; -+static pthread_t reader[MAX_NR_CPUS]; -+static int switch_file[MAX_NR_CPUS]; - static int bulkmode = 0; - unsigned subbuf_size = 0; - unsigned n_subbufs = 0; -@@ -37,7 +37,7 @@ static struct buf_status - { - struct _stp_buf_info info; - unsigned max_backlog; /* max # sub-buffers ready at one time */ --} status[NR_CPUS]; -+} status[MAX_NR_CPUS]; - - - /** -@@ -461,7 +461,13 @@ int init_oldrelayfs(void) - relay_fd[0] = -1; - out_fd[0] = 0; - -- for (i = 0; i < NR_CPUS; i++) { -+ int nprocs = get_nprocs_conf(); -+ if (nprocs > MAX_NR_CPUS) { -+ err("Too many CPUs: get_nprocs_conf()=%d vs MAX_NR_CPUS=%d\n", nprocs, MAX_NR_CPUS); -+ goto err; -+ } -+ -+ for (i = 0; i < nprocs; i++) { - int ret = open_relayfs_files(i, relay_filebase, proc_filebase); - if (ret == 0) - break; -@@ -472,7 +478,8 @@ int init_oldrelayfs(void) - } - - ncpus = i; -- dbug(2, "ncpus=%d\n", ncpus); -+ /* ncpus could be smaller than nprocs if some cpus are offline */ -+ dbug(2, "ncpus=%d, nprocs=%d\n", ncpus, nprocs); - - if (ncpus == 0) { - err("Couldn't open relayfs files.\n"); -diff --git a/staprun/stap_merge.c b/staprun/stap_merge.c -index 7507f0e3d..87de7d465 100644 ---- a/staprun/stap_merge.c -+++ b/staprun/stap_merge.c -@@ -31,15 +31,15 @@ static void usage (char *prog) - } - - #define TIMESTAMP_SIZE (sizeof(int)) --#define NR_CPUS 256 -+#define MAX_NR_CPUS 1024 - - int main (int argc, char *argv[]) - { - char *buf, *outfile_name = NULL; - int c, i, j, rc, dropped=0; -- long count=0, min, num[NR_CPUS] = { 0 }; -+ long count=0, min, num[MAX_NR_CPUS] = { 0 }; - FILE *ofp = NULL; -- FILE *fp[NR_CPUS] = { 0 }; -+ FILE *fp[MAX_NR_CPUS] = { 0 }; - int ncpus, len, verbose = 0; - int bufsize = 65536; - -@@ -67,6 +67,10 @@ int main (int argc, char *argv[]) - - i = 0; - while (optind < argc) { -+ if (i >= MAX_NR_CPUS) { -+ fprintf(stderr, "too many files (MAX_NR_CPUS=%d)\n", MAX_NR_CPUS); -+ return -1; -+ } - fp[i] = fopen(argv[optind++], "r"); - if (!fp[i]) { - fprintf(stderr, "error opening file %s.\n", argv[optind - 1]); -diff --git a/staprun/staprun.h b/staprun/staprun.h -index e05dbe5b6..2d68bf527 100644 ---- a/staprun/staprun.h -+++ b/staprun/staprun.h -@@ -37,6 +37,7 @@ - #include - #include - #include -+#include - - /* Include config.h to pick up dependency for --prefix usage. */ - #include "../config.h" -@@ -285,10 +286,10 @@ extern int optopt; - extern int optind; - - /* maximum number of CPUs we can handle */ --#define NR_CPUS 256 -+#define MAX_NR_CPUS 1024 - - /* relay*.c uses these */ --extern int out_fd[NR_CPUS]; -+extern int out_fd[MAX_NR_CPUS]; - - /* relay_old uses these. Set in ctl.c */ - extern unsigned subbuf_size; -commit b4b5a29b51586f75de16cacdb44bdf0b3ad0478e -Author: Sultan Alsawaf -Date: Mon Dec 14 13:20:34 2020 -0800 - - staprun: use the correct out_fd when bulkmode and fsize_max aren't used - - When bulkmode and fsize_max aren't used, there is only one output fd and - it is stored at out_fd[avail_cpus[0]]. - -diff --git a/staprun/relay.c b/staprun/relay.c -index 3eb8df34b..d0202e52f 100644 ---- a/staprun/relay.c -+++ b/staprun/relay.c -@@ -232,10 +232,17 @@ static void *reader_thread(void *data) - wbuf += bytes; - wsize += bytes; - } else { -- rc = write(out_fd[cpu], wbuf, wbytes); -+ int fd; -+ /* Only bulkmode and fsize_max use per-cpu output files. Otherwise, -+ there's just a single output fd stored at out_fd[avail_cpus[0]]. */ -+ if (bulkmode || fsize_max) -+ fd = out_fd[cpu]; -+ else -+ fd = out_fd[avail_cpus[0]]; -+ rc = write(fd, wbuf, wbytes); - if (rc <= 0) { - perr("Couldn't write to output %d for cpu %d, exiting.", -- out_fd[cpu], cpu); -+ fd, cpu); - goto error_out; - } - wbytes -= rc; -commit b26b4e2c257e0bd65134eed5e51d754227a4ed3f -Author: Sultan Alsawaf -Date: Wed Dec 30 14:21:42 2020 -0800 - - task_finder2: fix panics due to broken task work cancellation - - The task_work_cancel() API uses function pointers to uniquely identify - task work structs, so there's no guarantee that a specific task work - struct we want to cancel is the one that will actually get canceled. - This issue would cause task work structs to be freed while they were - still queued up on the task's task-worker list. - - This is an example of one such panic, where the DEBUG_MEM feature - reported that a __stp_tf_task_work struct (56 bytes) wasn't freed, - because that specific task worker got canceled and instead an active - task worker got freed! - - orxray_resty_mem_X_35062: ERROR: Memory ffff8809ed388620 len=56 allocation type: kmalloc. Not freed. - BUG: unable to handle kernel paging request at ffffffffa0570877 - IP: [] 0xffffffffa0570876 - PGD 1abd067 PUD 1abe063 PMD 1028286067 PTE 0 - Oops: 0010 [#1] SMP - CPU: 3 PID: 1338 Comm: nginx Tainted: G OE ------------ 3.10.0-514.10.2.el7.x86_64.debug #1 - Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 - task: ffff880eae2d0000 ti: ffff880eaf2e4000 task.ti: ffff880eaf2e4000 - RIP: 0010:[] [] 0xffffffffa0570876 - RSP: 0018:ffff880eaf2e7d78 EFLAGS: 00010282 - RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000000 - RDX: ffff8809ed388640 RSI: 0000000000000000 RDI: ffff8809ed388640 - RBP: ffff880eaf2e7da0 R08: 0000000000000000 R09: 0000000000000000 - R10: 0000000000000001 R11: ffffffffff90001c R12: ffffffff8248b1c0 - R13: ffff880eae2d0818 R14: ffff880eae2d0000 R15: 00007eff3d2490b0 - FS: 00007eff3dcd2740(0000) GS:ffff881037c00000(0000) knlGS:0000000000000000 - CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 - CR2: ffffffffa0570877 CR3: 0000000ebce67000 CR4: 00000000003406e0 - DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 - DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 - Stack: - ffffffff810c6544 ffff880eaf2e7f58 ffff880eaf2e7e70 ffff880eae2d0000 - 00007eff3dcb3338 ffff880eaf2e7e38 ffffffff810b31ba ffff880eaf2e7dc0 - ffffffff8106c279 ffff880eaf2e7e50 ffff880ef8a792c0 ffff880eaf2e7df8 - Call Trace: - [] ? task_work_run+0xb4/0xe0 - [] get_signal_to_deliver+0x85a/0x960 - [] ? kvm_sched_clock_read+0x9/0x20 - [] ? sched_clock_local+0x1d/0x80 - [] ? sched_clock_cpu+0xb8/0xe0 - [] do_signal+0x57/0x6e0 - [] ? int_very_careful+0x5/0xd - [] do_notify_resume+0x5f/0xb0 - [] int_signal+0x12/0x17 - Code: Bad RIP value. - RIP [] 0xffffffffa0570876 - RSP - CR2: ffffffffa0570877 - ---[ end trace 1cdf8e5b522b246e ]--- - -diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c -index 4e0b68f7c..ecf1f77fd 100644 ---- a/runtime/linux/task_finder2.c -+++ b/runtime/linux/task_finder2.c -@@ -226,10 +226,22 @@ static void __stp_tf_cancel_all_task_work(void) - // Cancel all remaining requests. - stp_spin_lock_irqsave(&__stp_tf_task_work_list_lock, flags); - list_for_each_entry_safe(node, tmp, &__stp_tf_task_work_list, list) { -- if (stp_task_work_cancel(node->task, node->work.func)) { -- list_del(&node->list); -- _stp_kfree(node); -- } -+ struct __stp_tf_task_work *tf_work; -+ struct task_work *work; -+ -+ work = stp_task_work_cancel(node->task, node->work.func); -+ if (!work) -+ continue; -+ -+ /* -+ * There can be multiple queued task workers with the same -+ * worker func, so there's no guarantee that tf_work == node. -+ * Therefore, we can only free what stp_task_work_cancel() just -+ * gave us; freeing 'node' would be unsafe. -+ */ -+ tf_work = container_of(work, typeof(*tf_work), work); -+ list_del(&tf_work->list); -+ _stp_kfree(tf_work); - } - stp_spin_unlock_irqrestore(&__stp_tf_task_work_list_lock, flags); - } -commit 96470399a5a6fba864b90afd15eda43cdc8c8ac4 -Author: Sultan Alsawaf -Date: Wed Dec 30 15:42:11 2020 -0800 - - task_finder2: fix list corruption in __stp_tf_cancel_all_task_work() - - The previous commit (b26b4e2c2 "task_finder2: fix panics due to broken - task work cancellation") made it possible for the next node in the task - work list to be free, which would made list_for_each_entry_safe() not so - safe anymore. Using list_for_each_entry_safe() is still the fastest - approach here, so when the next node in the list happens to be freed, we - should just restart iteration on the list. - -diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c -index ecf1f77fd..83fc17b5e 100644 ---- a/runtime/linux/task_finder2.c -+++ b/runtime/linux/task_finder2.c -@@ -225,6 +225,7 @@ static void __stp_tf_cancel_all_task_work(void) - - // Cancel all remaining requests. - stp_spin_lock_irqsave(&__stp_tf_task_work_list_lock, flags); -+restart: - list_for_each_entry_safe(node, tmp, &__stp_tf_task_work_list, list) { - struct __stp_tf_task_work *tf_work; - struct task_work *work; -@@ -242,6 +243,21 @@ static void __stp_tf_cancel_all_task_work(void) - tf_work = container_of(work, typeof(*tf_work), work); - list_del(&tf_work->list); - _stp_kfree(tf_work); -+ -+ /* -+ * If the tf_work we just freed was the next node in the list, -+ * then we need to restart the list iteration because -+ * list_for_each_entry_safe() can't cope with the next node -+ * being freed. We still need to use list_for_each_entry_safe() -+ * because we need to get through one successful pass through -+ * the entire list, since it's not guaranteed that this list -+ * will be empty when this function exits, as there can still be -+ * active task workers running, which is fine since the -+ * stp_task_work API will wait for all task workers to finish -+ * before allowing the module to unload. -+ */ -+ if (tf_work == tmp) -+ goto restart; - } - stp_spin_unlock_irqrestore(&__stp_tf_task_work_list_lock, flags); - } -commit 6cb54128e005d1220a7b064ee42b9f72561c28e7 -Author: Sultan Alsawaf -Date: Wed Dec 30 15:47:58 2020 -0800 - - task_finder2: fix task worker race on module unload - - Unfortunately, __stp_tf_cancel_all_task_work() does not guarantee that - all of the task finder's task workers will be finished executing when it - returns. In this case, we rely on the stp_task_work API to prevent the - module from being unloaded while there are task workers in-flight, which - works, but the stp_task_work API is notified of a task worker finishing - before it actually finishes. Inside __stp_tf_task_worker_fn(), the - call to the task worker's function (tf_work->func) is where the final - refcount in the stp_task_work API could be put, but there will still be - instructions left in the task worker that will be executing for a short - time after that. In that short time, there can be a race where the - module is unloaded before the task worker finishes executing all of its - instructions, especially if the task worker gets preempted during this - time on a PREEMPT kernel. - - To remedy this, we must ensure that the last instruction in - __stp_tf_task_worker_fn() is where the stp_task_work API is notified of - a task worker finishing. - -diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c -index 83fc17b5e..2bab19295 100644 ---- a/runtime/linux/task_finder2.c -+++ b/runtime/linux/task_finder2.c -@@ -150,6 +150,7 @@ __stp_tf_task_worker_fn(struct task_work *work) - * workers for this task. - */ - __stp_tf_task_work_free(work); -+ stp_task_work_func_done(); - } - - static void -@@ -1066,11 +1067,8 @@ __stp_tf_clone_worker(struct task_work *work) - - might_sleep(); - if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING -- || current->flags & PF_EXITING) { -- /* Remember that this task_work_func is finished. */ -- stp_task_work_func_done(); -+ || current->flags & PF_EXITING) - return; -- } - - __stp_tf_handler_start(); - -@@ -1085,10 +1083,6 @@ __stp_tf_clone_worker(struct task_work *work) - } - - __stp_tf_handler_end(); -- -- /* Remember that this task_work_func is finished. */ -- stp_task_work_func_done(); -- return; - } - - -@@ -1392,11 +1386,8 @@ __stp_tf_quiesce_worker(struct task_work *work) - - might_sleep(); - if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING -- || current->flags & PF_EXITING) { -- /* Remember that this task_work_func is finished. */ -- stp_task_work_func_done(); -+ || current->flags & PF_EXITING) - return; -- } - - /* If we had a build-id based executable probe (so we have a - * tgt->build_id) set, we could not check it back in -@@ -1420,8 +1411,6 @@ __stp_tf_quiesce_worker(struct task_work *work) - (long) current->tgid, ok); - if (!ok) { - // stap_utrace_detach (current, & tgt->ops); -- /* Remember that this task_work_func is finished. */ -- stp_task_work_func_done(); - return; - } - } -@@ -1444,10 +1433,6 @@ __stp_tf_quiesce_worker(struct task_work *work) - __stp_call_callbacks(tgt, current, 1, (current->pid == current->tgid)); - - __stp_tf_handler_end(); -- -- /* Remember that this task_work_func is finished. */ -- stp_task_work_func_done(); -- return; - } - - static u32 -@@ -1614,18 +1599,12 @@ __stp_tf_mmap_worker(struct task_work *work) - - // See if we can find saved syscall info. - entry = __stp_tf_get_map_entry(current); -- if (entry == NULL) { -- /* Remember that this task_work_func is finished. */ -- stp_task_work_func_done(); -+ if (entry == NULL) - return; -- } - - if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING - || current->flags & PF_EXITING) { - __stp_tf_remove_map_entry(entry); -- -- /* Remember that this task_work_func is finished. */ -- stp_task_work_func_done(); - return; - } - -@@ -1650,10 +1629,6 @@ __stp_tf_mmap_worker(struct task_work *work) - __stp_tf_remove_map_entry(entry); - - __stp_tf_handler_end(); -- -- /* Remember that this task_work_func is finished. */ -- stp_task_work_func_done(); -- return; - } - - static u32 diff --git a/SOURCES/rhbz1908904.patch b/SOURCES/rhbz1908904.patch deleted file mode 100644 index 7cbe564..0000000 --- a/SOURCES/rhbz1908904.patch +++ /dev/null @@ -1,44 +0,0 @@ -commit e3287bddcdc51705bb206479daa82e97fb28071f -Author: Frank Ch. Eigler -Date: Wed Dec 9 22:29:43 2020 -0500 - - PR27044: fix lock loop for conditional probes - - Emit a nested block carefully so that the "goto out;" from a failed - stp_lock_probe() call in that spot near the epilogue of a - probe-handler goes downward, not upward. - -diff --git a/translate.cxx b/translate.cxx -index f0195486c..d7fe1e3b2 100644 ---- a/translate.cxx -+++ b/translate.cxx -@@ -2969,21 +2969,21 @@ c_unparser::emit_probe (derived_probe* v) - { - // PR26296 - // emit all read/write locks for global variables ... if somehow still not done by now -+ // emit a local out: label, for error catching in these condition exprs -+ o->newline() << "{"; -+ o->newline(1) << "__label__ out, deref_fault;"; - if (v->needs_global_locks ()) - emit_lock (); - - for (set::const_iterator -- it = v->probes_with_affected_conditions.begin(); -- it != v->probes_with_affected_conditions.end(); ++it) -+ it = v->probes_with_affected_conditions.begin(); -+ it != v->probes_with_affected_conditions.end(); ++it) - { -- // emit a local out: label, for error catching in these condition exprs -- o->newline() << "{"; -- o->newline(1) << "__label__ out, deref_fault;"; - emit_probe_condition_update(*it); -- o->newline(-1) << "deref_fault: __attribute__((unused));"; -- o->newline() << "out: __attribute__((unused));"; -- o->newline() << "}"; - } -+ o->newline(-1) << "deref_fault: __attribute__((unused));"; -+ o->newline() << "out: __attribute__((unused));"; -+ o->newline() << "}"; - } - - // PR26296 diff --git a/SOURCES/rhbz1914948.patch b/SOURCES/rhbz1914948.patch deleted file mode 100644 index c180b7f..0000000 --- a/SOURCES/rhbz1914948.patch +++ /dev/null @@ -1,168 +0,0 @@ -commit 175f2b06805bb79d6994c5c909b741c8dcba3528 -Author: Sultan Alsawaf -Date: Fri Jan 22 15:19:33 2021 -0800 - - runtime: utilize relay subbufs as much as possible - - The relay subbufs used for printing are used very inefficiently, causing - print messages to be frequently dropped. The cause for this inefficiency - is that every time a print flush occurs, the current subbuf is switched - out even if it isn't filled. We can't wait for a subbuf to fill up - before switching it out either, or messages will be delayed. - - To remedy this, we instead check to see if there's any data in any - subbuf and use that as an indicator to staprun to tell if there's data - available to read. Then when staprun attempts to read the data out, we - can switch out the current subbuf if it has data in it. This lets us - squeeze out every bit of storage from the subbufs. - - Any print drops experienced after this patch should be fixed by - increasing the subbuf count (_stp_nsubbufs). - -diff --git a/runtime/transport/procfs.c b/runtime/transport/procfs.c -index a365db6f2..97a6e123a 100644 ---- a/runtime/transport/procfs.c -+++ b/runtime/transport/procfs.c -@@ -235,14 +235,16 @@ static int _stp_procfs_transport_fs_init(const char *module_name) - { - #ifdef STAPCONF_PROC_OPS - relay_procfs_operations.proc_open = relay_file_operations.open; -- relay_procfs_operations.proc_poll = relay_file_operations.poll; -+ relay_procfs_operations.proc_poll = __stp_relay_file_poll; - relay_procfs_operations.proc_mmap = relay_file_operations.mmap; -- relay_procfs_operations.proc_read = relay_file_operations.read; -+ relay_procfs_operations.proc_read = __stp_relay_file_read; - relay_procfs_operations.proc_lseek = relay_file_operations.llseek; - relay_procfs_operations.proc_release = relay_file_operations.release; - #else - relay_procfs_operations = relay_file_operations; - relay_procfs_operations.owner = THIS_MODULE; -+ relay_procfs_operations.poll = __stp_relay_file_poll; -+ relay_procfs_operations.read = __stp_relay_file_read; - #endif - - if (_stp_mkdir_proc_module()) // get the _stp_procfs_module_dir* created -diff --git a/runtime/transport/relay_v2.c b/runtime/transport/relay_v2.c -index 27729f4c8..3b505c992 100644 ---- a/runtime/transport/relay_v2.c -+++ b/runtime/transport/relay_v2.c -@@ -77,14 +77,6 @@ static size_t __stp_relay_switch_subbuf(struct rchan_buf *buf, size_t length) - buf->dentry->d_inode->i_size += buf->chan->subbuf_size - - buf->padding[old_subbuf]; - smp_mb(); -- if (waitqueue_active(&buf->read_wait)) -- /* -- * Calling wake_up_interruptible() and __mod_timer() -- * from here will deadlock if we happen to be logging -- * from the scheduler and timer (trying to re-grab -- * rq->lock/timer->base->lock), so just set a flag. -- */ -- atomic_set(&_stp_relay_data.wakeup, 1); - } - - old = buf->data; -@@ -101,10 +93,56 @@ static size_t __stp_relay_switch_subbuf(struct rchan_buf *buf, size_t length) - return length; - } - -+static bool __stp_relay_buf_empty(struct rchan_buf *buf) -+{ -+ return !buf->offset && buf->subbufs_produced == buf->subbufs_consumed; -+} -+ -+static unsigned int __stp_relay_file_poll(struct file *filp, poll_table *wait) -+{ -+ struct rchan_buf *buf = filp->private_data; -+ -+ if (buf->finalized) -+ return POLLERR; -+ -+ if (filp->f_mode & FMODE_READ) { -+ poll_wait(filp, &buf->read_wait, wait); -+ if (!__stp_relay_buf_empty(buf)) -+ return POLLIN | POLLRDNORM; -+ } -+ -+ return 0; -+} -+ -+static ssize_t __stp_relay_file_read(struct file *filp, char __user *buffer, -+ size_t count, loff_t *ppos) -+{ -+ struct rchan_buf *buf = filp->private_data; -+ unsigned long flags; -+ -+ /* This trylock will only fail after the print driver is destroyed */ -+ if (_stp_print_trylock_irqsave(&flags)) { -+ /* Switch out the current buffer if it has any data */ -+ if (buf->offset) -+ __stp_relay_switch_subbuf(buf, 0); -+ _stp_print_unlock_irqrestore(&flags); -+ } -+ -+ /* -+ * Optimization: bail out early if there's nothing to read. This is -+ * faster than going into relay's read() function without having any -+ * data, as it will acquire a mutex lock for the inode before bailing. -+ */ -+ if (buf->subbufs_produced == buf->subbufs_consumed) -+ return -ENODATA; -+ -+ return relay_file_operations.read(filp, buffer, count, ppos); -+} -+ - static void __stp_relay_wakeup_readers(struct rchan_buf *buf) - { - if (buf && waitqueue_active(&buf->read_wait) && -- buf->subbufs_produced != buf->subbufs_consumed) -+ !__stp_relay_buf_empty(buf)) - wake_up_interruptible(&buf->read_wait); - } - -@@ -112,10 +150,9 @@ static void __stp_relay_wakeup_timer(stp_timer_callback_parameter_t unused) - { - int i; - -- if (atomic_read(&_stp_relay_data.wakeup)) { -+ if (atomic_cmpxchg(&_stp_relay_data.wakeup, 1, 0)) { - struct rchan_buf *buf; - -- atomic_set(&_stp_relay_data.wakeup, 0); - for_each_possible_cpu(i) { - buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, - i); -@@ -266,6 +303,8 @@ static int _stp_transport_data_fs_init(void) - } - relay_file_operations_w_owner = relay_file_operations; - relay_file_operations_w_owner.owner = THIS_MODULE; -+ relay_file_operations_w_owner.poll = __stp_relay_file_poll; -+ relay_file_operations_w_owner.read = __stp_relay_file_read; - #if (RELAYFS_CHANNEL_VERSION >= 7) - _stp_relay_data.rchan = relay_open("trace", _stp_get_module_dir(), - _stp_subbuf_size, _stp_nsubbufs, -@@ -323,10 +362,12 @@ _stp_data_write_reserve(size_t size_request, void **entry) - - buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, - smp_processor_id()); -- if (unlikely(buf->offset + size_request > buf->chan->subbuf_size)) { -+ if (buf->offset >= buf->chan->subbuf_size) { - size_request = __stp_relay_switch_subbuf(buf, size_request); - if (!size_request) - return 0; -+ } else if (buf->offset + size_request > buf->chan->subbuf_size) { -+ size_request = buf->chan->subbuf_size - buf->offset; - } - *entry = (char*)buf->data + buf->offset; - buf->offset += size_request; -@@ -342,10 +383,6 @@ static unsigned char *_stp_data_entry_data(void *entry) - - static int _stp_data_write_commit(void *entry) - { -- struct rchan_buf *buf; -- -- buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, -- smp_processor_id()); -- __stp_relay_switch_subbuf(buf, 0); -+ atomic_set(&_stp_relay_data.wakeup, 1); - return 0; - } diff --git a/SOURCES/rhbz1966742.patch b/SOURCES/rhbz1966742.patch deleted file mode 100644 index 70f0e11..0000000 --- a/SOURCES/rhbz1966742.patch +++ /dev/null @@ -1,23 +0,0 @@ -commit 87221aeb66e3e3475bf6df77cc7b6338313d0946 -Author: Ding Hui -Date: Fri Nov 27 13:27:34 2020 -0500 - - PR26958: avoid null-deref in buildid verification failure diagnostic - - We can't traverse tsk-> if it's NULL. - - Tested by on hand-modified "stap -kp4" stap-symbols.c file. - -diff --git a/runtime/sym.c b/runtime/sym.c -index a51f08c9e..521aebe48 100644 ---- a/runtime/sym.c -+++ b/runtime/sym.c -@@ -735,7 +735,7 @@ static int _stp_build_id_check (struct _stp_module *m, - // we make sure below is really a warning. - _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" pid %ld address " - "%#lx, expected %s actual %s\n", -- m->path, (long) tsk->tgid, -+ m->path, (long) (tsk ? tsk->tgid : 0), - notes_addr, hexstring_theory, hexstring_practice); - return 1; - } diff --git a/SOURCES/rhbz1991631.patch b/SOURCES/rhbz1991631.patch new file mode 100644 index 0000000..9598ac8 --- /dev/null +++ b/SOURCES/rhbz1991631.patch @@ -0,0 +1,29 @@ +commit 515a6a2d63cdf16c5bc599f0d29283289219d9a4 +Author: Frank Ch. Eigler +Date: Thu Jun 24 13:30:38 2021 -0400 + + rhbz1972828: tapsets: iommu tracepoints + + Disable detection of intel-iommu tracepoint family on non-x86 + platforms, because the 5.13ish kernel headers for this tracepoint + include references to functions like clcache_flush_range which don't + exist on all non-x86. + +diff --git a/tapsets.cxx b/tapsets.cxx +index a5e41129f..20e0cb68f 100644 +--- a/tapsets.cxx ++++ b/tapsets.cxx +@@ -11930,6 +11930,13 @@ static vector tracepoint_extra_decls (systemtap_session& s, + they_live.push_back ("#include "); + } + ++ if (header.find("intel_iommu") != string::npos && s.architecture != "x86_64" && s.architecture != "i386") ++ { ++ // need asm/cacheflush.h for clflush_cache_range() used in that header, ++ // but this function does not exist on e.g. ppc ++ they_live.push_back ("#error nope"); ++ } ++ + if (header.find("wbt") != string::npos) + { + // blk-wbt.h gets included as "../../../block/blk-wbt.h", so we diff --git a/SPECS/systemtap.spec b/SPECS/systemtap.spec index 2128e2a..548c5c2 100644 --- a/SPECS/systemtap.spec +++ b/SPECS/systemtap.spec @@ -10,7 +10,7 @@ %{!?with_crash: %global with_crash 1} %endif %{!?with_rpm: %global with_rpm 1} -%{!?elfutils_version: %global elfutils_version 0.142} +%{!?elfutils_version: %global elfutils_version 0.179} %{!?pie_supported: %global pie_supported 1} %{!?with_boost: %global with_boost 0} %ifarch %{ix86} x86_64 ppc ppc64 ppc64le aarch64 @@ -22,6 +22,7 @@ %{!?with_systemd: %global with_systemd 0%{?fedora} >= 19 || 0%{?rhel} >= 7} %{!?with_emacsvim: %global with_emacsvim 0%{?fedora} >= 19 || 0%{?rhel} >= 7} %{!?with_java: %global with_java 0%{?fedora} >= 19 || 0%{?rhel} >= 7} +%{!?with_debuginfod: %global with_debuginfod 0%{?fedora} >= 25 || 0%{?rhel} >= 7} %{!?with_virthost: %global with_virthost 0%{?fedora} >= 19 || 0%{?rhel} >= 7} %{!?with_virtguest: %global with_virtguest 1} %{!?with_dracut: %global with_dracut 0%{?fedora} >= 19 || 0%{?rhel} >= 6} @@ -88,8 +89,8 @@ %define __brp_mangle_shebangs_exclude_from .stp$ Name: systemtap -Version: 4.4 -Release: 11%{?release_override}%{?dist} +Version: 4.5 +Release: 3%{?release_override}%{?dist} # for version, see also configure.ac @@ -125,24 +126,20 @@ License: GPLv2+ URL: http://sourceware.org/systemtap/ Source: ftp://sourceware.org/pub/systemtap/releases/systemtap-%{version}.tar.gz -Patch1: rhbz1873492.patch -Patch2: rhbz1898288.patch -Patch3: rhbz1902696.patch -Patch4: rhbz1908904.patch -Patch5: rhbz1906662.patch -Patch6: rhbz1650594.patch -Patch7: rhbz1914948.patch -Patch8: rhbz1966742.patch +Patch1: rhbz1991631.patch # Build* +BuildRequires: make BuildRequires: gcc-c++ BuildRequires: cpio BuildRequires: gettext-devel BuildRequires: pkgconfig(nss) BuildRequires: pkgconfig(avahi-client) +%if %{with_debuginfod} BuildRequires: pkgconfig(libdebuginfod) +%endif %if %{with_dyninst} -BuildRequires: dyninst-devel >= 10.0 +BuildRequires: dyninst-devel >= 11.0 BuildRequires: pkgconfig(libselinux) %endif %if %{with_sqlite} @@ -171,9 +168,6 @@ BuildRequires: /usr/bin/latex /usr/bin/dvips /usr/bin/ps2pdf %if 0%{?fedora} >= 18 || 0%{?rhel} >= 7 BuildRequires: tex(fullpage.sty) tex(fancybox.sty) tex(bchr7t.tfm) tex(graphicx.sty) %endif -# For the html.sty mentioned in the .tex files, even though latex2html is -# not run during the build, only during manual scripts/update-docs runs: -BuildRequires: latex2html %if %{with_htmldocs} # On F10, xmlto's pdf support was broken off into a sub-package, # called 'xmlto-tex'. To avoid a specific F10 BuildReq, we'll do a @@ -185,7 +179,7 @@ BuildRequires: xmlto /usr/share/xmlto/format/fo/pdf BuildRequires: emacs %endif %if %{with_java} -BuildRequires: jpackage-utils java-devel +BuildRequires: java-devel %endif %if %{with_virthost} # BuildRequires: libvirt-devel >= 1.0.2 @@ -272,6 +266,9 @@ Requires: kernel-devel-uname-r %endif Requires: gcc make +# for compiling --runtime=dyninst sripts, need elfutils headers, bz1930973 +Requires: elfutils-devel >= %{elfutils_version} + Conflicts: systemtap-client < %{version}-%{release} Conflicts: systemtap-server < %{version}-%{release} Conflicts: systemtap-runtime < %{version}-%{release} @@ -375,7 +372,16 @@ Requires: systemtap = %{version}-%{release} Requires: systemtap-sdt-devel = %{version}-%{release} Requires: systemtap-server = %{version}-%{release} Requires: dejagnu which elfutils grep nc +%if %{with_debuginfod} Requires: elfutils-debuginfod +%endif +# work around fedora ci gating kvetching about i686<->x86-64 conflicts +%ifarch x86_64 +Conflicts: systemtap-testsuite = %{version}-%{release}.i686 +%endif +%ifarch i686 +Conflicts: systemtap-testsuite = %{version}-%{release}.x86_64 +%endif Requires: gcc gcc-c++ make glibc-devel # testsuite/systemtap.base/ptrace.exp needs strace Requires: strace @@ -406,7 +412,12 @@ Requires: systemtap-runtime-python2 = %{version}-%{release} Requires: systemtap-runtime-python3 = %{version}-%{release} %endif %ifarch x86_64 +%if 0%{?rhel} >= 8 || 0%{?fedora} >= 20 +# fweimer, personal correspondence +Recommends: glibc-devel(x86-32) +%else Requires: /usr/lib/libc.so +%endif # ... and /usr/lib/libgcc_s.so.* # ... and /usr/lib/libstdc++.so.* %endif @@ -430,6 +441,13 @@ Summary: Systemtap Java Runtime Support License: GPLv2+ URL: http://sourceware.org/systemtap/ Requires: systemtap-runtime = %{version}-%{release} +# work around fedora ci gating kvetching about i686<->x86-64 conflicts +%ifarch x86_64 +Conflicts: systemtap-runtime = %{version}-%{release}.i686 +%endif +%ifarch i686 +Conflicts: systemtap-runtime = %{version}-%{release}.x86_64 +%endif Requires: byteman > 2.0 Requires: iproute Requires: java-devel @@ -521,13 +539,6 @@ systemtap-runtime-virthost machine to execute systemtap scripts. %prep %setup -q %patch1 -p1 -%patch2 -p1 -%patch3 -p1 -%patch4 -p1 -%patch5 -p1 -%patch6 -p1 -%patch7 -p1 -%patch8 -p1 %build @@ -545,6 +556,13 @@ systemtap-runtime-virthost machine to execute systemtap scripts. %global sqlite_config --disable-sqlite %endif +%if %{with_debuginfod} +%global debuginfod_config --with-debuginfod +%else +%global debuginfod_config --without-debuginfod +%endif + + # Enable/disable the crash extension %if %{with_crash} %global crash_config --enable-crash @@ -626,7 +644,7 @@ systemtap-runtime-virthost machine to execute systemtap scripts. # We don't ship compileworthy python code, just oddball samples %global py_auto_byte_compile 0 -%configure %{dyninst_config} %{sqlite_config} %{crash_config} %{docs_config} %{pie_config} %{rpm_config} %{java_config} %{virt_config} %{dracut_config} %{python3_config} %{python2_probes_config} %{python3_probes_config} %{httpd_config} %{bpf_config} --disable-silent-rules --with-extra-version="rpm %{version}-%{release}" +%configure %{dyninst_config} %{sqlite_config} %{crash_config} %{docs_config} %{pie_config} %{rpm_config} %{java_config} %{virt_config} %{dracut_config} %{python3_config} %{python2_probes_config} %{python3_probes_config} %{httpd_config} %{bpf_config} %{debuginfod_config} --disable-silent-rules --with-extra-version="rpm %{version}-%{release}" make %{?_smp_mflags} @@ -966,60 +984,8 @@ exit 0 # ------------------------------------------------------------------------ -%if %{with_java} - -%triggerin runtime-java -- java-1.8.0-openjdk, java-1.7.0-openjdk, java-1.6.0-openjdk -for f in %{_libexecdir}/systemtap/libHelperSDT_*.so; do - %ifarch %{ix86} - arch=i386 - %else - arch=`basename $f | cut -f2 -d_ | cut -f1 -d.` - %endif - for archdir in %{_jvmdir}/*openjdk*/jre/lib/${arch}; do - if [ -d ${archdir} ]; then - ln -sf %{_libexecdir}/systemtap/libHelperSDT_${arch}.so ${archdir}/libHelperSDT_${arch}.so - ln -sf %{_libexecdir}/systemtap/HelperSDT.jar ${archdir}/../ext/HelperSDT.jar - fi - done -done - -%triggerun runtime-java -- java-1.8.0-openjdk, java-1.7.0-openjdk, java-1.6.0-openjdk -for f in %{_libexecdir}/systemtap/libHelperSDT_*.so; do - %ifarch %{ix86} - arch=i386 - %else - arch=`basename $f | cut -f2 -d_ | cut -f1 -d.` - %endif - for archdir in %{_jvmdir}/*openjdk*/jre/lib/${arch}; do - rm -f ${archdir}/libHelperSDT_${arch}.so - rm -f ${archdir}/../ext/HelperSDT.jar - done -done - -%triggerpostun runtime-java -- java-1.8.0-openjdk, java-1.7.0-openjdk, java-1.6.0-openjdk -# Restore links for any JDKs remaining after a package removal: -for f in %{_libexecdir}/systemtap/libHelperSDT_*.so; do - %ifarch %{ix86} - arch=i386 - %else - arch=`basename $f | cut -f2 -d_ | cut -f1 -d.` - %endif - for archdir in %{_jvmdir}/*openjdk*/jre/lib/${arch}; do - if [ -d ${archdir} ]; then - ln -sf %{_libexecdir}/systemtap/libHelperSDT_${arch}.so ${archdir}/libHelperSDT_${arch}.so - ln -sf %{_libexecdir}/systemtap/HelperSDT.jar ${archdir}/../ext/HelperSDT.jar - fi - done -done - -# XXX: analogous support for other types of JRE/JDK?? - -%endif - -# ------------------------------------------------------------------------ - %files -# The master "systemtap" rpm doesn't include any files. +# The main "systemtap" rpm doesn't include any files. %files server -f systemtap.lang %{_bindir}/stap-server @@ -1073,7 +1039,7 @@ done %license COPYING %if %{with_java} %dir %{_libexecdir}/systemtap -%{_libexecdir}/systemtap/libHelperSDT_*.so +%{_libexecdir}/systemtap/libHelperSDT.so %endif %if %{with_emacsvim} %{_emacs_sitelispdir}/*.el* @@ -1193,7 +1159,7 @@ done %if %{with_java} %files runtime-java %dir %{_libexecdir}/systemtap -%{_libexecdir}/systemtap/libHelperSDT_*.so +%{_libexecdir}/systemtap/libHelperSDT.so %{_libexecdir}/systemtap/HelperSDT.jar %{_libexecdir}/systemtap/stapbm %endif @@ -1247,11 +1213,14 @@ done # PRERELEASE %changelog -* Tue Jun 01 2021 Frank Ch. Eigler - 4.4-11 -- rhbz1966742 intermittent panic on build-id check failure +* Thu Aug 12 2021 Frank Ch. Eigler - 4.5-3 +- rhbz1991631 iommu tracepoints break ppc64le -* Fri Mar 05 2021 Frank Ch. Eigler - 4.4-10 -- rhbz1914948 use subbufs better in transport to avoid msg loss +* Tue Jul 27 2021 Frank Ch. Eigler - 4.5-2 +- rhbz1986543 rebuild against dyninst 11 + +* Fri May 07 2021 Frank Ch. Eigler - 4.5-1 +- Upstream release. * Tue Jan 26 2021 Frank Ch. Eigler - 4.4-9 - rhbz1927497 enable debuginfod client for buildid probing