Blob Blame History Raw
commit 7eed8d1fef36997b9e4c1d9cdb67643483a51e56
Author: William Cohen <wcohen@redhat.com>
Date:   Fri Nov 4 11:12:05 2022 -0400

    Ensure that SystemTap runtime uses smp_processor_id() in proper context
    
    There were cases on Fedora 36 and Rawhide running kernels with
    CONFIG_DEBUG_PREEMPT=y where systemtap scripts would trigger kernel
    log messages like the following:
    
    [  257.544406] check_preemption_disabled: 4 callbacks suppressed
    [  257.544409] BUG: using smp_processor_id() in preemptible [00000000] code: staprun/2106
    [  257.544465] caller is _stp_runtime_context_trylock+0x12/0x70 [stap_e36600406768aeefd49daf9fc7a3d23c_2106]
    [  257.544507] CPU: 0 PID: 2106 Comm: staprun Tainted: G           OE     -------  ---  6.1.0-0.rc2.20221028git23758867219c.24.fc38.x86_64 #1
    [  257.544544] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-1.fc36 04/01/2014
    [  257.544571] Call Trace:
    [  257.544583]  <TASK>
    [  257.544593]  dump_stack_lvl+0x5b/0x77
    [  257.544620]  check_preemption_disabled+0xe1/0xf0
    [  257.544641]  _stp_runtime_context_trylock+0x12/0x70 [stap_e36600406768aeefd49daf9fc7a3d23c_2106]
    [  257.544673]  _stp_runtime_entryfn_get_context+0xb/0x70 [stap_e36600406768aeefd49daf9fc7a3d23c_2106]
    [  257.544705]  _stp_ctl_send+0x76/0x1e0 [stap_e36600406768aeefd49daf9fc7a3d23c_2106]
    [  257.544735]  _stp_transport_init+0x71a/0x860 [stap_e36600406768aeefd49daf9fc7a3d23c_2106]
    [  257.544771]  ? kallsyms_on_each_symbol+0x30/0x30 [stap_e36600406768aeefd49daf9fc7a3d23c_2106]
    [  257.544803]  do_one_initcall+0x6b/0x320
    [  257.544827]  do_init_module+0x4a/0x200
    [  257.544844]  __do_sys_init_module+0x16a/0x1a0
    [  257.544870]  do_syscall_64+0x58/0x80
    [  257.544885]  ? up_read+0x17/0x20
    [  257.544902]  ? lock_is_held_type+0xe8/0x140
    [  257.544921]  ? asm_exc_page_fault+0x22/0x30
    [  257.544939]  ? lockdep_hardirqs_on+0x7d/0x100
    [  257.544956]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
    [  257.544975] RIP: 0033:0x7f3cde12f5de
    [  257.544992] Code: 48 8b 0d 35 68 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 af 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 02 68 0c 00 f7 d8 64 89 01 48
    [  257.545010] RSP: 002b:00007ffc5170c418 EFLAGS: 00000246 ORIG_RAX: 00000000000000af
    [  257.545010] RAX: ffffffffffffffda RBX: 0000563620bd4020 RCX: 00007f3cde12f5de
    [  257.545010] RDX: 0000563620bd4020 RSI: 0000000000040ea0 RDI: 00007f3cde44a010
    [  257.545010] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000
    [  257.545010] R10: 0000000000000053 R11: 0000000000000246 R12: 00007ffc5170c510
    [  257.545010] R13: 00007f3cde44a010 R14: 00007ffc5170c440 R15: 00007f3cde4631e8
    [  257.545010]  </TASK>
    
    This issue was introduced by git commit 1641b6e7ea which added a fast
    path check that used smp_processor_id() without first having a
    preempt_disable().  The code now ensures that preemption is disabled
    before using the smp_processor_id().

diff --git a/runtime/linux/runtime_context.h b/runtime/linux/runtime_context.h
index 3ed3cbd22..ee3870f32 100644
--- a/runtime/linux/runtime_context.h
+++ b/runtime/linux/runtime_context.h
@@ -49,13 +49,18 @@ static bool _stp_runtime_context_trylock(void)
 {
 	bool locked;
 
+	/* Need to disable preemption because of the smp_processor_id() call
+	   in _stp_runtime_get_context(). */
+	preempt_disable();
+
 	/* fast path to ignore new online CPUs without percpu context memory
 	 * allocations. this also serves as an extra safe guard for NULL context
 	 * pointers. */
-	if (unlikely(_stp_runtime_get_context() == NULL))
+	if (unlikely(_stp_runtime_get_context() == NULL)) {
+		preempt_enable_no_resched();
 		return false;
+	}
 
-	preempt_disable();
 	locked = atomic_add_unless(&_stp_contexts_busy_ctr, 1, INT_MAX);
 	if (!locked)
 		preempt_enable_no_resched();