6df2a7
From cf74193cc37f2ace1197b1e6ebacc6fe542767c8 Mon Sep 17 00:00:00 2001
6df2a7
From: Kairui Song <kasong@redhat.com>
6df2a7
Date: Wed, 8 Apr 2020 16:39:52 +0800
6df2a7
Subject: [PATCH] Remove memtrace-ko and rd.memdebug=4 support in dracut
6df2a7
6df2a7
This feature could be offloaded to memstrack, which has better
6df2a7
accuracy, better performance, and more detailed tracing features.
6df2a7
6df2a7
Also simplify make_trace_mem a bit.
6df2a7
6df2a7
Also, rd.memdebug=4 is currently unstable and fails from time to time.
6df2a7
6df2a7
(cherry picked from commit 49c4172f4eef6e2015615e132b199a7ec0699ffc)
6df2a7
6df2a7
Resolves: #1829528
6df2a7
---
6df2a7
 dracut.cmdline.7.asc                             |   4 +-
6df2a7
 modules.d/98dracut-systemd/dracut-cmdline.sh     |   2 +-
6df2a7
 modules.d/98dracut-systemd/dracut-pre-mount.sh   |   2 +-
6df2a7
 modules.d/98dracut-systemd/dracut-pre-pivot.sh   |   2 +-
6df2a7
 modules.d/98dracut-systemd/dracut-pre-trigger.sh |   2 +-
6df2a7
 modules.d/99base/dracut-lib.sh                   |  44 +-----
6df2a7
 modules.d/99base/init.sh                         |   8 +-
6df2a7
 modules.d/99base/memtrace-ko.sh                  | 191 -----------------------
6df2a7
 modules.d/99base/module-setup.sh                 |   1 -
6df2a7
 9 files changed, 18 insertions(+), 238 deletions(-)
6df2a7
6df2a7
diff --git a/dracut.cmdline.7.asc b/dracut.cmdline.7.asc
6df2a7
index ab9a24ad..0dc58d1a 100644
6df2a7
--- a/dracut.cmdline.7.asc
6df2a7
+++ b/dracut.cmdline.7.asc
6df2a7
@@ -187,9 +187,9 @@ It should be attached to any report about dracut problems.
6df2a7
     _/run/initramfs/init.log_.
6df2a7
     If "quiet" is set, it also logs to the console.
6df2a7
 
6df2a7
-**rd.memdebug=[0-4]**::
6df2a7
+**rd.memdebug=[0-3]**::
6df2a7
     Print memory usage info at various points, set the verbose level from 0 to 4.
6df2a7
-+    
6df2a7
++
6df2a7
     Higher level means more debugging output:
6df2a7
 +
6df2a7
 ----
6df2a7
diff --git a/modules.d/98dracut-systemd/dracut-cmdline.sh b/modules.d/98dracut-systemd/dracut-cmdline.sh
6df2a7
index bff9435a..6c6ee026 100755
6df2a7
--- a/modules.d/98dracut-systemd/dracut-cmdline.sh
6df2a7
+++ b/modules.d/98dracut-systemd/dracut-cmdline.sh
6df2a7
@@ -42,7 +42,7 @@ export root
6df2a7
 export rflags
6df2a7
 export fstype
6df2a7
 
6df2a7
-make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' '4+:komem'
6df2a7
+make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab'
6df2a7
 # run scriptlets to parse the command line
6df2a7
 getarg 'rd.break=cmdline' -d 'rdbreak=cmdline' && emergency_shell -n cmdline "Break before cmdline"
6df2a7
 source_hook cmdline
6df2a7
diff --git a/modules.d/98dracut-systemd/dracut-pre-mount.sh b/modules.d/98dracut-systemd/dracut-pre-mount.sh
6df2a7
index a3b9d291..ae511286 100755
6df2a7
--- a/modules.d/98dracut-systemd/dracut-pre-mount.sh
6df2a7
+++ b/modules.d/98dracut-systemd/dracut-pre-mount.sh
6df2a7
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
6df2a7
 
6df2a7
 source_conf /etc/conf.d
6df2a7
 
6df2a7
-make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6df2a7
+make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab'
6df2a7
 # pre pivot scripts are sourced just before we doing cleanup and switch over
6df2a7
 # to the new root.
6df2a7
 getarg 'rd.break=pre-mount' 'rdbreak=pre-mount' && emergency_shell -n pre-mount "Break pre-mount"
6df2a7
diff --git a/modules.d/98dracut-systemd/dracut-pre-pivot.sh b/modules.d/98dracut-systemd/dracut-pre-pivot.sh
6df2a7
index dc9a2504..96e6f8ca 100755
6df2a7
--- a/modules.d/98dracut-systemd/dracut-pre-pivot.sh
6df2a7
+++ b/modules.d/98dracut-systemd/dracut-pre-pivot.sh
6df2a7
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
6df2a7
 
6df2a7
 source_conf /etc/conf.d
6df2a7
 
6df2a7
-make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6df2a7
+make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab'
6df2a7
 # pre pivot scripts are sourced just before we doing cleanup and switch over
6df2a7
 # to the new root.
6df2a7
 getarg 'rd.break=pre-pivot' 'rdbreak=pre-pivot' && emergency_shell -n pre-pivot "Break pre-pivot"
6df2a7
diff --git a/modules.d/98dracut-systemd/dracut-pre-trigger.sh b/modules.d/98dracut-systemd/dracut-pre-trigger.sh
6df2a7
index 7cd821ed..a1a33960 100755
6df2a7
--- a/modules.d/98dracut-systemd/dracut-pre-trigger.sh
6df2a7
+++ b/modules.d/98dracut-systemd/dracut-pre-trigger.sh
6df2a7
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
6df2a7
 
6df2a7
 source_conf /etc/conf.d
6df2a7
 
6df2a7
-make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6df2a7
+make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab'
6df2a7
 
6df2a7
 source_hook pre-trigger
6df2a7
 
6df2a7
diff --git a/modules.d/99base/dracut-lib.sh b/modules.d/99base/dracut-lib.sh
6df2a7
index f71e757c..502c7568 100755
6df2a7
--- a/modules.d/99base/dracut-lib.sh
6df2a7
+++ b/modules.d/99base/dracut-lib.sh
6df2a7
@@ -1220,50 +1220,25 @@ are_lists_eq() {
6df2a7
 
6df2a7
 setmemdebug() {
6df2a7
     if [ -z "$DEBUG_MEM_LEVEL" ]; then
6df2a7
-        export DEBUG_MEM_LEVEL=$(getargnum 0 0 4 rd.memdebug)
6df2a7
+        export DEBUG_MEM_LEVEL=$(getargnum 0 0 3 rd.memdebug)
6df2a7
     fi
6df2a7
 }
6df2a7
 
6df2a7
 setmemdebug
6df2a7
 
6df2a7
-cleanup_trace_mem()
6df2a7
-{
6df2a7
-    # tracekomem based on kernel trace needs cleanup after use.
6df2a7
-    if [ "$DEBUG_MEM_LEVEL" -eq 4 ]; then
6df2a7
-        tracekomem --cleanup
6df2a7
-    fi
6df2a7
-}
6df2a7
-
6df2a7
-# parameters: msg [trace_level:trace]...
6df2a7
-make_trace_mem()
6df2a7
-{
6df2a7
-    local msg
6df2a7
-    msg="$1"
6df2a7
-    shift
6df2a7
-    if [ -n "$DEBUG_MEM_LEVEL" ] && [ "$DEBUG_MEM_LEVEL" -gt 0 ]; then
6df2a7
-        make_trace show_memstats $DEBUG_MEM_LEVEL "[debug_mem]" "$msg" "$@" >&2
6df2a7
-    fi
6df2a7
-}
6df2a7
-
6df2a7
 # parameters: func log_level prefix msg [trace_level:trace]...
6df2a7
-make_trace()
6df2a7
+make_trace_mem()
6df2a7
 {
6df2a7
-    local func log_level prefix msg msg_printed
6df2a7
+    local log_level prefix msg msg_printed
6df2a7
     local trace trace_level trace_in_higher_levels insert_trace
6df2a7
 
6df2a7
-    func=$1
6df2a7
-    shift
6df2a7
-
6df2a7
-    log_level=$1
6df2a7
-    shift
6df2a7
-
6df2a7
-    prefix=$1
6df2a7
-    shift
6df2a7
-
6df2a7
     msg=$1
6df2a7
     shift
6df2a7
 
6df2a7
-    if [ -z "$log_level" ]; then
6df2a7
+    prefix='[debug_mem]'
6df2a7
+    log_level=$DEBUG_MEM_LEVEL
6df2a7
+
6df2a7
+    if [ -z "$log_level" ] || [ "$log_level" -le 0 ]; then
6df2a7
         return
6df2a7
     fi
6df2a7
 
6df2a7
@@ -1296,7 +1271,7 @@ make_trace()
6df2a7
                 echo "$prefix $msg"
6df2a7
                 msg_printed=1
6df2a7
             fi
6df2a7
-            $func $trace
6df2a7
+            show_memstats $trace
6df2a7
         fi
6df2a7
         shift
6df2a7
     done
6df2a7
@@ -1318,9 +1293,6 @@ show_memstats()
6df2a7
         iomem)
6df2a7
             cat /proc/iomem
6df2a7
             ;;
6df2a7
-        komem)
6df2a7
-            tracekomem
6df2a7
-            ;;
6df2a7
     esac
6df2a7
 }
6df2a7
 
6df2a7
diff --git a/modules.d/99base/init.sh b/modules.d/99base/init.sh
6df2a7
index e4f7cff1..2c0ccd66 100755
6df2a7
--- a/modules.d/99base/init.sh
6df2a7
+++ b/modules.d/99base/init.sh
6df2a7
@@ -131,7 +131,7 @@ if ! getargbool 1 'rd.hostonly'; then
6df2a7
 fi
6df2a7
 
6df2a7
 # run scriptlets to parse the command line
6df2a7
-make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' '4+:komem'
6df2a7
+make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab'
6df2a7
 getarg 'rd.break=cmdline' -d 'rdbreak=cmdline' && emergency_shell -n cmdline "Break before cmdline"
6df2a7
 source_hook cmdline
6df2a7
 
6df2a7
@@ -160,7 +160,7 @@ fi
6df2a7
 
6df2a7
 udevproperty "hookdir=$hookdir"
6df2a7
 
6df2a7
-make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6df2a7
+make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab'
6df2a7
 getarg 'rd.break=pre-trigger' -d 'rdbreak=pre-trigger' && emergency_shell -n pre-trigger "Break before pre-trigger"
6df2a7
 source_hook pre-trigger
6df2a7
 
6df2a7
@@ -230,7 +230,7 @@ unset RDRETRY
6df2a7
 
6df2a7
 # pre-mount happens before we try to mount the root filesystem,
6df2a7
 # and happens once.
6df2a7
-make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6df2a7
+make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab'
6df2a7
 getarg 'rd.break=pre-mount' -d 'rdbreak=pre-mount' && emergency_shell -n pre-mount "Break pre-mount"
6df2a7
 source_hook pre-mount
6df2a7
 
6df2a7
@@ -266,7 +266,7 @@ done
6df2a7
 
6df2a7
 # pre pivot scripts are sourced just before we doing cleanup and switch over
6df2a7
 # to the new root.
6df2a7
-make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6df2a7
+make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab'
6df2a7
 getarg 'rd.break=pre-pivot' -d 'rdbreak=pre-pivot' && emergency_shell -n pre-pivot "Break pre-pivot"
6df2a7
 source_hook pre-pivot
6df2a7
 
6df2a7
diff --git a/modules.d/99base/memtrace-ko.sh b/modules.d/99base/memtrace-ko.sh
6df2a7
deleted file mode 100755
6df2a7
index ee035e15..00000000
6df2a7
--- a/modules.d/99base/memtrace-ko.sh
6df2a7
+++ /dev/null
6df2a7
@@ -1,191 +0,0 @@
6df2a7
-#!/bin/sh
6df2a7
-
6df2a7
-# Try to find out kernel modules with large total memory allocation during loading.
6df2a7
-# For large slab allocation, it will fall into buddy, also not trace "mm_page_free"
6df2a7
-# considering large free is quite rare for module_init, thus saving tons of events
6df2a7
-# to avoid trace data overwritten.
6df2a7
-#
6df2a7
-# Therefore, tracing "mm_page_alloc"alone should be enough for the purpose.
6df2a7
-
6df2a7
-# "sys/kernel/tracing" has the priority if exists.
6df2a7
-get_trace_base() {
6df2a7
-    # trace access through debugfs would be obsolete if "/sys/kernel/tracing" is available.
6df2a7
-    if [ -d "/sys/kernel/tracing" ]; then
6df2a7
-        echo "/sys/kernel"
6df2a7
-    else
6df2a7
-        echo "/sys/kernel/debug"
6df2a7
-    fi
6df2a7
-}
6df2a7
-
6df2a7
-# We want to enable these trace events.
6df2a7
-get_want_events() {
6df2a7
-    echo "module:module_put module:module_load kmem:mm_page_alloc"
6df2a7
-}
6df2a7
-
6df2a7
-get_event_filter() {
6df2a7
-    echo "comm == systemd-udevd || comm == modprobe || comm == insmod"
6df2a7
-}
6df2a7
-
6df2a7
-is_trace_ready() {
6df2a7
-    local trace_base want_events current_events
6df2a7
-
6df2a7
-    trace_base=$(get_trace_base)
6df2a7
-    ! [ -f "$trace_base/tracing/trace" ] && return 1
6df2a7
-
6df2a7
-    [ "$(cat $trace_base/tracing/tracing_on)" -eq 0 ] && return 1
6df2a7
-
6df2a7
-    # Also check if trace events were properly setup.
6df2a7
-    want_events=$(get_want_events)
6df2a7
-    current_events=$(echo $(cat $trace_base/tracing/set_event))
6df2a7
-    [ "$current_events" != "$want_events" ] && return 1
6df2a7
-
6df2a7
-    return 0
6df2a7
-}
6df2a7
-
6df2a7
-prepare_trace() {
6df2a7
-    local trace_base
6df2a7
-
6df2a7
-    trace_base=$(get_trace_base)
6df2a7
-    # old debugfs interface case.
6df2a7
-    if ! [ -d "$trace_base/tracing" ]; then
6df2a7
-        mount none -t debugfs $trace_base
6df2a7
-    # new tracefs interface case.
6df2a7
-    elif ! [ -f "$trace_base/tracing/trace" ]; then
6df2a7
-        mount none -t tracefs "$trace_base/tracing"
6df2a7
-    fi
6df2a7
-
6df2a7
-    if ! [ -f "$trace_base/tracing/trace" ]; then
6df2a7
-        echo "WARN: Mount trace failed for kernel module memory analyzing."
6df2a7
-        return 1
6df2a7
-    fi
6df2a7
-
6df2a7
-    # Active all the wanted trace events.
6df2a7
-    echo "$(get_want_events)" > $trace_base/tracing/set_event
6df2a7
-
6df2a7
-    # There are three kinds of known applications for module loading:
6df2a7
-    # "systemd-udevd", "modprobe" and "insmod".
6df2a7
-    # Set them as the global events filter.
6df2a7
-    # NOTE: Some kernel may not support this format of filter, anyway
6df2a7
-    #       the operation will fail and it doesn't matter.
6df2a7
-    echo "$(get_event_filter)" > $trace_base/tracing/events/kmem/filter 2>&1
6df2a7
-    echo "$(get_event_filter)" > $trace_base/tracing/events/module/filter 2>&1
6df2a7
-
6df2a7
-    # Set the number of comm-pid if supported.
6df2a7
-    if [ -f "$trace_base/tracing/saved_cmdlines_size" ]; then
6df2a7
-        # Thanks to filters, 4096 is big enough(also well supported).
6df2a7
-        echo 4096 > $trace_base/tracing/saved_cmdlines_size
6df2a7
-    fi
6df2a7
-
6df2a7
-    # Enable and clear trace data for the first time.
6df2a7
-    echo 1 > $trace_base/tracing/tracing_on
6df2a7
-    echo > $trace_base/tracing/trace
6df2a7
-    echo "Prepare trace success."
6df2a7
-    return 0
6df2a7
-}
6df2a7
-
6df2a7
-order_to_pages()
6df2a7
-{
6df2a7
-    local pages=1
6df2a7
-    local order=$1
6df2a7
-
6df2a7
-    while [ "$order" != 0 ]; do
6df2a7
-        order=$((order-1))
6df2a7
-        pages=$(($pages*2))
6df2a7
-	done
6df2a7
-
6df2a7
-    echo $pages
6df2a7
-}
6df2a7
-
6df2a7
-parse_trace_data() {
6df2a7
-    local module_name tmp_eval pages
6df2a7
-
6df2a7
-    cat "$(get_trace_base)/tracing/trace" | while read pid cpu flags ts function args
6df2a7
-    do
6df2a7
-        # Skip comment lines
6df2a7
-        if [ "$pid" = "#" ]; then
6df2a7
-            continue
6df2a7
-        fi
6df2a7
-
6df2a7
-        pid=${pid##*-}
6df2a7
-        function=${function%:}
6df2a7
-        if [ "$function" = "module_load" ]; then
6df2a7
-            # One module is being loaded, save the task pid for tracking.
6df2a7
-            # Remove the trailing after whitespace, there may be the module flags.
6df2a7
-            module_name=${args%% *}
6df2a7
-            # Mark current_module to track the task.
6df2a7
-            eval current_module_$pid="$module_name"
6df2a7
-            tmp_eval=$(eval echo '${module_loaded_'${module_name}'}')
6df2a7
-            if [ -n "$tmp_eval" ]; then
6df2a7
-                echo "WARN: \"$module_name\" was loaded multiple times!"
6df2a7
-            fi
6df2a7
-            eval unset module_loaded_$module_name
6df2a7
-            eval nr_alloc_pages_$module_name=0
6df2a7
-            continue
6df2a7
-        fi
6df2a7
-
6df2a7
-        module_name=$(eval echo '${current_module_'${pid}'}')
6df2a7
-        if [ -z "$module_name" ]; then
6df2a7
-            continue
6df2a7
-        fi
6df2a7
-
6df2a7
-        # Once we get here, the task is being tracked(is loading a module).
6df2a7
-        if [ "$function" = "module_put" ]; then
6df2a7
-            # Mark the module as loaded when the first module_put event happens after module_load.
6df2a7
-            tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}')
6df2a7
-            echo "$tmp_eval pages consumed by \"$module_name\""
6df2a7
-            eval module_loaded_$module_name=1
6df2a7
-            # Module loading finished, so untrack the task.
6df2a7
-            eval unset current_module_$pid
6df2a7
-            eval unset nr_alloc_pages_$module_name
6df2a7
-            continue
6df2a7
-        fi
6df2a7
-
6df2a7
-        if [ "$function" = "mm_page_alloc" ]; then
6df2a7
-            # Get order first, then convert to actual pages.
6df2a7
-            pages=$(echo $args | sed -e 's/.*order=\([0-9]*\) .*/\1/')
6df2a7
-            pages=$(order_to_pages "$pages")
6df2a7
-            tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}')
6df2a7
-            eval nr_alloc_pages_$module_name="$(($tmp_eval+$pages))"
6df2a7
-        fi
6df2a7
-    done
6df2a7
-}
6df2a7
-
6df2a7
-cleanup_trace() {
6df2a7
-    local trace_base
6df2a7
-
6df2a7
-    if is_trace_ready; then
6df2a7
-        trace_base=$(get_trace_base)
6df2a7
-        echo 0 > $trace_base/tracing/tracing_on
6df2a7
-        echo > $trace_base/tracing/trace
6df2a7
-        echo > $trace_base/tracing/set_event
6df2a7
-        echo 0 > $trace_base/tracing/events/kmem/filter
6df2a7
-        echo 0 > $trace_base/tracing/events/module/filter
6df2a7
-    fi
6df2a7
-}
6df2a7
-
6df2a7
-show_usage() {
6df2a7
-    echo "Find out kernel modules with large memory consumption during loading based on trace."
6df2a7
-    echo "Usage:"
6df2a7
-    echo "1) run it first to setup trace."
6df2a7
-    echo "2) run again to parse the trace data if any."
6df2a7
-    echo "3) run with \"--cleanup\" option to cleanup trace after use."
6df2a7
-}
6df2a7
-
6df2a7
-if [ "$1" = "--help" ]; then
6df2a7
-    show_usage
6df2a7
-    exit 0
6df2a7
-fi
6df2a7
-
6df2a7
-if [ "$1" = "--cleanup" ]; then
6df2a7
-    cleanup_trace
6df2a7
-    exit 0
6df2a7
-fi
6df2a7
-
6df2a7
-if is_trace_ready ; then
6df2a7
-    echo "tracekomem - Rough memory consumption by loading kernel modules (larger value with better accuracy)"
6df2a7
-    parse_trace_data
6df2a7
-else
6df2a7
-    prepare_trace
6df2a7
-fi
6df2a7
-
6df2a7
-exit $?
6df2a7
diff --git a/modules.d/99base/module-setup.sh b/modules.d/99base/module-setup.sh
6df2a7
index c9ead01d..3578643c 100755
6df2a7
--- a/modules.d/99base/module-setup.sh
6df2a7
+++ b/modules.d/99base/module-setup.sh
6df2a7
@@ -35,7 +35,6 @@ install() {
6df2a7
     inst_script "$moddir/initqueue.sh" "/sbin/initqueue"
6df2a7
     inst_script "$moddir/loginit.sh" "/sbin/loginit"
6df2a7
     inst_script "$moddir/rdsosreport.sh" "/sbin/rdsosreport"
6df2a7
-    inst_script "$moddir/memtrace-ko.sh" "/sbin/tracekomem"
6df2a7
 
6df2a7
     [ -e "${initdir}/lib" ] || mkdir -m 0755 -p ${initdir}/lib
6df2a7
     mkdir -m 0755 -p ${initdir}/lib/dracut
6df2a7