17094c
From cf74193cc37f2ace1197b1e6ebacc6fe542767c8 Mon Sep 17 00:00:00 2001
17094c
From: Kairui Song <kasong@redhat.com>
17094c
Date: Wed, 8 Apr 2020 16:39:52 +0800
17094c
Subject: [PATCH] Remove memtrace-ko and rd.memdebug=4 support in dracut
17094c
17094c
This feature could be offloaded to memstrack, which has better
17094c
accuracy, better performance, and more detailed tracing features.
17094c
17094c
Also simplify make_trace_mem a bit.
17094c
17094c
Also, rd.memdebug=4 is currently unstable and fails from time to time.
17094c
17094c
(cherry picked from commit 49c4172f4eef6e2015615e132b199a7ec0699ffc)
17094c
17094c
Resolves: #1829528
17094c
---
17094c
 dracut.cmdline.7.asc                             |   4 +-
17094c
 modules.d/98dracut-systemd/dracut-cmdline.sh     |   2 +-
17094c
 modules.d/98dracut-systemd/dracut-pre-mount.sh   |   2 +-
17094c
 modules.d/98dracut-systemd/dracut-pre-pivot.sh   |   2 +-
17094c
 modules.d/98dracut-systemd/dracut-pre-trigger.sh |   2 +-
17094c
 modules.d/99base/dracut-lib.sh                   |  44 +-----
17094c
 modules.d/99base/init.sh                         |   8 +-
17094c
 modules.d/99base/memtrace-ko.sh                  | 191 -----------------------
17094c
 modules.d/99base/module-setup.sh                 |   1 -
17094c
 9 files changed, 18 insertions(+), 238 deletions(-)
17094c
17094c
diff --git a/dracut.cmdline.7.asc b/dracut.cmdline.7.asc
17094c
index ab9a24ad..0dc58d1a 100644
17094c
--- a/dracut.cmdline.7.asc
17094c
+++ b/dracut.cmdline.7.asc
17094c
@@ -187,9 +187,9 @@ It should be attached to any report about dracut problems.
17094c
     _/run/initramfs/init.log_.
17094c
     If "quiet" is set, it also logs to the console.
17094c
 
17094c
-**rd.memdebug=[0-4]**::
17094c
+**rd.memdebug=[0-3]**::
17094c
     Print memory usage info at various points, set the verbose level from 0 to 4.
17094c
-+    
17094c
++
17094c
     Higher level means more debugging output:
17094c
 +
17094c
 ----
17094c
diff --git a/modules.d/98dracut-systemd/dracut-cmdline.sh b/modules.d/98dracut-systemd/dracut-cmdline.sh
17094c
index bff9435a..6c6ee026 100755
17094c
--- a/modules.d/98dracut-systemd/dracut-cmdline.sh
17094c
+++ b/modules.d/98dracut-systemd/dracut-cmdline.sh
17094c
@@ -42,7 +42,7 @@ export root
17094c
 export rflags
17094c
 export fstype
17094c
 
17094c
-make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' '4+:komem'
17094c
+make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab'
17094c
 # run scriptlets to parse the command line
17094c
 getarg 'rd.break=cmdline' -d 'rdbreak=cmdline' && emergency_shell -n cmdline "Break before cmdline"
17094c
 source_hook cmdline
17094c
diff --git a/modules.d/98dracut-systemd/dracut-pre-mount.sh b/modules.d/98dracut-systemd/dracut-pre-mount.sh
17094c
index a3b9d291..ae511286 100755
17094c
--- a/modules.d/98dracut-systemd/dracut-pre-mount.sh
17094c
+++ b/modules.d/98dracut-systemd/dracut-pre-mount.sh
17094c
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
17094c
 
17094c
 source_conf /etc/conf.d
17094c
 
17094c
-make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
17094c
+make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab'
17094c
 # pre pivot scripts are sourced just before we doing cleanup and switch over
17094c
 # to the new root.
17094c
 getarg 'rd.break=pre-mount' 'rdbreak=pre-mount' && emergency_shell -n pre-mount "Break pre-mount"
17094c
diff --git a/modules.d/98dracut-systemd/dracut-pre-pivot.sh b/modules.d/98dracut-systemd/dracut-pre-pivot.sh
17094c
index dc9a2504..96e6f8ca 100755
17094c
--- a/modules.d/98dracut-systemd/dracut-pre-pivot.sh
17094c
+++ b/modules.d/98dracut-systemd/dracut-pre-pivot.sh
17094c
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
17094c
 
17094c
 source_conf /etc/conf.d
17094c
 
17094c
-make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
17094c
+make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab'
17094c
 # pre pivot scripts are sourced just before we doing cleanup and switch over
17094c
 # to the new root.
17094c
 getarg 'rd.break=pre-pivot' 'rdbreak=pre-pivot' && emergency_shell -n pre-pivot "Break pre-pivot"
17094c
diff --git a/modules.d/98dracut-systemd/dracut-pre-trigger.sh b/modules.d/98dracut-systemd/dracut-pre-trigger.sh
17094c
index 7cd821ed..a1a33960 100755
17094c
--- a/modules.d/98dracut-systemd/dracut-pre-trigger.sh
17094c
+++ b/modules.d/98dracut-systemd/dracut-pre-trigger.sh
17094c
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
17094c
 
17094c
 source_conf /etc/conf.d
17094c
 
17094c
-make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
17094c
+make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab'
17094c
 
17094c
 source_hook pre-trigger
17094c
 
17094c
diff --git a/modules.d/99base/dracut-lib.sh b/modules.d/99base/dracut-lib.sh
17094c
index f71e757c..502c7568 100755
17094c
--- a/modules.d/99base/dracut-lib.sh
17094c
+++ b/modules.d/99base/dracut-lib.sh
17094c
@@ -1220,50 +1220,25 @@ are_lists_eq() {
17094c
 
17094c
 setmemdebug() {
17094c
     if [ -z "$DEBUG_MEM_LEVEL" ]; then
17094c
-        export DEBUG_MEM_LEVEL=$(getargnum 0 0 4 rd.memdebug)
17094c
+        export DEBUG_MEM_LEVEL=$(getargnum 0 0 3 rd.memdebug)
17094c
     fi
17094c
 }
17094c
 
17094c
 setmemdebug
17094c
 
17094c
-cleanup_trace_mem()
17094c
-{
17094c
-    # tracekomem based on kernel trace needs cleanup after use.
17094c
-    if [ "$DEBUG_MEM_LEVEL" -eq 4 ]; then
17094c
-        tracekomem --cleanup
17094c
-    fi
17094c
-}
17094c
-
17094c
-# parameters: msg [trace_level:trace]...
17094c
-make_trace_mem()
17094c
-{
17094c
-    local msg
17094c
-    msg="$1"
17094c
-    shift
17094c
-    if [ -n "$DEBUG_MEM_LEVEL" ] && [ "$DEBUG_MEM_LEVEL" -gt 0 ]; then
17094c
-        make_trace show_memstats $DEBUG_MEM_LEVEL "[debug_mem]" "$msg" "$@" >&2
17094c
-    fi
17094c
-}
17094c
-
17094c
 # parameters: func log_level prefix msg [trace_level:trace]...
17094c
-make_trace()
17094c
+make_trace_mem()
17094c
 {
17094c
-    local func log_level prefix msg msg_printed
17094c
+    local log_level prefix msg msg_printed
17094c
     local trace trace_level trace_in_higher_levels insert_trace
17094c
 
17094c
-    func=$1
17094c
-    shift
17094c
-
17094c
-    log_level=$1
17094c
-    shift
17094c
-
17094c
-    prefix=$1
17094c
-    shift
17094c
-
17094c
     msg=$1
17094c
     shift
17094c
 
17094c
-    if [ -z "$log_level" ]; then
17094c
+    prefix='[debug_mem]'
17094c
+    log_level=$DEBUG_MEM_LEVEL
17094c
+
17094c
+    if [ -z "$log_level" ] || [ "$log_level" -le 0 ]; then
17094c
         return
17094c
     fi
17094c
 
17094c
@@ -1296,7 +1271,7 @@ make_trace()
17094c
                 echo "$prefix $msg"
17094c
                 msg_printed=1
17094c
             fi
17094c
-            $func $trace
17094c
+            show_memstats $trace
17094c
         fi
17094c
         shift
17094c
     done
17094c
@@ -1318,9 +1293,6 @@ show_memstats()
17094c
         iomem)
17094c
             cat /proc/iomem
17094c
             ;;
17094c
-        komem)
17094c
-            tracekomem
17094c
-            ;;
17094c
     esac
17094c
 }
17094c
 
17094c
diff --git a/modules.d/99base/init.sh b/modules.d/99base/init.sh
17094c
index e4f7cff1..2c0ccd66 100755
17094c
--- a/modules.d/99base/init.sh
17094c
+++ b/modules.d/99base/init.sh
17094c
@@ -131,7 +131,7 @@ if ! getargbool 1 'rd.hostonly'; then
17094c
 fi
17094c
 
17094c
 # run scriptlets to parse the command line
17094c
-make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' '4+:komem'
17094c
+make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab'
17094c
 getarg 'rd.break=cmdline' -d 'rdbreak=cmdline' && emergency_shell -n cmdline "Break before cmdline"
17094c
 source_hook cmdline
17094c
 
17094c
@@ -160,7 +160,7 @@ fi
17094c
 
17094c
 udevproperty "hookdir=$hookdir"
17094c
 
17094c
-make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
17094c
+make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab'
17094c
 getarg 'rd.break=pre-trigger' -d 'rdbreak=pre-trigger' && emergency_shell -n pre-trigger "Break before pre-trigger"
17094c
 source_hook pre-trigger
17094c
 
17094c
@@ -230,7 +230,7 @@ unset RDRETRY
17094c
 
17094c
 # pre-mount happens before we try to mount the root filesystem,
17094c
 # and happens once.
17094c
-make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
17094c
+make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab'
17094c
 getarg 'rd.break=pre-mount' -d 'rdbreak=pre-mount' && emergency_shell -n pre-mount "Break pre-mount"
17094c
 source_hook pre-mount
17094c
 
17094c
@@ -266,7 +266,7 @@ done
17094c
 
17094c
 # pre pivot scripts are sourced just before we doing cleanup and switch over
17094c
 # to the new root.
17094c
-make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
17094c
+make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab'
17094c
 getarg 'rd.break=pre-pivot' -d 'rdbreak=pre-pivot' && emergency_shell -n pre-pivot "Break pre-pivot"
17094c
 source_hook pre-pivot
17094c
 
17094c
diff --git a/modules.d/99base/memtrace-ko.sh b/modules.d/99base/memtrace-ko.sh
17094c
deleted file mode 100755
17094c
index ee035e15..00000000
17094c
--- a/modules.d/99base/memtrace-ko.sh
17094c
+++ /dev/null
17094c
@@ -1,191 +0,0 @@
17094c
-#!/bin/sh
17094c
-
17094c
-# Try to find out kernel modules with large total memory allocation during loading.
17094c
-# For large slab allocation, it will fall into buddy, also not trace "mm_page_free"
17094c
-# considering large free is quite rare for module_init, thus saving tons of events
17094c
-# to avoid trace data overwritten.
17094c
-#
17094c
-# Therefore, tracing "mm_page_alloc"alone should be enough for the purpose.
17094c
-
17094c
-# "sys/kernel/tracing" has the priority if exists.
17094c
-get_trace_base() {
17094c
-    # trace access through debugfs would be obsolete if "/sys/kernel/tracing" is available.
17094c
-    if [ -d "/sys/kernel/tracing" ]; then
17094c
-        echo "/sys/kernel"
17094c
-    else
17094c
-        echo "/sys/kernel/debug"
17094c
-    fi
17094c
-}
17094c
-
17094c
-# We want to enable these trace events.
17094c
-get_want_events() {
17094c
-    echo "module:module_put module:module_load kmem:mm_page_alloc"
17094c
-}
17094c
-
17094c
-get_event_filter() {
17094c
-    echo "comm == systemd-udevd || comm == modprobe || comm == insmod"
17094c
-}
17094c
-
17094c
-is_trace_ready() {
17094c
-    local trace_base want_events current_events
17094c
-
17094c
-    trace_base=$(get_trace_base)
17094c
-    ! [ -f "$trace_base/tracing/trace" ] && return 1
17094c
-
17094c
-    [ "$(cat $trace_base/tracing/tracing_on)" -eq 0 ] && return 1
17094c
-
17094c
-    # Also check if trace events were properly setup.
17094c
-    want_events=$(get_want_events)
17094c
-    current_events=$(echo $(cat $trace_base/tracing/set_event))
17094c
-    [ "$current_events" != "$want_events" ] && return 1
17094c
-
17094c
-    return 0
17094c
-}
17094c
-
17094c
-prepare_trace() {
17094c
-    local trace_base
17094c
-
17094c
-    trace_base=$(get_trace_base)
17094c
-    # old debugfs interface case.
17094c
-    if ! [ -d "$trace_base/tracing" ]; then
17094c
-        mount none -t debugfs $trace_base
17094c
-    # new tracefs interface case.
17094c
-    elif ! [ -f "$trace_base/tracing/trace" ]; then
17094c
-        mount none -t tracefs "$trace_base/tracing"
17094c
-    fi
17094c
-
17094c
-    if ! [ -f "$trace_base/tracing/trace" ]; then
17094c
-        echo "WARN: Mount trace failed for kernel module memory analyzing."
17094c
-        return 1
17094c
-    fi
17094c
-
17094c
-    # Active all the wanted trace events.
17094c
-    echo "$(get_want_events)" > $trace_base/tracing/set_event
17094c
-
17094c
-    # There are three kinds of known applications for module loading:
17094c
-    # "systemd-udevd", "modprobe" and "insmod".
17094c
-    # Set them as the global events filter.
17094c
-    # NOTE: Some kernel may not support this format of filter, anyway
17094c
-    #       the operation will fail and it doesn't matter.
17094c
-    echo "$(get_event_filter)" > $trace_base/tracing/events/kmem/filter 2>&1
17094c
-    echo "$(get_event_filter)" > $trace_base/tracing/events/module/filter 2>&1
17094c
-
17094c
-    # Set the number of comm-pid if supported.
17094c
-    if [ -f "$trace_base/tracing/saved_cmdlines_size" ]; then
17094c
-        # Thanks to filters, 4096 is big enough(also well supported).
17094c
-        echo 4096 > $trace_base/tracing/saved_cmdlines_size
17094c
-    fi
17094c
-
17094c
-    # Enable and clear trace data for the first time.
17094c
-    echo 1 > $trace_base/tracing/tracing_on
17094c
-    echo > $trace_base/tracing/trace
17094c
-    echo "Prepare trace success."
17094c
-    return 0
17094c
-}
17094c
-
17094c
-order_to_pages()
17094c
-{
17094c
-    local pages=1
17094c
-    local order=$1
17094c
-
17094c
-    while [ "$order" != 0 ]; do
17094c
-        order=$((order-1))
17094c
-        pages=$(($pages*2))
17094c
-	done
17094c
-
17094c
-    echo $pages
17094c
-}
17094c
-
17094c
-parse_trace_data() {
17094c
-    local module_name tmp_eval pages
17094c
-
17094c
-    cat "$(get_trace_base)/tracing/trace" | while read pid cpu flags ts function args
17094c
-    do
17094c
-        # Skip comment lines
17094c
-        if [ "$pid" = "#" ]; then
17094c
-            continue
17094c
-        fi
17094c
-
17094c
-        pid=${pid##*-}
17094c
-        function=${function%:}
17094c
-        if [ "$function" = "module_load" ]; then
17094c
-            # One module is being loaded, save the task pid for tracking.
17094c
-            # Remove the trailing after whitespace, there may be the module flags.
17094c
-            module_name=${args%% *}
17094c
-            # Mark current_module to track the task.
17094c
-            eval current_module_$pid="$module_name"
17094c
-            tmp_eval=$(eval echo '${module_loaded_'${module_name}'}')
17094c
-            if [ -n "$tmp_eval" ]; then
17094c
-                echo "WARN: \"$module_name\" was loaded multiple times!"
17094c
-            fi
17094c
-            eval unset module_loaded_$module_name
17094c
-            eval nr_alloc_pages_$module_name=0
17094c
-            continue
17094c
-        fi
17094c
-
17094c
-        module_name=$(eval echo '${current_module_'${pid}'}')
17094c
-        if [ -z "$module_name" ]; then
17094c
-            continue
17094c
-        fi
17094c
-
17094c
-        # Once we get here, the task is being tracked(is loading a module).
17094c
-        if [ "$function" = "module_put" ]; then
17094c
-            # Mark the module as loaded when the first module_put event happens after module_load.
17094c
-            tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}')
17094c
-            echo "$tmp_eval pages consumed by \"$module_name\""
17094c
-            eval module_loaded_$module_name=1
17094c
-            # Module loading finished, so untrack the task.
17094c
-            eval unset current_module_$pid
17094c
-            eval unset nr_alloc_pages_$module_name
17094c
-            continue
17094c
-        fi
17094c
-
17094c
-        if [ "$function" = "mm_page_alloc" ]; then
17094c
-            # Get order first, then convert to actual pages.
17094c
-            pages=$(echo $args | sed -e 's/.*order=\([0-9]*\) .*/\1/')
17094c
-            pages=$(order_to_pages "$pages")
17094c
-            tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}')
17094c
-            eval nr_alloc_pages_$module_name="$(($tmp_eval+$pages))"
17094c
-        fi
17094c
-    done
17094c
-}
17094c
-
17094c
-cleanup_trace() {
17094c
-    local trace_base
17094c
-
17094c
-    if is_trace_ready; then
17094c
-        trace_base=$(get_trace_base)
17094c
-        echo 0 > $trace_base/tracing/tracing_on
17094c
-        echo > $trace_base/tracing/trace
17094c
-        echo > $trace_base/tracing/set_event
17094c
-        echo 0 > $trace_base/tracing/events/kmem/filter
17094c
-        echo 0 > $trace_base/tracing/events/module/filter
17094c
-    fi
17094c
-}
17094c
-
17094c
-show_usage() {
17094c
-    echo "Find out kernel modules with large memory consumption during loading based on trace."
17094c
-    echo "Usage:"
17094c
-    echo "1) run it first to setup trace."
17094c
-    echo "2) run again to parse the trace data if any."
17094c
-    echo "3) run with \"--cleanup\" option to cleanup trace after use."
17094c
-}
17094c
-
17094c
-if [ "$1" = "--help" ]; then
17094c
-    show_usage
17094c
-    exit 0
17094c
-fi
17094c
-
17094c
-if [ "$1" = "--cleanup" ]; then
17094c
-    cleanup_trace
17094c
-    exit 0
17094c
-fi
17094c
-
17094c
-if is_trace_ready ; then
17094c
-    echo "tracekomem - Rough memory consumption by loading kernel modules (larger value with better accuracy)"
17094c
-    parse_trace_data
17094c
-else
17094c
-    prepare_trace
17094c
-fi
17094c
-
17094c
-exit $?
17094c
diff --git a/modules.d/99base/module-setup.sh b/modules.d/99base/module-setup.sh
17094c
index c9ead01d..3578643c 100755
17094c
--- a/modules.d/99base/module-setup.sh
17094c
+++ b/modules.d/99base/module-setup.sh
17094c
@@ -35,7 +35,6 @@ install() {
17094c
     inst_script "$moddir/initqueue.sh" "/sbin/initqueue"
17094c
     inst_script "$moddir/loginit.sh" "/sbin/loginit"
17094c
     inst_script "$moddir/rdsosreport.sh" "/sbin/rdsosreport"
17094c
-    inst_script "$moddir/memtrace-ko.sh" "/sbin/tracekomem"
17094c
 
17094c
     [ -e "${initdir}/lib" ] || mkdir -m 0755 -p ${initdir}/lib
17094c
     mkdir -m 0755 -p ${initdir}/lib/dracut
17094c