6edea0
From cf74193cc37f2ace1197b1e6ebacc6fe542767c8 Mon Sep 17 00:00:00 2001
6edea0
From: Kairui Song <kasong@redhat.com>
6edea0
Date: Wed, 8 Apr 2020 16:39:52 +0800
6edea0
Subject: [PATCH] Remove memtrace-ko and rd.memdebug=4 support in dracut
6edea0
6edea0
This feature could be offloaded to memstrack, which has better
6edea0
accuracy, better performance, and more detailed tracing features.
6edea0
6edea0
Also simplify make_trace_mem a bit.
6edea0
6edea0
Also, rd.memdebug=4 is currently unstable and fails from time to time.
6edea0
6edea0
(cherry picked from commit 49c4172f4eef6e2015615e132b199a7ec0699ffc)
6edea0
6edea0
Resolves: #1829528
6edea0
---
6edea0
 dracut.cmdline.7.asc                             |   4 +-
6edea0
 modules.d/98dracut-systemd/dracut-cmdline.sh     |   2 +-
6edea0
 modules.d/98dracut-systemd/dracut-pre-mount.sh   |   2 +-
6edea0
 modules.d/98dracut-systemd/dracut-pre-pivot.sh   |   2 +-
6edea0
 modules.d/98dracut-systemd/dracut-pre-trigger.sh |   2 +-
6edea0
 modules.d/99base/dracut-lib.sh                   |  44 +-----
6edea0
 modules.d/99base/init.sh                         |   8 +-
6edea0
 modules.d/99base/memtrace-ko.sh                  | 191 -----------------------
6edea0
 modules.d/99base/module-setup.sh                 |   1 -
6edea0
 9 files changed, 18 insertions(+), 238 deletions(-)
6edea0
6edea0
diff --git a/dracut.cmdline.7.asc b/dracut.cmdline.7.asc
6edea0
index ab9a24ad..0dc58d1a 100644
6edea0
--- a/dracut.cmdline.7.asc
6edea0
+++ b/dracut.cmdline.7.asc
6edea0
@@ -187,9 +187,9 @@ It should be attached to any report about dracut problems.
6edea0
     _/run/initramfs/init.log_.
6edea0
     If "quiet" is set, it also logs to the console.
6edea0
 
6edea0
-**rd.memdebug=[0-4]**::
6edea0
+**rd.memdebug=[0-3]**::
6edea0
     Print memory usage info at various points, set the verbose level from 0 to 4.
6edea0
-+    
6edea0
++
6edea0
     Higher level means more debugging output:
6edea0
 +
6edea0
 ----
6edea0
diff --git a/modules.d/98dracut-systemd/dracut-cmdline.sh b/modules.d/98dracut-systemd/dracut-cmdline.sh
6edea0
index bff9435a..6c6ee026 100755
6edea0
--- a/modules.d/98dracut-systemd/dracut-cmdline.sh
6edea0
+++ b/modules.d/98dracut-systemd/dracut-cmdline.sh
6edea0
@@ -42,7 +42,7 @@ export root
6edea0
 export rflags
6edea0
 export fstype
6edea0
 
6edea0
-make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' '4+:komem'
6edea0
+make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab'
6edea0
 # run scriptlets to parse the command line
6edea0
 getarg 'rd.break=cmdline' -d 'rdbreak=cmdline' && emergency_shell -n cmdline "Break before cmdline"
6edea0
 source_hook cmdline
6edea0
diff --git a/modules.d/98dracut-systemd/dracut-pre-mount.sh b/modules.d/98dracut-systemd/dracut-pre-mount.sh
6edea0
index a3b9d291..ae511286 100755
6edea0
--- a/modules.d/98dracut-systemd/dracut-pre-mount.sh
6edea0
+++ b/modules.d/98dracut-systemd/dracut-pre-mount.sh
6edea0
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
6edea0
 
6edea0
 source_conf /etc/conf.d
6edea0
 
6edea0
-make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6edea0
+make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab'
6edea0
 # pre pivot scripts are sourced just before we doing cleanup and switch over
6edea0
 # to the new root.
6edea0
 getarg 'rd.break=pre-mount' 'rdbreak=pre-mount' && emergency_shell -n pre-mount "Break pre-mount"
6edea0
diff --git a/modules.d/98dracut-systemd/dracut-pre-pivot.sh b/modules.d/98dracut-systemd/dracut-pre-pivot.sh
6edea0
index dc9a2504..96e6f8ca 100755
6edea0
--- a/modules.d/98dracut-systemd/dracut-pre-pivot.sh
6edea0
+++ b/modules.d/98dracut-systemd/dracut-pre-pivot.sh
6edea0
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
6edea0
 
6edea0
 source_conf /etc/conf.d
6edea0
 
6edea0
-make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6edea0
+make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab'
6edea0
 # pre pivot scripts are sourced just before we doing cleanup and switch over
6edea0
 # to the new root.
6edea0
 getarg 'rd.break=pre-pivot' 'rdbreak=pre-pivot' && emergency_shell -n pre-pivot "Break pre-pivot"
6edea0
diff --git a/modules.d/98dracut-systemd/dracut-pre-trigger.sh b/modules.d/98dracut-systemd/dracut-pre-trigger.sh
6edea0
index 7cd821ed..a1a33960 100755
6edea0
--- a/modules.d/98dracut-systemd/dracut-pre-trigger.sh
6edea0
+++ b/modules.d/98dracut-systemd/dracut-pre-trigger.sh
6edea0
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
6edea0
 
6edea0
 source_conf /etc/conf.d
6edea0
 
6edea0
-make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6edea0
+make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab'
6edea0
 
6edea0
 source_hook pre-trigger
6edea0
 
6edea0
diff --git a/modules.d/99base/dracut-lib.sh b/modules.d/99base/dracut-lib.sh
6edea0
index f71e757c..502c7568 100755
6edea0
--- a/modules.d/99base/dracut-lib.sh
6edea0
+++ b/modules.d/99base/dracut-lib.sh
6edea0
@@ -1220,50 +1220,25 @@ are_lists_eq() {
6edea0
 
6edea0
 setmemdebug() {
6edea0
     if [ -z "$DEBUG_MEM_LEVEL" ]; then
6edea0
-        export DEBUG_MEM_LEVEL=$(getargnum 0 0 4 rd.memdebug)
6edea0
+        export DEBUG_MEM_LEVEL=$(getargnum 0 0 3 rd.memdebug)
6edea0
     fi
6edea0
 }
6edea0
 
6edea0
 setmemdebug
6edea0
 
6edea0
-cleanup_trace_mem()
6edea0
-{
6edea0
-    # tracekomem based on kernel trace needs cleanup after use.
6edea0
-    if [ "$DEBUG_MEM_LEVEL" -eq 4 ]; then
6edea0
-        tracekomem --cleanup
6edea0
-    fi
6edea0
-}
6edea0
-
6edea0
-# parameters: msg [trace_level:trace]...
6edea0
-make_trace_mem()
6edea0
-{
6edea0
-    local msg
6edea0
-    msg="$1"
6edea0
-    shift
6edea0
-    if [ -n "$DEBUG_MEM_LEVEL" ] && [ "$DEBUG_MEM_LEVEL" -gt 0 ]; then
6edea0
-        make_trace show_memstats $DEBUG_MEM_LEVEL "[debug_mem]" "$msg" "$@" >&2
6edea0
-    fi
6edea0
-}
6edea0
-
6edea0
 # parameters: func log_level prefix msg [trace_level:trace]...
6edea0
-make_trace()
6edea0
+make_trace_mem()
6edea0
 {
6edea0
-    local func log_level prefix msg msg_printed
6edea0
+    local log_level prefix msg msg_printed
6edea0
     local trace trace_level trace_in_higher_levels insert_trace
6edea0
 
6edea0
-    func=$1
6edea0
-    shift
6edea0
-
6edea0
-    log_level=$1
6edea0
-    shift
6edea0
-
6edea0
-    prefix=$1
6edea0
-    shift
6edea0
-
6edea0
     msg=$1
6edea0
     shift
6edea0
 
6edea0
-    if [ -z "$log_level" ]; then
6edea0
+    prefix='[debug_mem]'
6edea0
+    log_level=$DEBUG_MEM_LEVEL
6edea0
+
6edea0
+    if [ -z "$log_level" ] || [ "$log_level" -le 0 ]; then
6edea0
         return
6edea0
     fi
6edea0
 
6edea0
@@ -1296,7 +1271,7 @@ make_trace()
6edea0
                 echo "$prefix $msg"
6edea0
                 msg_printed=1
6edea0
             fi
6edea0
-            $func $trace
6edea0
+            show_memstats $trace
6edea0
         fi
6edea0
         shift
6edea0
     done
6edea0
@@ -1318,9 +1293,6 @@ show_memstats()
6edea0
         iomem)
6edea0
             cat /proc/iomem
6edea0
             ;;
6edea0
-        komem)
6edea0
-            tracekomem
6edea0
-            ;;
6edea0
     esac
6edea0
 }
6edea0
 
6edea0
diff --git a/modules.d/99base/init.sh b/modules.d/99base/init.sh
6edea0
index e4f7cff1..2c0ccd66 100755
6edea0
--- a/modules.d/99base/init.sh
6edea0
+++ b/modules.d/99base/init.sh
6edea0
@@ -131,7 +131,7 @@ if ! getargbool 1 'rd.hostonly'; then
6edea0
 fi
6edea0
 
6edea0
 # run scriptlets to parse the command line
6edea0
-make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' '4+:komem'
6edea0
+make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab'
6edea0
 getarg 'rd.break=cmdline' -d 'rdbreak=cmdline' && emergency_shell -n cmdline "Break before cmdline"
6edea0
 source_hook cmdline
6edea0
 
6edea0
@@ -160,7 +160,7 @@ fi
6edea0
 
6edea0
 udevproperty "hookdir=$hookdir"
6edea0
 
6edea0
-make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6edea0
+make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab'
6edea0
 getarg 'rd.break=pre-trigger' -d 'rdbreak=pre-trigger' && emergency_shell -n pre-trigger "Break before pre-trigger"
6edea0
 source_hook pre-trigger
6edea0
 
6edea0
@@ -230,7 +230,7 @@ unset RDRETRY
6edea0
 
6edea0
 # pre-mount happens before we try to mount the root filesystem,
6edea0
 # and happens once.
6edea0
-make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6edea0
+make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab'
6edea0
 getarg 'rd.break=pre-mount' -d 'rdbreak=pre-mount' && emergency_shell -n pre-mount "Break pre-mount"
6edea0
 source_hook pre-mount
6edea0
 
6edea0
@@ -266,7 +266,7 @@ done
6edea0
 
6edea0
 # pre pivot scripts are sourced just before we doing cleanup and switch over
6edea0
 # to the new root.
6edea0
-make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
6edea0
+make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab'
6edea0
 getarg 'rd.break=pre-pivot' -d 'rdbreak=pre-pivot' && emergency_shell -n pre-pivot "Break pre-pivot"
6edea0
 source_hook pre-pivot
6edea0
 
6edea0
diff --git a/modules.d/99base/memtrace-ko.sh b/modules.d/99base/memtrace-ko.sh
6edea0
deleted file mode 100755
6edea0
index ee035e15..00000000
6edea0
--- a/modules.d/99base/memtrace-ko.sh
6edea0
+++ /dev/null
6edea0
@@ -1,191 +0,0 @@
6edea0
-#!/bin/sh
6edea0
-
6edea0
-# Try to find out kernel modules with large total memory allocation during loading.
6edea0
-# For large slab allocation, it will fall into buddy, also not trace "mm_page_free"
6edea0
-# considering large free is quite rare for module_init, thus saving tons of events
6edea0
-# to avoid trace data overwritten.
6edea0
-#
6edea0
-# Therefore, tracing "mm_page_alloc"alone should be enough for the purpose.
6edea0
-
6edea0
-# "sys/kernel/tracing" has the priority if exists.
6edea0
-get_trace_base() {
6edea0
-    # trace access through debugfs would be obsolete if "/sys/kernel/tracing" is available.
6edea0
-    if [ -d "/sys/kernel/tracing" ]; then
6edea0
-        echo "/sys/kernel"
6edea0
-    else
6edea0
-        echo "/sys/kernel/debug"
6edea0
-    fi
6edea0
-}
6edea0
-
6edea0
-# We want to enable these trace events.
6edea0
-get_want_events() {
6edea0
-    echo "module:module_put module:module_load kmem:mm_page_alloc"
6edea0
-}
6edea0
-
6edea0
-get_event_filter() {
6edea0
-    echo "comm == systemd-udevd || comm == modprobe || comm == insmod"
6edea0
-}
6edea0
-
6edea0
-is_trace_ready() {
6edea0
-    local trace_base want_events current_events
6edea0
-
6edea0
-    trace_base=$(get_trace_base)
6edea0
-    ! [ -f "$trace_base/tracing/trace" ] && return 1
6edea0
-
6edea0
-    [ "$(cat $trace_base/tracing/tracing_on)" -eq 0 ] && return 1
6edea0
-
6edea0
-    # Also check if trace events were properly setup.
6edea0
-    want_events=$(get_want_events)
6edea0
-    current_events=$(echo $(cat $trace_base/tracing/set_event))
6edea0
-    [ "$current_events" != "$want_events" ] && return 1
6edea0
-
6edea0
-    return 0
6edea0
-}
6edea0
-
6edea0
-prepare_trace() {
6edea0
-    local trace_base
6edea0
-
6edea0
-    trace_base=$(get_trace_base)
6edea0
-    # old debugfs interface case.
6edea0
-    if ! [ -d "$trace_base/tracing" ]; then
6edea0
-        mount none -t debugfs $trace_base
6edea0
-    # new tracefs interface case.
6edea0
-    elif ! [ -f "$trace_base/tracing/trace" ]; then
6edea0
-        mount none -t tracefs "$trace_base/tracing"
6edea0
-    fi
6edea0
-
6edea0
-    if ! [ -f "$trace_base/tracing/trace" ]; then
6edea0
-        echo "WARN: Mount trace failed for kernel module memory analyzing."
6edea0
-        return 1
6edea0
-    fi
6edea0
-
6edea0
-    # Active all the wanted trace events.
6edea0
-    echo "$(get_want_events)" > $trace_base/tracing/set_event
6edea0
-
6edea0
-    # There are three kinds of known applications for module loading:
6edea0
-    # "systemd-udevd", "modprobe" and "insmod".
6edea0
-    # Set them as the global events filter.
6edea0
-    # NOTE: Some kernel may not support this format of filter, anyway
6edea0
-    #       the operation will fail and it doesn't matter.
6edea0
-    echo "$(get_event_filter)" > $trace_base/tracing/events/kmem/filter 2>&1
6edea0
-    echo "$(get_event_filter)" > $trace_base/tracing/events/module/filter 2>&1
6edea0
-
6edea0
-    # Set the number of comm-pid if supported.
6edea0
-    if [ -f "$trace_base/tracing/saved_cmdlines_size" ]; then
6edea0
-        # Thanks to filters, 4096 is big enough(also well supported).
6edea0
-        echo 4096 > $trace_base/tracing/saved_cmdlines_size
6edea0
-    fi
6edea0
-
6edea0
-    # Enable and clear trace data for the first time.
6edea0
-    echo 1 > $trace_base/tracing/tracing_on
6edea0
-    echo > $trace_base/tracing/trace
6edea0
-    echo "Prepare trace success."
6edea0
-    return 0
6edea0
-}
6edea0
-
6edea0
-order_to_pages()
6edea0
-{
6edea0
-    local pages=1
6edea0
-    local order=$1
6edea0
-
6edea0
-    while [ "$order" != 0 ]; do
6edea0
-        order=$((order-1))
6edea0
-        pages=$(($pages*2))
6edea0
-	done
6edea0
-
6edea0
-    echo $pages
6edea0
-}
6edea0
-
6edea0
-parse_trace_data() {
6edea0
-    local module_name tmp_eval pages
6edea0
-
6edea0
-    cat "$(get_trace_base)/tracing/trace" | while read pid cpu flags ts function args
6edea0
-    do
6edea0
-        # Skip comment lines
6edea0
-        if [ "$pid" = "#" ]; then
6edea0
-            continue
6edea0
-        fi
6edea0
-
6edea0
-        pid=${pid##*-}
6edea0
-        function=${function%:}
6edea0
-        if [ "$function" = "module_load" ]; then
6edea0
-            # One module is being loaded, save the task pid for tracking.
6edea0
-            # Remove the trailing after whitespace, there may be the module flags.
6edea0
-            module_name=${args%% *}
6edea0
-            # Mark current_module to track the task.
6edea0
-            eval current_module_$pid="$module_name"
6edea0
-            tmp_eval=$(eval echo '${module_loaded_'${module_name}'}')
6edea0
-            if [ -n "$tmp_eval" ]; then
6edea0
-                echo "WARN: \"$module_name\" was loaded multiple times!"
6edea0
-            fi
6edea0
-            eval unset module_loaded_$module_name
6edea0
-            eval nr_alloc_pages_$module_name=0
6edea0
-            continue
6edea0
-        fi
6edea0
-
6edea0
-        module_name=$(eval echo '${current_module_'${pid}'}')
6edea0
-        if [ -z "$module_name" ]; then
6edea0
-            continue
6edea0
-        fi
6edea0
-
6edea0
-        # Once we get here, the task is being tracked(is loading a module).
6edea0
-        if [ "$function" = "module_put" ]; then
6edea0
-            # Mark the module as loaded when the first module_put event happens after module_load.
6edea0
-            tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}')
6edea0
-            echo "$tmp_eval pages consumed by \"$module_name\""
6edea0
-            eval module_loaded_$module_name=1
6edea0
-            # Module loading finished, so untrack the task.
6edea0
-            eval unset current_module_$pid
6edea0
-            eval unset nr_alloc_pages_$module_name
6edea0
-            continue
6edea0
-        fi
6edea0
-
6edea0
-        if [ "$function" = "mm_page_alloc" ]; then
6edea0
-            # Get order first, then convert to actual pages.
6edea0
-            pages=$(echo $args | sed -e 's/.*order=\([0-9]*\) .*/\1/')
6edea0
-            pages=$(order_to_pages "$pages")
6edea0
-            tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}')
6edea0
-            eval nr_alloc_pages_$module_name="$(($tmp_eval+$pages))"
6edea0
-        fi
6edea0
-    done
6edea0
-}
6edea0
-
6edea0
-cleanup_trace() {
6edea0
-    local trace_base
6edea0
-
6edea0
-    if is_trace_ready; then
6edea0
-        trace_base=$(get_trace_base)
6edea0
-        echo 0 > $trace_base/tracing/tracing_on
6edea0
-        echo > $trace_base/tracing/trace
6edea0
-        echo > $trace_base/tracing/set_event
6edea0
-        echo 0 > $trace_base/tracing/events/kmem/filter
6edea0
-        echo 0 > $trace_base/tracing/events/module/filter
6edea0
-    fi
6edea0
-}
6edea0
-
6edea0
-show_usage() {
6edea0
-    echo "Find out kernel modules with large memory consumption during loading based on trace."
6edea0
-    echo "Usage:"
6edea0
-    echo "1) run it first to setup trace."
6edea0
-    echo "2) run again to parse the trace data if any."
6edea0
-    echo "3) run with \"--cleanup\" option to cleanup trace after use."
6edea0
-}
6edea0
-
6edea0
-if [ "$1" = "--help" ]; then
6edea0
-    show_usage
6edea0
-    exit 0
6edea0
-fi
6edea0
-
6edea0
-if [ "$1" = "--cleanup" ]; then
6edea0
-    cleanup_trace
6edea0
-    exit 0
6edea0
-fi
6edea0
-
6edea0
-if is_trace_ready ; then
6edea0
-    echo "tracekomem - Rough memory consumption by loading kernel modules (larger value with better accuracy)"
6edea0
-    parse_trace_data
6edea0
-else
6edea0
-    prepare_trace
6edea0
-fi
6edea0
-
6edea0
-exit $?
6edea0
diff --git a/modules.d/99base/module-setup.sh b/modules.d/99base/module-setup.sh
6edea0
index c9ead01d..3578643c 100755
6edea0
--- a/modules.d/99base/module-setup.sh
6edea0
+++ b/modules.d/99base/module-setup.sh
6edea0
@@ -35,7 +35,6 @@ install() {
6edea0
     inst_script "$moddir/initqueue.sh" "/sbin/initqueue"
6edea0
     inst_script "$moddir/loginit.sh" "/sbin/loginit"
6edea0
     inst_script "$moddir/rdsosreport.sh" "/sbin/rdsosreport"
6edea0
-    inst_script "$moddir/memtrace-ko.sh" "/sbin/tracekomem"
6edea0
 
6edea0
     [ -e "${initdir}/lib" ] || mkdir -m 0755 -p ${initdir}/lib
6edea0
     mkdir -m 0755 -p ${initdir}/lib/dracut
6edea0