From 49c4172f4eef6e2015615e132b199a7ec0699ffc Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 8 Apr 2020 16:39:52 +0800 Subject: [PATCH] Remove memtrace-ko and rd.memdebug=4 support in dracut This feature could be offloaded to memstrack, which has better accuracy, better performance, and more detailed tracing features. Also simplify make_trace_mem a bit. Also, rd.memdebug=4 is currently unstable and fails from time to time. --- dracut.cmdline.7.asc | 4 +- modules.d/98dracut-systemd/dracut-cmdline.sh | 2 +- modules.d/98dracut-systemd/dracut-pre-mount.sh | 2 +- modules.d/98dracut-systemd/dracut-pre-pivot.sh | 2 +- modules.d/98dracut-systemd/dracut-pre-trigger.sh | 2 +- modules.d/99base/dracut-lib.sh | 44 +----- modules.d/99base/init.sh | 8 +- modules.d/99base/memtrace-ko.sh | 191 ----------------------- modules.d/99base/module-setup.sh | 1 - 9 files changed, 18 insertions(+), 238 deletions(-) diff --git a/dracut.cmdline.7.asc b/dracut.cmdline.7.asc index 2b38aa33..c53601ea 100644 --- a/dracut.cmdline.7.asc +++ b/dracut.cmdline.7.asc @@ -188,9 +188,9 @@ It should be attached to any report about dracut problems. _/run/initramfs/init.log_. If "quiet" is set, it also logs to the console. -**rd.memdebug=[0-4]**:: +**rd.memdebug=[0-3]**:: Print memory usage info at various points, set the verbose level from 0 to 4. 
-+ ++ Higher level means more debugging output: + ---- diff --git a/modules.d/98dracut-systemd/dracut-cmdline.sh b/modules.d/98dracut-systemd/dracut-cmdline.sh index bff9435a..6c6ee026 100755 --- a/modules.d/98dracut-systemd/dracut-cmdline.sh +++ b/modules.d/98dracut-systemd/dracut-cmdline.sh @@ -42,7 +42,7 @@ export root export rflags export fstype -make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' '4+:komem' +make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' # run scriptlets to parse the command line getarg 'rd.break=cmdline' -d 'rdbreak=cmdline' && emergency_shell -n cmdline "Break before cmdline" source_hook cmdline diff --git a/modules.d/98dracut-systemd/dracut-pre-mount.sh b/modules.d/98dracut-systemd/dracut-pre-mount.sh index a3b9d291..ae511286 100755 --- a/modules.d/98dracut-systemd/dracut-pre-mount.sh +++ b/modules.d/98dracut-systemd/dracut-pre-mount.sh @@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh source_conf /etc/conf.d -make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' '4+:komem' +make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' # pre pivot scripts are sourced just before we doing cleanup and switch over # to the new root. getarg 'rd.break=pre-mount' 'rdbreak=pre-mount' && emergency_shell -n pre-mount "Break pre-mount" diff --git a/modules.d/98dracut-systemd/dracut-pre-pivot.sh b/modules.d/98dracut-systemd/dracut-pre-pivot.sh index dc9a2504..96e6f8ca 100755 --- a/modules.d/98dracut-systemd/dracut-pre-pivot.sh +++ b/modules.d/98dracut-systemd/dracut-pre-pivot.sh @@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh source_conf /etc/conf.d -make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' '4+:komem' +make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' # pre pivot scripts are sourced just before we doing cleanup and switch over # to the new root. 
getarg 'rd.break=pre-pivot' 'rdbreak=pre-pivot' && emergency_shell -n pre-pivot "Break pre-pivot" diff --git a/modules.d/98dracut-systemd/dracut-pre-trigger.sh b/modules.d/98dracut-systemd/dracut-pre-trigger.sh index 7cd821ed..a1a33960 100755 --- a/modules.d/98dracut-systemd/dracut-pre-trigger.sh +++ b/modules.d/98dracut-systemd/dracut-pre-trigger.sh @@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh source_conf /etc/conf.d -make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' '4+:komem' +make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' source_hook pre-trigger diff --git a/modules.d/99base/dracut-lib.sh b/modules.d/99base/dracut-lib.sh index c57523d3..b7020892 100755 --- a/modules.d/99base/dracut-lib.sh +++ b/modules.d/99base/dracut-lib.sh @@ -1187,50 +1187,25 @@ are_lists_eq() { setmemdebug() { if [ -z "$DEBUG_MEM_LEVEL" ]; then - export DEBUG_MEM_LEVEL=$(getargnum 0 0 4 rd.memdebug) + export DEBUG_MEM_LEVEL=$(getargnum 0 0 3 rd.memdebug) fi } setmemdebug -cleanup_trace_mem() -{ - # tracekomem based on kernel trace needs cleanup after use. - if [ "$DEBUG_MEM_LEVEL" -eq 4 ]; then - tracekomem --cleanup - fi -} - -# parameters: msg [trace_level:trace]... -make_trace_mem() -{ - local msg - msg="$1" - shift - if [ -n "$DEBUG_MEM_LEVEL" ] && [ "$DEBUG_MEM_LEVEL" -gt 0 ]; then - make_trace show_memstats $DEBUG_MEM_LEVEL "[debug_mem]" "$msg" "$@" >&2 - fi -} - # parameters: func log_level prefix msg [trace_level:trace]... 
-make_trace() +make_trace_mem() { - local func log_level prefix msg msg_printed + local log_level prefix msg msg_printed local trace trace_level trace_in_higher_levels insert_trace - func=$1 - shift - - log_level=$1 - shift - - prefix=$1 - shift - msg=$1 shift - if [ -z "$log_level" ]; then + prefix='[debug_mem]' + log_level=$DEBUG_MEM_LEVEL + + if [ -z "$log_level" ] || [ "$log_level" -le 0 ]; then return fi @@ -1263,7 +1238,7 @@ make_trace() echo "$prefix $msg" msg_printed=1 fi - $func $trace + show_memstats $trace fi shift done @@ -1285,9 +1260,6 @@ show_memstats() iomem) cat /proc/iomem ;; - komem) - tracekomem - ;; esac } diff --git a/modules.d/99base/init.sh b/modules.d/99base/init.sh index 1111d09b..148ce831 100755 --- a/modules.d/99base/init.sh +++ b/modules.d/99base/init.sh @@ -131,7 +131,7 @@ if ! getargbool 1 'rd.hostonly'; then fi # run scriptlets to parse the command line -make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' '4+:komem' +make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' getarg 'rd.break=cmdline' -d 'rdbreak=cmdline' && emergency_shell -n cmdline "Break before cmdline" source_hook cmdline @@ -160,7 +160,7 @@ fi udevproperty "hookdir=$hookdir" -make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' '4+:komem' +make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' getarg 'rd.break=pre-trigger' -d 'rdbreak=pre-trigger' && emergency_shell -n pre-trigger "Break before pre-trigger" source_hook pre-trigger @@ -230,7 +230,7 @@ unset RDRETRY # pre-mount happens before we try to mount the root filesystem, # and happens once. -make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' '4+:komem' +make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' getarg 'rd.break=pre-mount' -d 'rdbreak=pre-mount' && emergency_shell -n pre-mount "Break pre-mount" source_hook pre-mount @@ -266,7 +266,7 @@ done # pre pivot scripts are sourced just before we doing cleanup and switch over # to the new root. 
-make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' '4+:komem' +make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' getarg 'rd.break=pre-pivot' -d 'rdbreak=pre-pivot' && emergency_shell -n pre-pivot "Break pre-pivot" source_hook pre-pivot diff --git a/modules.d/99base/memtrace-ko.sh b/modules.d/99base/memtrace-ko.sh deleted file mode 100755 index ee035e15..00000000 --- a/modules.d/99base/memtrace-ko.sh +++ /dev/null @@ -1,191 +0,0 @@ -#!/bin/sh - -# Try to find out kernel modules with large total memory allocation during loading. -# For large slab allocation, it will fall into buddy, also not trace "mm_page_free" -# considering large free is quite rare for module_init, thus saving tons of events -# to avoid trace data overwritten. -# -# Therefore, tracing "mm_page_alloc"alone should be enough for the purpose. - -# "sys/kernel/tracing" has the priority if exists. -get_trace_base() { - # trace access through debugfs would be obsolete if "/sys/kernel/tracing" is available. - if [ -d "/sys/kernel/tracing" ]; then - echo "/sys/kernel" - else - echo "/sys/kernel/debug" - fi -} - -# We want to enable these trace events. -get_want_events() { - echo "module:module_put module:module_load kmem:mm_page_alloc" -} - -get_event_filter() { - echo "comm == systemd-udevd || comm == modprobe || comm == insmod" -} - -is_trace_ready() { - local trace_base want_events current_events - - trace_base=$(get_trace_base) - ! [ -f "$trace_base/tracing/trace" ] && return 1 - - [ "$(cat $trace_base/tracing/tracing_on)" -eq 0 ] && return 1 - - # Also check if trace events were properly setup. - want_events=$(get_want_events) - current_events=$(echo $(cat $trace_base/tracing/set_event)) - [ "$current_events" != "$want_events" ] && return 1 - - return 0 -} - -prepare_trace() { - local trace_base - - trace_base=$(get_trace_base) - # old debugfs interface case. - if ! [ -d "$trace_base/tracing" ]; then - mount none -t debugfs $trace_base - # new tracefs interface case. 
- elif ! [ -f "$trace_base/tracing/trace" ]; then - mount none -t tracefs "$trace_base/tracing" - fi - - if ! [ -f "$trace_base/tracing/trace" ]; then - echo "WARN: Mount trace failed for kernel module memory analyzing." - return 1 - fi - - # Active all the wanted trace events. - echo "$(get_want_events)" > $trace_base/tracing/set_event - - # There are three kinds of known applications for module loading: - # "systemd-udevd", "modprobe" and "insmod". - # Set them as the global events filter. - # NOTE: Some kernel may not support this format of filter, anyway - # the operation will fail and it doesn't matter. - echo "$(get_event_filter)" > $trace_base/tracing/events/kmem/filter 2>&1 - echo "$(get_event_filter)" > $trace_base/tracing/events/module/filter 2>&1 - - # Set the number of comm-pid if supported. - if [ -f "$trace_base/tracing/saved_cmdlines_size" ]; then - # Thanks to filters, 4096 is big enough(also well supported). - echo 4096 > $trace_base/tracing/saved_cmdlines_size - fi - - # Enable and clear trace data for the first time. - echo 1 > $trace_base/tracing/tracing_on - echo > $trace_base/tracing/trace - echo "Prepare trace success." - return 0 -} - -order_to_pages() -{ - local pages=1 - local order=$1 - - while [ "$order" != 0 ]; do - order=$((order-1)) - pages=$(($pages*2)) - done - - echo $pages -} - -parse_trace_data() { - local module_name tmp_eval pages - - cat "$(get_trace_base)/tracing/trace" | while read pid cpu flags ts function args - do - # Skip comment lines - if [ "$pid" = "#" ]; then - continue - fi - - pid=${pid##*-} - function=${function%:} - if [ "$function" = "module_load" ]; then - # One module is being loaded, save the task pid for tracking. - # Remove the trailing after whitespace, there may be the module flags. - module_name=${args%% *} - # Mark current_module to track the task. 
- eval current_module_$pid="$module_name" - tmp_eval=$(eval echo '${module_loaded_'${module_name}'}') - if [ -n "$tmp_eval" ]; then - echo "WARN: \"$module_name\" was loaded multiple times!" - fi - eval unset module_loaded_$module_name - eval nr_alloc_pages_$module_name=0 - continue - fi - - module_name=$(eval echo '${current_module_'${pid}'}') - if [ -z "$module_name" ]; then - continue - fi - - # Once we get here, the task is being tracked(is loading a module). - if [ "$function" = "module_put" ]; then - # Mark the module as loaded when the first module_put event happens after module_load. - tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}') - echo "$tmp_eval pages consumed by \"$module_name\"" - eval module_loaded_$module_name=1 - # Module loading finished, so untrack the task. - eval unset current_module_$pid - eval unset nr_alloc_pages_$module_name - continue - fi - - if [ "$function" = "mm_page_alloc" ]; then - # Get order first, then convert to actual pages. - pages=$(echo $args | sed -e 's/.*order=\([0-9]*\) .*/\1/') - pages=$(order_to_pages "$pages") - tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}') - eval nr_alloc_pages_$module_name="$(($tmp_eval+$pages))" - fi - done -} - -cleanup_trace() { - local trace_base - - if is_trace_ready; then - trace_base=$(get_trace_base) - echo 0 > $trace_base/tracing/tracing_on - echo > $trace_base/tracing/trace - echo > $trace_base/tracing/set_event - echo 0 > $trace_base/tracing/events/kmem/filter - echo 0 > $trace_base/tracing/events/module/filter - fi -} - -show_usage() { - echo "Find out kernel modules with large memory consumption during loading based on trace." - echo "Usage:" - echo "1) run it first to setup trace." - echo "2) run again to parse the trace data if any." - echo "3) run with \"--cleanup\" option to cleanup trace after use." 
-} - -if [ "$1" = "--help" ]; then - show_usage - exit 0 -fi - -if [ "$1" = "--cleanup" ]; then - cleanup_trace - exit 0 -fi - -if is_trace_ready ; then - echo "tracekomem - Rough memory consumption by loading kernel modules (larger value with better accuracy)" - parse_trace_data -else - prepare_trace -fi - -exit $? diff --git a/modules.d/99base/module-setup.sh b/modules.d/99base/module-setup.sh index 3a2246b4..c9e3ebcb 100755 --- a/modules.d/99base/module-setup.sh +++ b/modules.d/99base/module-setup.sh @@ -39,7 +39,6 @@ install() { inst_script "$moddir/initqueue.sh" "/sbin/initqueue" inst_script "$moddir/loginit.sh" "/sbin/loginit" inst_script "$moddir/rdsosreport.sh" "/sbin/rdsosreport" - inst_script "$moddir/memtrace-ko.sh" "/sbin/tracekomem" [ -e "${initdir}/lib" ] || mkdir -m 0755 -p ${initdir}/lib mkdir -m 0755 -p ${initdir}/lib/dracut