17094c
From adacd8101e7558930e4499c21006eaa14629c234 Mon Sep 17 00:00:00 2001
17094c
From: Kairui Song <kasong@redhat.com>
17094c
Date: Wed, 8 Apr 2020 20:09:38 +0800
17094c
Subject: [PATCH] Add 99memstrack module
17094c
17094c
memstrack is a new tool to track the overall memory usage and
17094c
allocation, which can help offload and improve the builtin module
17094c
memory tracing function in dracut.
17094c
17094c
With this change, the rd.memdebug=4 behavior is similar to before,
17094c
but the report is deferred to pre-pivot, so the memory usage info
17094c
during the whole initramfs run is traced. And the output format is
17094c
changed a bit:
17094c
17094c
  dracut-pre-pivot[519]: ======== Report format module_summary: ========
17094c
  dracut-pre-pivot[519]: Module squashfs using 10.4MB (2658 pages), peak allocation 10.4MB (2671 pages)
17094c
  dracut-pre-pivot[519]: Module qxl using 3.4MB (865 pages), peak allocation 3.4MB (880 pages)
17094c
  dracut-pre-pivot[519]: Module crc32c_intel using 2.0MB (519 pages), peak allocation 3.0MB (769 pages)
17094c
  dracut-pre-pivot[519]: Module serio_raw using 2.0MB (505 pages), peak allocation 3.6MB (918 pages)
17094c
  dracut-pre-pivot[519]: Module virtio_console using 1.6MB (416 pages), peak allocation 1.6MB (419 pages)
17094c
  ... snip ...
17094c
  dracut-pre-pivot[519]: ======== Report format module_summary END ========
17094c
17094c
It now contains more detail and also includes the peak usage which could
17094c
be more helpful.
17094c
17094c
And now there is a rd.memdebug=5, which will print more detail about
17094c
the stack trace of the top memory user, also printed on pre-pivot:
17094c
17094c
  dracut-pre-pivot[519]: ======== Report format module_top: ========
17094c
  dracut-pre-pivot[519]: Top stack usage of module squashfs:
17094c
  dracut-pre-pivot[519]:   (null) Pages: 2658 (peak: 2671)
17094c
  dracut-pre-pivot[519]:     (null) Pages: 2658 (peak: 2671)
17094c
  dracut-pre-pivot[519]:       async_page_fault (0xffffffff81a01149) Pages: 1448 (peak: 1461)
17094c
  dracut-pre-pivot[519]:         do_async_page_fault (0xffffffff8105c509) Pages: 1448 (peak: 1461)
17094c
  dracut-pre-pivot[519]:           do_page_fault (0xffffffff8106296a) Pages: 1448 (peak: 1461)
17094c
  dracut-pre-pivot[519]:             do_user_addr_fault (0xffffffff810626bd) Pages: 1448 (peak: 1461)
17094c
  dracut-pre-pivot[519]:               handle_mm_fault (0xffffffff812940c4) Pages: 1448 (peak: 1461)
17094c
  dracut-pre-pivot[519]:                 __handle_mm_fault (0xffffffff81293627) Pages: 1195 (peak: 1208)
17094c
  dracut-pre-pivot[519]:                   __do_fault (0xffffffff8128b07e) Pages: 1195 (peak: 1208)
17094c
  dracut-pre-pivot[519]:                     filemap_fault (0xffffffff8124c0b9) Pages: 1195 (peak: 1208)
17094c
  dracut-pre-pivot[519]:                       __do_page_cache_readahead (0xffffffff812585da) Pages: 1063 (peak: 1076)
17094c
  dracut-pre-pivot[519]:                         read_pages (0xffffffff812583c2) Pages: 1063 (peak: 1076)
17094c
  dracut-pre-pivot[519]:                           squashfs_readpage squashfs (0xffffffffc0022073) Pages: 1039 (peak: 1052)
17094c
  dracut-pre-pivot[519]:                             squashfs_readpage_block squashfs (0xffffffffc0024334) Pages: 744 (peak: 744)
17094c
  dracut-pre-pivot[519]:                               squashfs_copy_cache squashfs (0xffffffffc0021a3f) Pages: 744 (peak: 744)
17094c
  dracut-pre-pivot[519]:                                 pagecache_get_page (0xffffffff8124abf7) Pages: 744 (peak: 744)
17094c
  dracut-pre-pivot[519]:                                   __page_cache_alloc (0xffffffff81247df6) Pages: 744 (peak: 744)
17094c
  dracut-pre-pivot[519]:                                     alloc_pages_current (0xffffffff812cdca7) Pages: 744 (peak: 744)
17094c
  dracut-pre-pivot[519]:                                       __alloc_pages_nodemask (0xffffffff812b3107) Pages: 744 (peak: 744)
17094c
  dracut-pre-pivot[519]:                                         __alloc_pages_nodemask (0xffffffff812b3107) Pages: 1488 (peak: 1488)
17094c
  dracut-pre-pivot[519]: Top stack usage of module qxl:
17094c
  dracut-pre-pivot[519]:   (null) Pages: 865 (peak: 880)
17094c
  dracut-pre-pivot[519]:     entry_SYSCALL_64_after_hwframe (0xffffffff81a0008c) Pages: 855 (peak: 858)
17094c
  dracut-pre-pivot[519]:       do_syscall_64 (0xffffffff81002a5a) Pages: 855 (peak: 858)
17094c
  dracut-pre-pivot[519]:         __x64_sys_finit_module (0xffffffff8117ccea) Pages: 811 (peak: 811)
17094c
  dracut-pre-pivot[519]:           __do_sys_finit_module (0xffffffff8117cc6e) Pages: 811 (peak: 811)
17094c
  dracut-pre-pivot[519]:             load_module (0xffffffff8117c6be) Pages: 802 (peak: 802)
17094c
  dracut-pre-pivot[519]:               do_init_module (0xffffffff81179e72) Pages: 802 (peak: 802)
17094c
  dracut-pre-pivot[519]:                 do_one_initcall (0xffffffff81000d5a) Pages: 802 (peak: 802)
17094c
  dracut-pre-pivot[519]:                   serio_raw_poll serio_raw (0xffffffffc0200054) Pages: 802 (peak: 802)
17094c
  dracut-pre-pivot[519]:                     __pci_register_driver (0xffffffff81557804) Pages: 802 (peak: 802)
17094c
  dracut-pre-pivot[519]:                       driver_register (0xffffffff8167ed24) Pages: 802 (peak: 802)
17094c
  dracut-pre-pivot[519]:                         bus_add_driver (0xffffffff8167cbb2) Pages: 801 (peak: 801)
17094c
  dracut-pre-pivot[519]:                           driver_attach (0xffffffff8167d28e) Pages: 801 (peak: 801)
17094c
  dracut-pre-pivot[519]:                             bus_for_each_dev (0xffffffff8167b62c) Pages: 801 (peak: 801)
17094c
  dracut-pre-pivot[519]:                               __driver_attach (0xffffffff8167e18f) Pages: 801 (peak: 801)
17094c
  dracut-pre-pivot[519]:                                 device_driver_attach (0xffffffff8167e0ed) Pages: 801 (peak: 801)
17094c
  dracut-pre-pivot[519]:                                   driver_probe_device (0xffffffff8167de6c) Pages: 801 (peak: 801)
17094c
  dracut-pre-pivot[519]:                                     really_probe (0xffffffff8167d9c9) Pages: 801 (peak: 801)
17094c
  dracut-pre-pivot[519]:                                       pci_device_probe (0xffffffff81559627) Pages: 801 (peak: 801)
17094c
  dracut-pre-pivot[519]:                                         local_pci_probe (0xffffffff81557f98) Pages: 801 (peak: 801)
17094c
  dracut-pre-pivot[519]:                                           qxl_pci_probe qxl (0xffffffffc01f0387) Pages: 773 (peak: 773)
17094c
  dracut-pre-pivot[519]:                                             drm_fbdev_generic_setup drm_kms_helper (0xffffffffc01b30c5) Pages: 773 (peak: 773)
17094c
  dracut-pre-pivot[519]:                                               drm_fbdev_client_hotplug drm_kms_helper (0xffffffffc01b2656) Pages: 773 (peak: 773)
17094c
  dracut-pre-pivot[519]:                                                 __drm_fb_helper_initial_config_and_unlock drm_kms_helper (0xffffffffc01b1a28) Pages: 770 (peak: 770)
17094c
  dracut-pre-pivot[519]:                                                   drm_fb_helper_generic_probe drm_kms_helper (0xffffffffc01b2fa5) Pages: 770 (peak: 770)
17094c
  dracut-pre-pivot[519]:                                                     vzalloc (0xffffffff812aa39c) Pages: 770 (peak: 770)
17094c
  dracut-pre-pivot[519]:                                                       __vmalloc_node_range (0xffffffff812aa200) Pages: 768 (peak: 768)
17094c
  ... snip ...
17094c
  ======== Report format module_top END ========
17094c
17094c
This could be very helpful for debugging memory usage issues.
17094c
17094c
(cherry picked from commit 7dd8a2f4d351ce8975c0af122732a2a12697c8cc)
17094c
17094c
Resolves: #1829528
17094c
---
17094c
 dracut.cmdline.7.asc                      | 16 +++++---
17094c
 dracut.spec                               |  2 +
17094c
 modules.d/99base/dracut-lib.sh            |  2 +-
17094c
 modules.d/99memstrack/memstrack-report.sh | 23 +++++++++++
17094c
 modules.d/99memstrack/memstrack-start.sh  | 68 +++++++++++++++++++++++++++++++
17094c
 modules.d/99memstrack/memstrack.service   | 13 ++++++
17094c
 modules.d/99memstrack/module-setup.sh     | 27 ++++++++++++
17094c
 7 files changed, 144 insertions(+), 7 deletions(-)
17094c
17094c
diff --git a/dracut.cmdline.7.asc b/dracut.cmdline.7.asc
17094c
index 0dc58d1a..7f31fbae 100644
17094c
--- a/dracut.cmdline.7.asc
17094c
+++ b/dracut.cmdline.7.asc
17094c
@@ -187,8 +187,8 @@ It should be attached to any report about dracut problems.
17094c
     _/run/initramfs/init.log_.
17094c
     If "quiet" is set, it also logs to the console.
17094c
 
17094c
-**rd.memdebug=[0-3]**::
17094c
-    Print memory usage info at various points, set the verbose level from 0 to 4.
17094c
+**rd.memdebug=[0-5]**::
17094c
+    Print memory usage info at various points, set the verbose level from 0 to 5.
17094c
 +
17094c
     Higher level means more debugging output:
17094c
 +
17094c
@@ -197,10 +197,14 @@ It should be attached to any report about dracut problems.
17094c
     1 - partial /proc/meminfo
17094c
     2 - /proc/meminfo
17094c
     3 - /proc/meminfo + /proc/slabinfo
17094c
-    4 - /proc/meminfo + /proc/slabinfo + tracekomem
17094c
-        NOTE: tracekomem is a shell script utilizing kernel trace to track
17094c
-              the rough total memory consumption of kernel modules during
17094c
-              loading. It may override other trace configurations.
17094c
+    4 - /proc/meminfo + /proc/slabinfo + memstrack summary
17094c
+        NOTE: memstrack is a memory tracing tool that tracks the total memory
17094c
+              consumption and peak memory consumption of each kernel module
17094c
+              and userspace process during the whole initramfs runtime; the report
17094c
+              is generated at the end of the initramfs run.
17094c
+    5 - /proc/meminfo + /proc/slabinfo + memstrack (with top memory stacktrace)
17094c
+        NOTE: memstrack (with top memory stacktrace) will print top memory
17094c
+              allocation stack traces during the whole initramfs runtime.
17094c
 ----
17094c
 
17094c
 **rd.break**::
17094c
diff --git a/dracut.spec b/dracut.spec
17094c
index 0f7eb4bf..c82c24f6 100644
17094c
--- a/dracut.spec
17094c
+++ b/dracut.spec
17094c
@@ -84,6 +84,7 @@ Requires: xz
17094c
 Requires: gzip
17094c
 
17094c
 %if 0%{?fedora} || 0%{?rhel}
17094c
+Recommends: memstrack
17094c
 Recommends: hardlink
17094c
 Recommends: pigz
17094c
 Recommends: kpartx
17094c
@@ -399,6 +400,7 @@ install -m 0755 51-dracut-rescue-postinst.sh $RPM_BUILD_ROOT%{_sysconfdir}/kerne
17094c
 %{dracutlibdir}/modules.d/98syslog
17094c
 %{dracutlibdir}/modules.d/98usrmount
17094c
 %{dracutlibdir}/modules.d/99base
17094c
+%{dracutlibdir}/modules.d/99memstrack
17094c
 %{dracutlibdir}/modules.d/99fs-lib
17094c
 %{dracutlibdir}/modules.d/99shutdown
17094c
 %attr(0644,root,root) %ghost %config(missingok,noreplace) %{_localstatedir}/log/dracut.log
17094c
diff --git a/modules.d/99base/dracut-lib.sh b/modules.d/99base/dracut-lib.sh
17094c
index 502c7568..56dd63ef 100755
17094c
--- a/modules.d/99base/dracut-lib.sh
17094c
+++ b/modules.d/99base/dracut-lib.sh
17094c
@@ -1220,7 +1220,7 @@ are_lists_eq() {
17094c
 
17094c
 setmemdebug() {
17094c
     if [ -z "$DEBUG_MEM_LEVEL" ]; then
17094c
-        export DEBUG_MEM_LEVEL=$(getargnum 0 0 3 rd.memdebug)
17094c
+        export DEBUG_MEM_LEVEL=$(getargnum 0 0 5 rd.memdebug)
17094c
     fi
17094c
 }
17094c
 
17094c
diff --git a/modules.d/99memstrack/memstrack-report.sh b/modules.d/99memstrack/memstrack-report.sh
17094c
new file mode 100755
17094c
index 00000000..241e8621
17094c
--- /dev/null
17094c
+++ b/modules.d/99memstrack/memstrack-report.sh
17094c
@@ -0,0 +1,23 @@
17094c
+#!/usr/bin/env bash
17094c
+. /lib/dracut-lib.sh
17094c
+# Print the memstrack report; an unset DEBUG_MEM_LEVEL is treated as 0.
17094c
+if ! [ "${DEBUG_MEM_LEVEL:-0}" -ge 4 ]; then
17094c
+    exit 0
17094c
+fi
17094c
+# Stop the tracer first: through systemd when present, else with SIGINT.
17094c
+if type -P systemctl >/dev/null; then
17094c
+    systemctl stop memstrack.service
17094c
+else
17094c
+    get_pid_of_tracer () {
17094c
+        local _user _pid _rest
17094c
+        read -r _user _pid _rest <<< "$(ps aux | grep '[m]emstrack' | head -1)"
17094c
+        echo "$_pid"
17094c
+    }
17094c
+    # Skip kill when nothing matched; kill without a PID is a usage error.
17094c
+    _tracer=$(get_pid_of_tracer); [ -z "$_tracer" ] || kill -s INT "$_tracer"
17094c
+    while [[ -n $(get_pid_of_tracer) ]]; do
17094c
+        sleep 1
17094c
+    done
17094c
+fi
17094c
+
17094c
+cat /.memstrack
17094c
diff --git a/modules.d/99memstrack/memstrack-start.sh b/modules.d/99memstrack/memstrack-start.sh
17094c
new file mode 100755
17094c
index 00000000..5aa73efe
17094c
--- /dev/null
17094c
+++ b/modules.d/99memstrack/memstrack-start.sh
17094c
@@ -0,0 +1,68 @@
17094c
+#!/bin/sh
17094c
+# Mount kernel debug fs so debug tools can work.
17094c
+# memdebug=4 and memdebug=5 require debugfs to be mounted.
17094c
+# And there is no need to umount it.
17094c
+
17094c
+type getargnum >/dev/null 2>&1 || . /lib/dracut-lib.sh
17094c
+
17094c
+# "/sys/kernel/tracing" takes priority if it exists.
17094c
+get_trace_base() {
17094c
+    # Prefer the native tracefs mount point; the debugfs path is only the
17094c
+    # fallback used when "/sys/kernel/tracing" does not exist.
17094c
+    local base="/sys/kernel/debug"
17094c
+
17094c
+    [ -d "/sys/kernel/tracing" ] && base="/sys/kernel"
17094c
+    echo "$base"
17094c
+}
17094c
+
17094c
+is_debugfs_ready() {
17094c
+    test -f "$(get_trace_base)/tracing/trace"    # regular "trace" file present?
17094c
+}
17094c
+
17094c
+prepare_debugfs() {
17094c
+    local base tracing
17094c
+
17094c
+    base=$(get_trace_base)
17094c
+    tracing="$base/tracing"
17094c
+    if [ ! -d "$tracing" ]; then
17094c
+        # No tracing directory: old interface, reached by mounting debugfs.
17094c
+        mount none -t debugfs "$base"
17094c
+    elif [ ! -f "$tracing/trace" ]; then
17094c
+        # Directory without a trace file: new interface, mount tracefs on it.
17094c
+        mount none -t tracefs "$tracing"
17094c
+    fi
17094c
+
17094c
+    [ -f "$tracing/trace" ] && return 0
17094c
+    echo "WARN: failed to mount debugfs"
17094c
+    return 1
17094c
+}
17094c
+
17094c
+if ! is_debugfs_ready ; then
17094c
+    prepare_debugfs
17094c
+fi
17094c
+
17094c
+# An unset or empty DEBUG_MEM_LEVEL counts as 0: nothing to trace.
17094c
+if [ "${DEBUG_MEM_LEVEL:-0}" -ge 5 ]; then
17094c
+    echo "memstrack - will report kernel module memory usage summary and top allocation stack"
17094c
+    memstrack --report module_summary,module_top --notui --throttle 80 -o /.memstrack &
17094c
+elif [ "${DEBUG_MEM_LEVEL:-0}" -ge 4 ]; then
17094c
+    echo "memstrack - will report memory usage summary"
17094c
+    memstrack --report module_summary --notui --throttle 80 -o /.memstrack &
17094c
+else
17094c
+    exit 0
17094c
+fi
17094c
+PID=$!
17094c
+
17094c
+# Wait a second for memstrack to setup everything, avoid missing any event
17094c
+sleep 1
17094c
+
17094c
+# "$?" after "cmd &" is always 0, so detect a failed start by liveness.
17094c
+if ! kill -0 "$PID" 2>/dev/null; then
17094c
+    wait "$PID"; RET=$?
17094c
+    [ "$RET" -ne 0 ] || RET=1
17094c
+    echo "Failed to start memstrack, exit status: $RET"
17094c
+    exit $RET
17094c
+fi
17094c
+
17094c
+echo "$PID" > /run/memstrack.pid
17094c
+disown
17094c
diff --git a/modules.d/99memstrack/memstrack.service b/modules.d/99memstrack/memstrack.service
17094c
new file mode 100644
17094c
index 00000000..6b47adef
17094c
--- /dev/null
17094c
+++ b/modules.d/99memstrack/memstrack.service
17094c
@@ -0,0 +1,13 @@
17094c
+[Unit]
17094c
+Description=Memstrack Anylazing Service
17094c
+DefaultDependencies=no
17094c
+Before=dracut-cmdline.service systemd-udevd.service local-fs-pre.target
17094c
+IgnoreOnIsolate=true
17094c
+# NOTE(review): PIDFile= is documented for Type=forking; confirm Type=simple is intended.
17094c
+[Service]
17094c
+Type=simple
17094c
+ExecStart=/bin/memstrack-start
17094c
+PIDFile=/run/memstrack.pid
17094c
+StandardInput=null
17094c
+StandardOutput=syslog+console
17094c
+StandardError=syslog+console
17094c
diff --git a/modules.d/99memstrack/module-setup.sh b/modules.d/99memstrack/module-setup.sh
17094c
new file mode 100755
17094c
index 00000000..d5bacb4d
17094c
--- /dev/null
17094c
+++ b/modules.d/99memstrack/module-setup.sh
17094c
@@ -0,0 +1,27 @@
17094c
+#!/usr/bin/bash
17094c
+
17094c
+check() {
17094c
+    # Usable only when a memstrack executable can be found in PATH.
17094c
+    if ! type -P memstrack >/dev/null; then
17094c
+        dinfo "memstrack is not available"
17094c
+        dinfo "If you need to use rd.memdebug>=4, please install memstrack"
17094c
+        return 1
17094c
+    fi
17094c
+
17094c
+    dinfo "memstrack is available"
17094c
+    return 0
17094c
+}
17094c
+
17094c
+depends() {
17094c
+    return 0    # prints nothing: no other module is named as a dependency
17094c
+}
17094c
+
17094c
+install() {
17094c
+    inst "/bin/memstrack" "/bin/memstrack"
17094c
+    # Start helper goes to /bin; the report script becomes a "cleanup" hook.
17094c
+    inst "$moddir/memstrack-start.sh" "/bin/memstrack-start"
17094c
+    inst_hook cleanup 99 "$moddir/memstrack-report.sh"
17094c
+    # Ship the unit and make initrd.target want it inside the image root.
17094c
+    inst "$moddir/memstrack.service" "$systemdsystemunitdir/memstrack.service"
17094c
+    systemctl -q --root "$initdir" add-wants initrd.target memstrack.service
17094c
+}
17094c