6df2a7
From adacd8101e7558930e4499c21006eaa14629c234 Mon Sep 17 00:00:00 2001
6df2a7
From: Kairui Song <kasong@redhat.com>
6df2a7
Date: Wed, 8 Apr 2020 20:09:38 +0800
6df2a7
Subject: [PATCH] Add 99memstrack module
6df2a7
6df2a7
memstrack is a new tool to track the overall memory usage and
6df2a7
allocation, which can help offload and improve the builtin module
6df2a7
memory tracing function in dracut.
6df2a7
6df2a7
With this change, the rd.memdebug=4 behavior is similar to before,
6df2a7
but the report is deferred to pre-pivot, so the memory usage info
6df2a7
during the whole initramfs run is traced. And the output format is
6df2a7
changed a bit:
6df2a7
6df2a7
  dracut-pre-pivot[519]: ======== Report format module_summary: ========
6df2a7
  dracut-pre-pivot[519]: Module squashfs using 10.4MB (2658 pages), peak allocation 10.4MB (2671 pages)
6df2a7
  dracut-pre-pivot[519]: Module qxl using 3.4MB (865 pages), peak allocation 3.4MB (880 pages)
6df2a7
  dracut-pre-pivot[519]: Module crc32c_intel using 2.0MB (519 pages), peak allocation 3.0MB (769 pages)
6df2a7
  dracut-pre-pivot[519]: Module serio_raw using 2.0MB (505 pages), peak allocation 3.6MB (918 pages)
6df2a7
  dracut-pre-pivot[519]: Module virtio_console using 1.6MB (416 pages), peak allocation 1.6MB (419 pages)
6df2a7
  ... snip ...
6df2a7
  dracut-pre-pivot[519]: ======== Report format module_summary END ========
6df2a7
6df2a7
It now contains more detail and also includes the peak usage which could
6df2a7
be more helpful.
6df2a7
6df2a7
And now there is also rd.memdebug=5, which will print more detail about
6df2a7
the stack trace of the top memory user, also printed on pre-pivot:
6df2a7
6df2a7
  dracut-pre-pivot[519]: ======== Report format module_top: ========
6df2a7
  dracut-pre-pivot[519]: Top stack usage of module squashfs:
6df2a7
  dracut-pre-pivot[519]:   (null) Pages: 2658 (peak: 2671)
6df2a7
  dracut-pre-pivot[519]:     (null) Pages: 2658 (peak: 2671)
6df2a7
  dracut-pre-pivot[519]:       async_page_fault (0xffffffff81a01149) Pages: 1448 (peak: 1461)
6df2a7
  dracut-pre-pivot[519]:         do_async_page_fault (0xffffffff8105c509) Pages: 1448 (peak: 1461)
6df2a7
  dracut-pre-pivot[519]:           do_page_fault (0xffffffff8106296a) Pages: 1448 (peak: 1461)
6df2a7
  dracut-pre-pivot[519]:             do_user_addr_fault (0xffffffff810626bd) Pages: 1448 (peak: 1461)
6df2a7
  dracut-pre-pivot[519]:               handle_mm_fault (0xffffffff812940c4) Pages: 1448 (peak: 1461)
6df2a7
  dracut-pre-pivot[519]:                 __handle_mm_fault (0xffffffff81293627) Pages: 1195 (peak: 1208)
6df2a7
  dracut-pre-pivot[519]:                   __do_fault (0xffffffff8128b07e) Pages: 1195 (peak: 1208)
6df2a7
  dracut-pre-pivot[519]:                     filemap_fault (0xffffffff8124c0b9) Pages: 1195 (peak: 1208)
6df2a7
  dracut-pre-pivot[519]:                       __do_page_cache_readahead (0xffffffff812585da) Pages: 1063 (peak: 1076)
6df2a7
  dracut-pre-pivot[519]:                         read_pages (0xffffffff812583c2) Pages: 1063 (peak: 1076)
6df2a7
  dracut-pre-pivot[519]:                           squashfs_readpage squashfs (0xffffffffc0022073) Pages: 1039 (peak: 1052)
6df2a7
  dracut-pre-pivot[519]:                             squashfs_readpage_block squashfs (0xffffffffc0024334) Pages: 744 (peak: 744)
6df2a7
  dracut-pre-pivot[519]:                               squashfs_copy_cache squashfs (0xffffffffc0021a3f) Pages: 744 (peak: 744)
6df2a7
  dracut-pre-pivot[519]:                                 pagecache_get_page (0xffffffff8124abf7) Pages: 744 (peak: 744)
6df2a7
  dracut-pre-pivot[519]:                                   __page_cache_alloc (0xffffffff81247df6) Pages: 744 (peak: 744)
6df2a7
  dracut-pre-pivot[519]:                                     alloc_pages_current (0xffffffff812cdca7) Pages: 744 (peak: 744)
6df2a7
  dracut-pre-pivot[519]:                                       __alloc_pages_nodemask (0xffffffff812b3107) Pages: 744 (peak: 744)
6df2a7
  dracut-pre-pivot[519]:                                         __alloc_pages_nodemask (0xffffffff812b3107) Pages: 1488 (peak: 1488)
6df2a7
  dracut-pre-pivot[519]: Top stack usage of module qxl:
6df2a7
  dracut-pre-pivot[519]:   (null) Pages: 865 (peak: 880)
6df2a7
  dracut-pre-pivot[519]:     entry_SYSCALL_64_after_hwframe (0xffffffff81a0008c) Pages: 855 (peak: 858)
6df2a7
  dracut-pre-pivot[519]:       do_syscall_64 (0xffffffff81002a5a) Pages: 855 (peak: 858)
6df2a7
  dracut-pre-pivot[519]:         __x64_sys_finit_module (0xffffffff8117ccea) Pages: 811 (peak: 811)
6df2a7
  dracut-pre-pivot[519]:           __do_sys_finit_module (0xffffffff8117cc6e) Pages: 811 (peak: 811)
6df2a7
  dracut-pre-pivot[519]:             load_module (0xffffffff8117c6be) Pages: 802 (peak: 802)
6df2a7
  dracut-pre-pivot[519]:               do_init_module (0xffffffff81179e72) Pages: 802 (peak: 802)
6df2a7
  dracut-pre-pivot[519]:                 do_one_initcall (0xffffffff81000d5a) Pages: 802 (peak: 802)
6df2a7
  dracut-pre-pivot[519]:                   serio_raw_poll serio_raw (0xffffffffc0200054) Pages: 802 (peak: 802)
6df2a7
  dracut-pre-pivot[519]:                     __pci_register_driver (0xffffffff81557804) Pages: 802 (peak: 802)
6df2a7
  dracut-pre-pivot[519]:                       driver_register (0xffffffff8167ed24) Pages: 802 (peak: 802)
6df2a7
  dracut-pre-pivot[519]:                         bus_add_driver (0xffffffff8167cbb2) Pages: 801 (peak: 801)
6df2a7
  dracut-pre-pivot[519]:                           driver_attach (0xffffffff8167d28e) Pages: 801 (peak: 801)
6df2a7
  dracut-pre-pivot[519]:                             bus_for_each_dev (0xffffffff8167b62c) Pages: 801 (peak: 801)
6df2a7
  dracut-pre-pivot[519]:                               __driver_attach (0xffffffff8167e18f) Pages: 801 (peak: 801)
6df2a7
  dracut-pre-pivot[519]:                                 device_driver_attach (0xffffffff8167e0ed) Pages: 801 (peak: 801)
6df2a7
  dracut-pre-pivot[519]:                                   driver_probe_device (0xffffffff8167de6c) Pages: 801 (peak: 801)
6df2a7
  dracut-pre-pivot[519]:                                     really_probe (0xffffffff8167d9c9) Pages: 801 (peak: 801)
6df2a7
  dracut-pre-pivot[519]:                                       pci_device_probe (0xffffffff81559627) Pages: 801 (peak: 801)
6df2a7
  dracut-pre-pivot[519]:                                         local_pci_probe (0xffffffff81557f98) Pages: 801 (peak: 801)
6df2a7
  dracut-pre-pivot[519]:                                           qxl_pci_probe qxl (0xffffffffc01f0387) Pages: 773 (peak: 773)
6df2a7
  dracut-pre-pivot[519]:                                             drm_fbdev_generic_setup drm_kms_helper (0xffffffffc01b30c5) Pages: 773 (peak: 773)
6df2a7
  dracut-pre-pivot[519]:                                               drm_fbdev_client_hotplug drm_kms_helper (0xffffffffc01b2656) Pages: 773 (peak: 773)
6df2a7
  dracut-pre-pivot[519]:                                                 __drm_fb_helper_initial_config_and_unlock drm_kms_helper (0xffffffffc01b1a28) Pages: 770 (peak: 770)
6df2a7
  dracut-pre-pivot[519]:                                                   drm_fb_helper_generic_probe drm_kms_helper (0xffffffffc01b2fa5) Pages: 770 (peak: 770)
6df2a7
  dracut-pre-pivot[519]:                                                     vzalloc (0xffffffff812aa39c) Pages: 770 (peak: 770)
6df2a7
  dracut-pre-pivot[519]:                                                       __vmalloc_node_range (0xffffffff812aa200) Pages: 768 (peak: 768)
6df2a7
  ... snip ...
6df2a7
  ======== Report format module_top END ========
6df2a7
6df2a7
This could be very helpful for debugging memory usage issues.
6df2a7
6df2a7
(cherry picked from commit 7dd8a2f4d351ce8975c0af122732a2a12697c8cc)
6df2a7
6df2a7
Resolves: #1829528
6df2a7
---
6df2a7
 dracut.cmdline.7.asc                      | 16 +++++---
6df2a7
 dracut.spec                               |  2 +
6df2a7
 modules.d/99base/dracut-lib.sh            |  2 +-
6df2a7
 modules.d/99memstrack/memstrack-report.sh | 23 +++++++++++
6df2a7
 modules.d/99memstrack/memstrack-start.sh  | 68 +++++++++++++++++++++++++++++++
6df2a7
 modules.d/99memstrack/memstrack.service   | 13 ++++++
6df2a7
 modules.d/99memstrack/module-setup.sh     | 27 ++++++++++++
6df2a7
 7 files changed, 144 insertions(+), 7 deletions(-)
6df2a7
6df2a7
diff --git a/dracut.cmdline.7.asc b/dracut.cmdline.7.asc
6df2a7
index 0dc58d1a..7f31fbae 100644
6df2a7
--- a/dracut.cmdline.7.asc
6df2a7
+++ b/dracut.cmdline.7.asc
6df2a7
@@ -187,8 +187,8 @@ It should be attached to any report about dracut problems.
6df2a7
     _/run/initramfs/init.log_.
6df2a7
     If "quiet" is set, it also logs to the console.
6df2a7
 
6df2a7
-**rd.memdebug=[0-3]**::
6df2a7
-    Print memory usage info at various points, set the verbose level from 0 to 4.
6df2a7
+**rd.memdebug=[0-5]**::
6df2a7
+    Print memory usage info at various points, set the verbose level from 0 to 5.
6df2a7
 +
6df2a7
     Higher level means more debugging output:
6df2a7
 +
6df2a7
@@ -197,10 +197,14 @@ It should be attached to any report about dracut problems.
6df2a7
     1 - partial /proc/meminfo
6df2a7
     2 - /proc/meminfo
6df2a7
     3 - /proc/meminfo + /proc/slabinfo
6df2a7
-    4 - /proc/meminfo + /proc/slabinfo + tracekomem
6df2a7
-        NOTE: tracekomem is a shell script utilizing kernel trace to track
6df2a7
-              the rough total memory consumption of kernel modules during
6df2a7
-              loading. It may override other trace configurations.
6df2a7
+    4 - /proc/meminfo + /proc/slabinfo + memstrack summary
6df2a7
+        NOTE: memstrack is a memory tracing tool that tracks the total memory
6df2a7
+              consumption, and peak memory consumption of each kernel module
6df2a7
+              and userspace process during the whole initramfs runtime, report
6df2a7
+              is generated at the end of the initramfs run.
6df2a7
+    5 - /proc/meminfo + /proc/slabinfo + memstrack (with top memory stacktrace)
6df2a7
+        NOTE: memstrack (with top memory stacktrace) will print top memory
6df2a7
+              allocation stack traces during the whole initramfs runtime.
6df2a7
 ----
6df2a7
 
6df2a7
 **rd.break**::
6df2a7
diff --git a/dracut.spec b/dracut.spec
6df2a7
index 0f7eb4bf..c82c24f6 100644
6df2a7
--- a/dracut.spec
6df2a7
+++ b/dracut.spec
6df2a7
@@ -84,6 +84,7 @@ Requires: xz
6df2a7
 Requires: gzip
6df2a7
 
6df2a7
 %if 0%{?fedora} || 0%{?rhel}
6df2a7
+Recommends: memstrack
6df2a7
 Recommends: hardlink
6df2a7
 Recommends: pigz
6df2a7
 Recommends: kpartx
6df2a7
@@ -399,6 +400,7 @@ install -m 0755 51-dracut-rescue-postinst.sh $RPM_BUILD_ROOT%{_sysconfdir}/kerne
6df2a7
 %{dracutlibdir}/modules.d/98syslog
6df2a7
 %{dracutlibdir}/modules.d/98usrmount
6df2a7
 %{dracutlibdir}/modules.d/99base
6df2a7
+%{dracutlibdir}/modules.d/99memstrack
6df2a7
 %{dracutlibdir}/modules.d/99fs-lib
6df2a7
 %{dracutlibdir}/modules.d/99shutdown
6df2a7
 %attr(0644,root,root) %ghost %config(missingok,noreplace) %{_localstatedir}/log/dracut.log
6df2a7
diff --git a/modules.d/99base/dracut-lib.sh b/modules.d/99base/dracut-lib.sh
6df2a7
index 502c7568..56dd63ef 100755
6df2a7
--- a/modules.d/99base/dracut-lib.sh
6df2a7
+++ b/modules.d/99base/dracut-lib.sh
6df2a7
@@ -1220,7 +1220,7 @@ are_lists_eq() {
6df2a7
 
6df2a7
 setmemdebug() {
6df2a7
     if [ -z "$DEBUG_MEM_LEVEL" ]; then
6df2a7
-        export DEBUG_MEM_LEVEL=$(getargnum 0 0 3 rd.memdebug)
6df2a7
+        export DEBUG_MEM_LEVEL=$(getargnum 0 0 5 rd.memdebug)
6df2a7
     fi
6df2a7
 }
6df2a7
 
6df2a7
diff --git a/modules.d/99memstrack/memstrack-report.sh b/modules.d/99memstrack/memstrack-report.sh
6df2a7
new file mode 100755
6df2a7
index 00000000..241e8621
6df2a7
--- /dev/null
6df2a7
+++ b/modules.d/99memstrack/memstrack-report.sh
6df2a7
@@ -0,0 +1,23 @@
6df2a7
+#!/usr/bin/env bash
6df2a7
+. /lib/dracut-lib.sh
6df2a7
+
6df2a7
+if ! [ "$DEBUG_MEM_LEVEL" -ge 4 ]; then
6df2a7
+    exit 0
6df2a7
+fi
6df2a7
+
6df2a7
+if type -P systemctl >/dev/null; then
6df2a7
+    systemctl stop memstrack.service
6df2a7
+else
6df2a7
+    get_pid_of_tracer () {
6df2a7
+        local _user _pid _rest
6df2a7
+        read _user _pid _rest <<< $(ps aux | grep [m]emstrack | head -1)
6df2a7
+        echo $_pid
6df2a7
+    }
6df2a7
+
6df2a7
+    kill -s INT $(get_pid_of_tracer)
6df2a7
+    while [[ -n $(get_pid_of_tracer) ]]; do
6df2a7
+        sleep 1
6df2a7
+    done
6df2a7
+fi
6df2a7
+
6df2a7
+cat /.memstrack
6df2a7
diff --git a/modules.d/99memstrack/memstrack-start.sh b/modules.d/99memstrack/memstrack-start.sh
6df2a7
new file mode 100755
6df2a7
index 00000000..5aa73efe
6df2a7
--- /dev/null
6df2a7
+++ b/modules.d/99memstrack/memstrack-start.sh
6df2a7
@@ -0,0 +1,68 @@
6df2a7
+#!/bin/sh
6df2a7
+# Mount kernel debug fs so debug tools can work.
6df2a7
+# memdebug=4 and memdebug=5 require debugfs to be mounted.
6df2a7
+# And there is no need to umount it.
6df2a7
+
6df2a7
+type getargnum >/dev/null 2>&1 || . /lib/dracut-lib.sh
6df2a7
+
6df2a7
+# "/sys/kernel/tracing" takes priority if it exists.
6df2a7
+get_trace_base() {
6df2a7
+    # trace access through debugfs would be obsolete if "/sys/kernel/tracing" is available.
6df2a7
+    if [ -d "/sys/kernel/tracing" ]; then
6df2a7
+        echo "/sys/kernel"
6df2a7
+    else
6df2a7
+        echo "/sys/kernel/debug"
6df2a7
+    fi
6df2a7
+}
6df2a7
+
6df2a7
+is_debugfs_ready() {
6df2a7
+    [ -f "$(get_trace_base)/tracing/trace" ]
6df2a7
+}
6df2a7
+
6df2a7
+prepare_debugfs() {
6df2a7
+    local trace_base
6df2a7
+
6df2a7
+    trace_base=$(get_trace_base)
6df2a7
+    # old debugfs interface case.
6df2a7
+    if ! [ -d "$trace_base/tracing" ]; then
6df2a7
+        mount none -t debugfs $trace_base
6df2a7
+    # new tracefs interface case.
6df2a7
+    elif ! [ -f "$trace_base/tracing/trace" ]; then
6df2a7
+        mount none -t tracefs "$trace_base/tracing"
6df2a7
+    fi
6df2a7
+
6df2a7
+    if ! [ -f "$trace_base/tracing/trace" ]; then
6df2a7
+        echo "WARN: failed to mount debugfs"
6df2a7
+        return 1
6df2a7
+    fi
6df2a7
+}
6df2a7
+
6df2a7
+if ! is_debugfs_ready ; then
6df2a7
+    prepare_debugfs
6df2a7
+fi
6df2a7
+
6df2a7
+if [ -n "$DEBUG_MEM_LEVEL" ]; then
6df2a7
+    if [ "$DEBUG_MEM_LEVEL" -ge 5 ]; then
6df2a7
+        echo "memstrack - will report kernel module memory usage summary and top allocation stack"
6df2a7
+        memstrack --report module_summary,module_top --notui --throttle 80 -o /.memstrack &
6df2a7
+    elif [ "$DEBUG_MEM_LEVEL" -ge 4 ]; then
6df2a7
+        echo "memstrack - will report memory usage summary"
6df2a7
+        memstrack --report module_summary --notui --throttle 80 -o /.memstrack &
6df2a7
+    else
6df2a7
+        exit 0;
6df2a7
+    fi
6df2a7
+fi
6df2a7
+
6df2a7
+PID=$!
6df2a7
+RET=$?
6df2a7
+
6df2a7
+if [ $RET -ne 0 ]; then
6df2a7
+    echo "Failed to start memstrack, exit status: $RET"
6df2a7
+    exit $RET
6df2a7
+fi
6df2a7
+
6df2a7
+# Wait a second for memstrack to setup everything, avoid missing any event
6df2a7
+sleep 1
6df2a7
+
6df2a7
+echo $PID > /run/memstrack.pid
6df2a7
+disown
6df2a7
diff --git a/modules.d/99memstrack/memstrack.service b/modules.d/99memstrack/memstrack.service
6df2a7
new file mode 100644
6df2a7
index 00000000..6b47adef
6df2a7
--- /dev/null
6df2a7
+++ b/modules.d/99memstrack/memstrack.service
6df2a7
@@ -0,0 +1,13 @@
6df2a7
+[Unit]
6df2a7
+Description=Memstrack Anylazing Service
6df2a7
+DefaultDependencies=no
6df2a7
+Before=dracut-cmdline.service systemd-udevd.service local-fs-pre.target
6df2a7
+IgnoreOnIsolate=true
6df2a7
+
6df2a7
+[Service]
6df2a7
+Type=simple
6df2a7
+ExecStart=/bin/memstrack-start
6df2a7
+PIDFile=/run/memstrack.pid
6df2a7
+StandardInput=null
6df2a7
+StandardOutput=syslog+console
6df2a7
+StandardError=syslog+console
6df2a7
diff --git a/modules.d/99memstrack/module-setup.sh b/modules.d/99memstrack/module-setup.sh
6df2a7
new file mode 100755
6df2a7
index 00000000..d5bacb4d
6df2a7
--- /dev/null
6df2a7
+++ b/modules.d/99memstrack/module-setup.sh
6df2a7
@@ -0,0 +1,27 @@
6df2a7
+#!/usr/bin/bash
6df2a7
+
6df2a7
+check() {
6df2a7
+    if type -P memstrack >/dev/null; then
6df2a7
+        dinfo "memstrack is available"
6df2a7
+        return 0
6df2a7
+    fi
6df2a7
+
6df2a7
+    dinfo "memstrack is not available"
6df2a7
+    dinfo "If you need to use rd.memdebug>=4, please install memstrack"
6df2a7
+
6df2a7
+    return 1
6df2a7
+}
6df2a7
+
6df2a7
+depends() {
6df2a7
+    return 0
6df2a7
+}
6df2a7
+
6df2a7
+install() {
6df2a7
+    inst "/bin/memstrack" "/bin/memstrack"
6df2a7
+
6df2a7
+    inst "$moddir/memstrack-start.sh" "/bin/memstrack-start"
6df2a7
+    inst_hook cleanup 99 "$moddir/memstrack-report.sh"
6df2a7
+
6df2a7
+    inst "$moddir/memstrack.service" "$systemdsystemunitdir/memstrack.service"
6df2a7
+    systemctl -q --root "$initdir" add-wants initrd.target memstrack.service
6df2a7
+}
6df2a7