6edea0
From adacd8101e7558930e4499c21006eaa14629c234 Mon Sep 17 00:00:00 2001
6edea0
From: Kairui Song <kasong@redhat.com>
6edea0
Date: Wed, 8 Apr 2020 20:09:38 +0800
6edea0
Subject: [PATCH] Add 99memstrack module
6edea0
6edea0
memstrack is a new tool to track the overall memory usage and
6edea0
allocation, which can help offload and improve the builtin module
6edea0
memory tracing function in dracut.
6edea0
6edea0
With this change, the rd.memdebug=4 behavior is similar to before,
6edea0
but the report is deferred to pre-pivot, so the memory usage info
6edea0
during the whole initramfs run is traced. And the output format is
6edea0
changed a bit:
6edea0
6edea0
  dracut-pre-pivot[519]: ======== Report format module_summary: ========
6edea0
  dracut-pre-pivot[519]: Module squashfs using 10.4MB (2658 pages), peak allocation 10.4MB (2671 pages)
6edea0
  dracut-pre-pivot[519]: Module qxl using 3.4MB (865 pages), peak allocation 3.4MB (880 pages)
6edea0
  dracut-pre-pivot[519]: Module crc32c_intel using 2.0MB (519 pages), peak allocation 3.0MB (769 pages)
6edea0
  dracut-pre-pivot[519]: Module serio_raw using 2.0MB (505 pages), peak allocation 3.6MB (918 pages)
6edea0
  dracut-pre-pivot[519]: Module virtio_console using 1.6MB (416 pages), peak allocation 1.6MB (419 pages)
6edea0
  ... snip ...
6edea0
  dracut-pre-pivot[519]: ======== Report format module_summary END ========
6edea0
6edea0
It now contains more detail and also includes the peak usage which could
6edea0
be more helpful.
6edea0
6edea0
And now it has a rd.memdebug=5, which will print more detail about
6edea0
the stack trace of the top memory user, also printed on pre-pivot:
6edea0
6edea0
  dracut-pre-pivot[519]: ======== Report format module_top: ========
6edea0
  dracut-pre-pivot[519]: Top stack usage of module squashfs:
6edea0
  dracut-pre-pivot[519]:   (null) Pages: 2658 (peak: 2671)
6edea0
  dracut-pre-pivot[519]:     (null) Pages: 2658 (peak: 2671)
6edea0
  dracut-pre-pivot[519]:       async_page_fault (0xffffffff81a01149) Pages: 1448 (peak: 1461)
6edea0
  dracut-pre-pivot[519]:         do_async_page_fault (0xffffffff8105c509) Pages: 1448 (peak: 1461)
6edea0
  dracut-pre-pivot[519]:           do_page_fault (0xffffffff8106296a) Pages: 1448 (peak: 1461)
6edea0
  dracut-pre-pivot[519]:             do_user_addr_fault (0xffffffff810626bd) Pages: 1448 (peak: 1461)
6edea0
  dracut-pre-pivot[519]:               handle_mm_fault (0xffffffff812940c4) Pages: 1448 (peak: 1461)
6edea0
  dracut-pre-pivot[519]:                 __handle_mm_fault (0xffffffff81293627) Pages: 1195 (peak: 1208)
6edea0
  dracut-pre-pivot[519]:                   __do_fault (0xffffffff8128b07e) Pages: 1195 (peak: 1208)
6edea0
  dracut-pre-pivot[519]:                     filemap_fault (0xffffffff8124c0b9) Pages: 1195 (peak: 1208)
6edea0
  dracut-pre-pivot[519]:                       __do_page_cache_readahead (0xffffffff812585da) Pages: 1063 (peak: 1076)
6edea0
  dracut-pre-pivot[519]:                         read_pages (0xffffffff812583c2) Pages: 1063 (peak: 1076)
6edea0
  dracut-pre-pivot[519]:                           squashfs_readpage squashfs (0xffffffffc0022073) Pages: 1039 (peak: 1052)
6edea0
  dracut-pre-pivot[519]:                             squashfs_readpage_block squashfs (0xffffffffc0024334) Pages: 744 (peak: 744)
6edea0
  dracut-pre-pivot[519]:                               squashfs_copy_cache squashfs (0xffffffffc0021a3f) Pages: 744 (peak: 744)
6edea0
  dracut-pre-pivot[519]:                                 pagecache_get_page (0xffffffff8124abf7) Pages: 744 (peak: 744)
6edea0
  dracut-pre-pivot[519]:                                   __page_cache_alloc (0xffffffff81247df6) Pages: 744 (peak: 744)
6edea0
  dracut-pre-pivot[519]:                                     alloc_pages_current (0xffffffff812cdca7) Pages: 744 (peak: 744)
6edea0
  dracut-pre-pivot[519]:                                       __alloc_pages_nodemask (0xffffffff812b3107) Pages: 744 (peak: 744)
6edea0
  dracut-pre-pivot[519]:                                         __alloc_pages_nodemask (0xffffffff812b3107) Pages: 1488 (peak: 1488)
6edea0
  dracut-pre-pivot[519]: Top stack usage of module qxl:
6edea0
  dracut-pre-pivot[519]:   (null) Pages: 865 (peak: 880)
6edea0
  dracut-pre-pivot[519]:     entry_SYSCALL_64_after_hwframe (0xffffffff81a0008c) Pages: 855 (peak: 858)
6edea0
  dracut-pre-pivot[519]:       do_syscall_64 (0xffffffff81002a5a) Pages: 855 (peak: 858)
6edea0
  dracut-pre-pivot[519]:         __x64_sys_finit_module (0xffffffff8117ccea) Pages: 811 (peak: 811)
6edea0
  dracut-pre-pivot[519]:           __do_sys_finit_module (0xffffffff8117cc6e) Pages: 811 (peak: 811)
6edea0
  dracut-pre-pivot[519]:             load_module (0xffffffff8117c6be) Pages: 802 (peak: 802)
6edea0
  dracut-pre-pivot[519]:               do_init_module (0xffffffff81179e72) Pages: 802 (peak: 802)
6edea0
  dracut-pre-pivot[519]:                 do_one_initcall (0xffffffff81000d5a) Pages: 802 (peak: 802)
6edea0
  dracut-pre-pivot[519]:                   serio_raw_poll serio_raw (0xffffffffc0200054) Pages: 802 (peak: 802)
6edea0
  dracut-pre-pivot[519]:                     __pci_register_driver (0xffffffff81557804) Pages: 802 (peak: 802)
6edea0
  dracut-pre-pivot[519]:                       driver_register (0xffffffff8167ed24) Pages: 802 (peak: 802)
6edea0
  dracut-pre-pivot[519]:                         bus_add_driver (0xffffffff8167cbb2) Pages: 801 (peak: 801)
6edea0
  dracut-pre-pivot[519]:                           driver_attach (0xffffffff8167d28e) Pages: 801 (peak: 801)
6edea0
  dracut-pre-pivot[519]:                             bus_for_each_dev (0xffffffff8167b62c) Pages: 801 (peak: 801)
6edea0
  dracut-pre-pivot[519]:                               __driver_attach (0xffffffff8167e18f) Pages: 801 (peak: 801)
6edea0
  dracut-pre-pivot[519]:                                 device_driver_attach (0xffffffff8167e0ed) Pages: 801 (peak: 801)
6edea0
  dracut-pre-pivot[519]:                                   driver_probe_device (0xffffffff8167de6c) Pages: 801 (peak: 801)
6edea0
  dracut-pre-pivot[519]:                                     really_probe (0xffffffff8167d9c9) Pages: 801 (peak: 801)
6edea0
  dracut-pre-pivot[519]:                                       pci_device_probe (0xffffffff81559627) Pages: 801 (peak: 801)
6edea0
  dracut-pre-pivot[519]:                                         local_pci_probe (0xffffffff81557f98) Pages: 801 (peak: 801)
6edea0
  dracut-pre-pivot[519]:                                           qxl_pci_probe qxl (0xffffffffc01f0387) Pages: 773 (peak: 773)
6edea0
  dracut-pre-pivot[519]:                                             drm_fbdev_generic_setup drm_kms_helper (0xffffffffc01b30c5) Pages: 773 (peak: 773)
6edea0
  dracut-pre-pivot[519]:                                               drm_fbdev_client_hotplug drm_kms_helper (0xffffffffc01b2656) Pages: 773 (peak: 773)
6edea0
  dracut-pre-pivot[519]:                                                 __drm_fb_helper_initial_config_and_unlock drm_kms_helper (0xffffffffc01b1a28) Pages: 770 (peak: 770)
6edea0
  dracut-pre-pivot[519]:                                                   drm_fb_helper_generic_probe drm_kms_helper (0xffffffffc01b2fa5) Pages: 770 (peak: 770)
6edea0
  dracut-pre-pivot[519]:                                                     vzalloc (0xffffffff812aa39c) Pages: 770 (peak: 770)
6edea0
  dracut-pre-pivot[519]:                                                       __vmalloc_node_range (0xffffffff812aa200) Pages: 768 (peak: 768)
6edea0
  ... snip ...
6edea0
  ======== Report format module_top END ========
6edea0
6edea0
This could be very helpful for debugging memory usage issues.
6edea0
6edea0
(cherry picked from commit 7dd8a2f4d351ce8975c0af122732a2a12697c8cc)
6edea0
6edea0
Resolves: #1829528
6edea0
---
6edea0
 dracut.cmdline.7.asc                      | 16 +++++---
6edea0
 dracut.spec                               |  2 +
6edea0
 modules.d/99base/dracut-lib.sh            |  2 +-
6edea0
 modules.d/99memstrack/memstrack-report.sh | 23 +++++++++++
6edea0
 modules.d/99memstrack/memstrack-start.sh  | 68 +++++++++++++++++++++++++++++++
6edea0
 modules.d/99memstrack/memstrack.service   | 13 ++++++
6edea0
 modules.d/99memstrack/module-setup.sh     | 27 ++++++++++++
6edea0
 7 files changed, 144 insertions(+), 7 deletions(-)
6edea0
6edea0
diff --git a/dracut.cmdline.7.asc b/dracut.cmdline.7.asc
6edea0
index 0dc58d1a..7f31fbae 100644
6edea0
--- a/dracut.cmdline.7.asc
6edea0
+++ b/dracut.cmdline.7.asc
6edea0
@@ -187,8 +187,8 @@ It should be attached to any report about dracut problems.
6edea0
     _/run/initramfs/init.log_.
6edea0
     If "quiet" is set, it also logs to the console.
6edea0
 
6edea0
-**rd.memdebug=[0-3]**::
6edea0
-    Print memory usage info at various points, set the verbose level from 0 to 4.
6edea0
+**rd.memdebug=[0-5]**::
6edea0
+    Print memory usage info at various points, set the verbose level from 0 to 5.
6edea0
 +
6edea0
     Higher level means more debugging output:
6edea0
 +
6edea0
@@ -197,10 +197,14 @@ It should be attached to any report about dracut problems.
6edea0
     1 - partial /proc/meminfo
6edea0
     2 - /proc/meminfo
6edea0
     3 - /proc/meminfo + /proc/slabinfo
6edea0
-    4 - /proc/meminfo + /proc/slabinfo + tracekomem
6edea0
-        NOTE: tracekomem is a shell script utilizing kernel trace to track
6edea0
-              the rough total memory consumption of kernel modules during
6edea0
-              loading. It may override other trace configurations.
6edea0
+    4 - /proc/meminfo + /proc/slabinfo + memstrack summary
6edea0
+        NOTE: memstrack is a memory tracing tool that tracks the total memory
6edea0
+              consumption, and peak memory consumption of each kernel module
6edea0
+              and userspace process during the whole initramfs runtime; the
6edea0
+              report is generated at the end of the initramfs run.
6edea0
+    5 - /proc/meminfo + /proc/slabinfo + memstrack (with top memory stacktrace)
6edea0
+        NOTE: memstrack (with top memory stacktrace) will print top memory
6edea0
+              allocation stack traces during the whole initramfs runtime.
6edea0
 ----
6edea0
 
6edea0
 **rd.break**::
6edea0
diff --git a/dracut.spec b/dracut.spec
6edea0
index 0f7eb4bf..c82c24f6 100644
6edea0
--- a/dracut.spec
6edea0
+++ b/dracut.spec
6edea0
@@ -84,6 +84,7 @@ Requires: xz
6edea0
 Requires: gzip
6edea0
 
6edea0
 %if 0%{?fedora} || 0%{?rhel}
6edea0
+Recommends: memstrack
6edea0
 Recommends: hardlink
6edea0
 Recommends: pigz
6edea0
 Recommends: kpartx
6edea0
@@ -399,6 +400,7 @@ install -m 0755 51-dracut-rescue-postinst.sh $RPM_BUILD_ROOT%{_sysconfdir}/kerne
6edea0
 %{dracutlibdir}/modules.d/98syslog
6edea0
 %{dracutlibdir}/modules.d/98usrmount
6edea0
 %{dracutlibdir}/modules.d/99base
6edea0
+%{dracutlibdir}/modules.d/99memstrack
6edea0
 %{dracutlibdir}/modules.d/99fs-lib
6edea0
 %{dracutlibdir}/modules.d/99shutdown
6edea0
 %attr(0644,root,root) %ghost %config(missingok,noreplace) %{_localstatedir}/log/dracut.log
6edea0
diff --git a/modules.d/99base/dracut-lib.sh b/modules.d/99base/dracut-lib.sh
6edea0
index 502c7568..56dd63ef 100755
6edea0
--- a/modules.d/99base/dracut-lib.sh
6edea0
+++ b/modules.d/99base/dracut-lib.sh
6edea0
@@ -1220,7 +1220,7 @@ are_lists_eq() {
6edea0
 
6edea0
 setmemdebug() {
6edea0
     if [ -z "$DEBUG_MEM_LEVEL" ]; then
6edea0
-        export DEBUG_MEM_LEVEL=$(getargnum 0 0 3 rd.memdebug)
6edea0
+        export DEBUG_MEM_LEVEL=$(getargnum 0 0 5 rd.memdebug)
6edea0
     fi
6edea0
 }
6edea0
 
6edea0
diff --git a/modules.d/99memstrack/memstrack-report.sh b/modules.d/99memstrack/memstrack-report.sh
6edea0
new file mode 100755
6edea0
index 00000000..241e8621
6edea0
--- /dev/null
6edea0
+++ b/modules.d/99memstrack/memstrack-report.sh
6edea0
@@ -0,0 +1,23 @@
6edea0
+#!/usr/bin/env bash
6edea0
+. /lib/dracut-lib.sh
6edea0
+
6edea0
+if ! [ "$DEBUG_MEM_LEVEL" -ge 4 ]; then
6edea0
+    exit 0
6edea0
+fi
6edea0
+
6edea0
+if type -P systemctl >/dev/null; then
6edea0
+    systemctl stop memstrack.service
6edea0
+else
6edea0
+    get_pid_of_tracer () {
6edea0
+        local _user _pid _rest
6edea0
+        read _user _pid _rest <<< $(ps aux | grep [m]emstrack | head -1)
6edea0
+        echo $_pid
6edea0
+    }
6edea0
+
6edea0
+    kill -s INT $(get_pid_of_tracer)
6edea0
+    while [[ -n $(get_pid_of_tracer) ]]; do
6edea0
+        sleep 1
6edea0
+    done
6edea0
+fi
6edea0
+
6edea0
+cat /.memstrack
6edea0
diff --git a/modules.d/99memstrack/memstrack-start.sh b/modules.d/99memstrack/memstrack-start.sh
6edea0
new file mode 100755
6edea0
index 00000000..5aa73efe
6edea0
--- /dev/null
6edea0
+++ b/modules.d/99memstrack/memstrack-start.sh
6edea0
@@ -0,0 +1,68 @@
6edea0
+#!/bin/sh
6edea0
+# Mount kernel debug fs so debug tools can work.
6edea0
+# memdebug=4 and memdebug=5 require debugfs to be mounted.
6edea0
+# And there is no need to umount it.
6edea0
+
6edea0
+type getargnum >/dev/null 2>&1 || . /lib/dracut-lib.sh
6edea0
+
6edea0
+# "/sys/kernel/tracing" takes priority if it exists.
6edea0
+get_trace_base() {
6edea0
+    # trace access through debugfs would be obsolete if "/sys/kernel/tracing" is available.
6edea0
+    if [ -d "/sys/kernel/tracing" ]; then
6edea0
+        echo "/sys/kernel"
6edea0
+    else
6edea0
+        echo "/sys/kernel/debug"
6edea0
+    fi
6edea0
+}
6edea0
+
6edea0
+is_debugfs_ready() {
6edea0
+    [ -f "$(get_trace_base)/tracing/trace" ]
6edea0
+}
6edea0
+
6edea0
+prepare_debugfs() {
6edea0
+    local trace_base
6edea0
+
6edea0
+    trace_base=$(get_trace_base)
6edea0
+    # old debugfs interface case.
6edea0
+    if ! [ -d "$trace_base/tracing" ]; then
6edea0
+        mount none -t debugfs $trace_base
6edea0
+    # new tracefs interface case.
6edea0
+    elif ! [ -f "$trace_base/tracing/trace" ]; then
6edea0
+        mount none -t tracefs "$trace_base/tracing"
6edea0
+    fi
6edea0
+
6edea0
+    if ! [ -f "$trace_base/tracing/trace" ]; then
6edea0
+        echo "WARN: failed to mount debugfs"
6edea0
+        return 1
6edea0
+    fi
6edea0
+}
6edea0
+
6edea0
+if ! is_debugfs_ready ; then
6edea0
+    prepare_debugfs
6edea0
+fi
6edea0
+
6edea0
+if [ -n "$DEBUG_MEM_LEVEL" ]; then
6edea0
+    if [ "$DEBUG_MEM_LEVEL" -ge 5 ]; then
6edea0
+        echo "memstrack - will report kernel module memory usage summary and top allocation stack"
6edea0
+        memstrack --report module_summary,module_top --notui --throttle 80 -o /.memstrack &
6edea0
+    elif [ "$DEBUG_MEM_LEVEL" -ge 4 ]; then
6edea0
+        echo "memstrack - will report memory usage summary"
6edea0
+        memstrack --report module_summary --notui --throttle 80 -o /.memstrack &
6edea0
+    else
6edea0
+        exit 0;
6edea0
+    fi
6edea0
+fi
6edea0
+
6edea0
+PID=$!
6edea0
+RET=$?
6edea0
+
6edea0
+if [ $RET -ne 0 ]; then
6edea0
+    echo "Failed to start memstrack, exit status: $RET"
6edea0
+    exit $RET
6edea0
+fi
6edea0
+
6edea0
+# Wait a second for memstrack to set everything up, to avoid missing any events
6edea0
+sleep 1
6edea0
+
6edea0
+echo $PID > /run/memstrack.pid
6edea0
+disown
6edea0
diff --git a/modules.d/99memstrack/memstrack.service b/modules.d/99memstrack/memstrack.service
6edea0
new file mode 100644
6edea0
index 00000000..6b47adef
6edea0
--- /dev/null
6edea0
+++ b/modules.d/99memstrack/memstrack.service
6edea0
@@ -0,0 +1,13 @@
6edea0
+[Unit]
6edea0
+Description=Memstrack Anylazing Service
6edea0
+DefaultDependencies=no
6edea0
+Before=dracut-cmdline.service systemd-udevd.service local-fs-pre.target
6edea0
+IgnoreOnIsolate=true
6edea0
+
6edea0
+[Service]
6edea0
+Type=simple
6edea0
+ExecStart=/bin/memstrack-start
6edea0
+PIDFile=/run/memstrack.pid
6edea0
+StandardInput=null
6edea0
+StandardOutput=syslog+console
6edea0
+StandardError=syslog+console
6edea0
diff --git a/modules.d/99memstrack/module-setup.sh b/modules.d/99memstrack/module-setup.sh
6edea0
new file mode 100755
6edea0
index 00000000..d5bacb4d
6edea0
--- /dev/null
6edea0
+++ b/modules.d/99memstrack/module-setup.sh
6edea0
@@ -0,0 +1,27 @@
6edea0
+#!/usr/bin/bash
6edea0
+
6edea0
+check() {
6edea0
+    if type -P memstrack >/dev/null; then
6edea0
+        dinfo "memstrack is available"
6edea0
+        return 0
6edea0
+    fi
6edea0
+
6edea0
+    dinfo "memstrack is not available"
6edea0
+    dinfo "If you need to use rd.memdebug>=4, please install memstrack"
6edea0
+
6edea0
+    return 1
6edea0
+}
6edea0
+
6edea0
+depends() {
6edea0
+    return 0
6edea0
+}
6edea0
+
6edea0
+install() {
6edea0
+    inst "/bin/memstrack" "/bin/memstrack"
6edea0
+
6edea0
+    inst "$moddir/memstrack-start.sh" "/bin/memstrack-start"
6edea0
+    inst_hook cleanup 99 "$moddir/memstrack-report.sh"
6edea0
+
6edea0
+    inst "$moddir/memstrack.service" "$systemdsystemunitdir/memstrack.service"
6edea0
+    systemctl -q --root "$initdir" add-wants initrd.target memstrack.service
6edea0
+}
6edea0