From c5aa46099283a586492532e3c8a1f3f454f915b6 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Oct 08 2024 05:23:12 +0000 Subject: Introduce vmcore creation notification to kdump Upstream: fedora Resolves: RHEL-32060 Conflict: Yes, there are several conflicts. 1) Upstream has moved dracut-kdump.sh to kdump-utils/dracut/99kdumpbase/kdump.sh, so the target files are different. 2) Several patchsets ([1] [2]) have not been backported to rhel9, so some formatting conflicts were encountered. No functional change has been made while backporting the patch. [1]: https://github.com/rhkdump/kdump-utils/pull/18/commits [2]: https://github.com/rhkdump/kdump-utils/pull/33/commits commit 88525ebf5e43cc86aea66dc75ec83db58233883b Author: Tao Liu Date: Thu Sep 5 15:49:07 2024 +1200 Introduce vmcore creation notification to kdump Motivation ========== People may forget to recheck that kdump still works, with the result that no vmcore may be generated after a real system crash. This is unexpected for kdump. It is highly recommended that people recheck kdump after any system modification, such as: a. after kernel patching or a full yum update, as it might break something kdump depends on, e.g. by introducing a new bug. b. after any change at the hardware level, e.g. storage, networking, or a firmware upgrade. c. after deploying any new application, e.g. one that involves 3rd-party modules. Although these are beyond the scope of kdump, a simple vmcore creation status notification is good to have for now. Design ====== When (re)started, kdump currently checks whether any related files/fs/drivers have been modified before determining whether the initrd should be rebuilt. A rebuild is an indicator of such a modification, meaning kdump needs to be rechecked. A rebuild will clear the vmcore creation status recorded in $VMCORE_CREATION_STATUS. The vmcore creation check happens at "kdumpctl (re)start/status" and reports the creation success/fail status to users.
A "success" status indicates that a vmcore has previously been generated successfully in the current env, so it is more likely that a vmcore will be generated later when a real crash happens. A "fail" status indicates that no vmcore was generated previously, or that a previous vmcore creation failed, in the current env. Users should check the 2nd kernel log or the kexec-dmesg.log for the reason of the failure. $VMCORE_CREATION_STATUS is used for recording the vmcore creation status of the current env. The format looks like: success 1718682002 This means that a vmcore was generated successfully at this timestamp for the current env. Usage ===== [root@localhost ~]# kdumpctl restart kdump: kexec: unloaded kdump kernel kdump: Stopping kdump: [OK] kdump: kexec: loaded kdump kernel kdump: Starting kdump: [OK] kdump: Notice: No vmcore creation test performed! [root@localhost ~]# kdumpctl test [root@localhost ~]# kdumpctl status kdump: Kdump is operational kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024 [root@localhost ~]# kdumpctl restart kdump: kexec: unloaded kdump kernel kdump: Stopping kdump: [OK] kdump: kexec: loaded kdump kernel kdump: Starting kdump: [OK] kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024 The notification for kdumpctl (re)start/status can be disabled by setting VMCORE_CREATION_NOTIFICATION="" in /etc/sysconfig/kdump Signed-off-by: Tao Liu Signed-off-by: Tao Liu --- diff --git a/dracut-kdump.sh b/dracut-kdump.sh index 80acc93..2505fc6 100755 --- a/dracut-kdump.sh +++ b/dracut-kdump.sh @@ -306,11 +306,22 @@ do_final_action() do_dump() { - eval $DUMP_INSTRUCTION + if [ -d /vmcorestatus ]; then + _vmcore_creation_status="/vmcorestatus/$VMCORE_CREATION_STATUS" + else + _vmcore_creation_status="/sysroot/$VMCORE_CREATION_STATUS" + fi + + set_vmcore_creation_status 'clear' "$_vmcore_creation_status" + + eval "$DUMP_INSTRUCTION" _ret=$? 
if [ $_ret -ne 0 ]; then + set_vmcore_creation_status 'fail' "$_vmcore_creation_status" derror "saving vmcore failed" + else + set_vmcore_creation_status 'success' "$_vmcore_creation_status" fi return $_ret diff --git a/dracut-module-setup.sh b/dracut-module-setup.sh index 8b67c86..7565651 100755 --- a/dracut-module-setup.sh +++ b/dracut-module-setup.sh @@ -1144,6 +1144,7 @@ install() { inst "/usr/bin/printf" "/sbin/printf" inst "/usr/bin/logger" "/sbin/logger" inst "/usr/bin/chmod" "/sbin/chmod" + inst "/usr/bin/dirname" "/sbin/dirname" inst "/lib/kdump/kdump-lib-initramfs.sh" "/lib/kdump-lib-initramfs.sh" inst "/lib/kdump/kdump-logger.sh" "/lib/kdump-logger.sh" inst "$moddir/kdump.sh" "/usr/bin/kdump.sh" diff --git a/kdump-lib-initramfs.sh b/kdump-lib-initramfs.sh index 41dc751..b3e1aa7 100755 --- a/kdump-lib-initramfs.sh +++ b/kdump-lib-initramfs.sh @@ -8,6 +8,7 @@ KDUMP_CONFIG_FILE="/etc/kdump.conf" FENCE_KDUMP_CONFIG_FILE="/etc/sysconfig/fence_kdump" FENCE_KDUMP_SEND="/usr/libexec/fence_kdump_send" LVM_CONF="/etc/lvm/lvm.conf" +VMCORE_CREATION_STATUS="/var/crash/vmcore-creation.status" # Read kdump config in well formated style kdump_read_conf() @@ -175,3 +176,35 @@ kdump_get_ip_route_field() { echo "$1" | sed -n -e "s/^.*\<$2\>\s\+\(\S\+\).*$/\1/p" } + +# $1: success/fail/clear +# $2: status_file +set_vmcore_creation_status() +{ + _status=$1 + _status_file=$2 + _dir=$(dirname "$_status_file") + + [[ -d "$_dir" ]] || mkdir -p "$_dir" + + _mnt_op=$(get_mount_info OPTIONS target "$_dir" -f) + case $_mnt_op in + ro*) + dinfo "remounting the vmcore status target in rw mode." 
+ mount -o remount,rw "$(findmnt -n -o TARGET --target $_dir)" + ;; + esac + + case "$_status" in + success | fail) + dinfo "saving vmcore status file to $_status_file" + echo "$_status $(date +%s)" > "$_status_file" + ;; + clear) + rm -f "$_status_file" + ;; + *) + return + esac + sync -f "$_dir" +} diff --git a/kdump.sysconfig b/kdump.sysconfig index a185e41..1b720a9 100644 --- a/kdump.sysconfig +++ b/kdump.sysconfig @@ -36,6 +36,10 @@ KEXEC_ARGS="" #What is the image type used for kdump KDUMP_IMG="vmlinuz" +# Enable vmcore creation notification by default, disable by setting +# VMCORE_CREATION_NOTIFICATION="" +VMCORE_CREATION_NOTIFICATION="yes" + # Logging is controlled by following variables in the first kernel: # - @var KDUMP_STDLOGLVL - logging level to standard error (console output) # - @var KDUMP_SYSLOGLVL - logging level to syslog (by logger command) diff --git a/kdump.sysconfig.aarch64 b/kdump.sysconfig.aarch64 index df75f94..0c3bc7c 100644 --- a/kdump.sysconfig.aarch64 +++ b/kdump.sysconfig.aarch64 @@ -36,6 +36,10 @@ KEXEC_ARGS="-s" #What is the image type used for kdump KDUMP_IMG="vmlinuz" +# Enable vmcore creation notification by default, disable by setting +# VMCORE_CREATION_NOTIFICATION="" +VMCORE_CREATION_NOTIFICATION="yes" + # Logging is controlled by following variables in the first kernel: # - @var KDUMP_STDLOGLVL - logging level to standard error (console output) # - @var KDUMP_SYSLOGLVL - logging level to syslog (by logger command) diff --git a/kdump.sysconfig.i386 b/kdump.sysconfig.i386 index d8bf5f6..70860ea 100644 --- a/kdump.sysconfig.i386 +++ b/kdump.sysconfig.i386 @@ -39,6 +39,10 @@ KDUMP_IMG="vmlinuz" #What is the images extension. 
Relocatable kernels don't have one KDUMP_IMG_EXT="" +# Enable vmcore creation notification by default, disable by setting +# VMCORE_CREATION_NOTIFICATION="" +VMCORE_CREATION_NOTIFICATION="yes" + # Logging is controlled by following variables in the first kernel: # - @var KDUMP_STDLOGLVL - logging level to standard error (console output) # - @var KDUMP_SYSLOGLVL - logging level to syslog (by logger command) diff --git a/kdump.sysconfig.ppc64 b/kdump.sysconfig.ppc64 index b7c4e79..445ab1f 100644 --- a/kdump.sysconfig.ppc64 +++ b/kdump.sysconfig.ppc64 @@ -39,6 +39,10 @@ KDUMP_IMG="vmlinuz" #What is the images extension. Relocatable kernels don't have one KDUMP_IMG_EXT="" +# Enable vmcore creation notification by default, disable by setting +# VMCORE_CREATION_NOTIFICATION="" +VMCORE_CREATION_NOTIFICATION="yes" + #Specify the action after failure # Logging is controlled by following variables in the first kernel: diff --git a/kdump.sysconfig.ppc64le b/kdump.sysconfig.ppc64le index c1cee45..d798451 100644 --- a/kdump.sysconfig.ppc64le +++ b/kdump.sysconfig.ppc64le @@ -39,6 +39,10 @@ KDUMP_IMG="vmlinuz" #What is the images extension. Relocatable kernels don't have one KDUMP_IMG_EXT="" +# Enable vmcore creation notification by default, disable by setting +# VMCORE_CREATION_NOTIFICATION="" +VMCORE_CREATION_NOTIFICATION="yes" + #Specify the action after failure # Logging is controlled by following variables in the first kernel: diff --git a/kdump.sysconfig.s390x b/kdump.sysconfig.s390x index b823093..a7a79a9 100644 --- a/kdump.sysconfig.s390x +++ b/kdump.sysconfig.s390x @@ -42,6 +42,10 @@ KDUMP_IMG="vmlinuz" #What is the images extension. 
Relocatable kernels don't have one KDUMP_IMG_EXT="" +# Enable vmcore creation notification by default, disable by setting +# VMCORE_CREATION_NOTIFICATION="" +VMCORE_CREATION_NOTIFICATION="yes" + # Logging is controlled by following variables in the first kernel: # - @var KDUMP_STDLOGLVL - logging level to standard error (console output) # - @var KDUMP_SYSLOGLVL - logging level to syslog (by logger command) diff --git a/kdump.sysconfig.x86_64 b/kdump.sysconfig.x86_64 index 09d7350..d3a5ce4 100644 --- a/kdump.sysconfig.x86_64 +++ b/kdump.sysconfig.x86_64 @@ -39,6 +39,10 @@ KDUMP_IMG="vmlinuz" #What is the images extension. Relocatable kernels don't have one KDUMP_IMG_EXT="" +# Enable vmcore creation notification by default, disable by setting +# VMCORE_CREATION_NOTIFICATION="" +VMCORE_CREATION_NOTIFICATION="yes" + # Logging is controlled by following variables in the first kernel: # - @var KDUMP_STDLOGLVL - logging level to standard error (console output) # - @var KDUMP_SYSLOGLVL - logging level to syslog (by logger command) diff --git a/kdumpctl b/kdumpctl index cb20708..40dcb1e 100755 --- a/kdumpctl +++ b/kdumpctl @@ -152,6 +152,8 @@ rebuild_initrd() else rebuild_kdump_initrd fi + + set_vmcore_creation_status 'clear' "$VMCORE_CREATION_STATUS" } #$1: the files to be checked with IFS=' ' @@ -1055,6 +1057,8 @@ start() fi dinfo "Starting kdump: [OK]" + check_vmcore_creation_status + return 0 } reload() @@ -1756,6 +1760,62 @@ if [[ ! -f $KDUMP_CONFIG_FILE ]]; then exit 1 fi +check_vmcore_creation_status() +{ + local _status _timestamp _status_date + + [[ ${VMCORE_CREATION_NOTIFICATION,,} == "yes" ]] || return + + if [[ ! -s $VMCORE_CREATION_STATUS ]]; then + dwarn "Notice: No vmcore creation test performed!" 
+ return + fi + + read -r _status _timestamp < "$VMCORE_CREATION_STATUS" + _status_date="$(date -d "@$_timestamp")" + if [[ "$_status" == "success" ]]; then + dinfo "Notice: Last successful vmcore creation on $_status_date" + else + dwarn "Notice: Last NOT successful vmcore creation on $_status_date" + fi +} + +kdump_test() +{ + local _dir + + if ! is_kernel_loaded "$DEFAULT_DUMP_MODE"; then + derror "Kdump needs be operational before test." + exit 1 + fi + + _dir=$(dirname "$VMCORE_CREATION_STATUS") + if ! [[ -d "$_dir" ]]; then + derror "Vmcore status dir $_dir not exist." + exit 1 + fi + + if ! lsblk $(get_mount_info SOURCE target "$_dir") > /dev/null; then + derror "$VMCORE_CREATION_STATUS must on local drive" + exit 1 + fi + + if [[ ! "$1" == "--force" ]]; then + read -p "DANGER!!! Will perform a kdump test by crashing the system, proceed? (y/N): " input + case $input in + [Yy] ) + dinfo "Start kdump test..." + ;; + * ) + dinfo "Operation cancelled." + exit 0 + ;; + esac + fi + set_vmcore_creation_status 'clear' "$VMCORE_CREATION_STATUS" + echo c > /proc/sysrq-trigger +} + main() { # Determine if the dump mode is kdump or fadump @@ -1786,6 +1846,7 @@ main() EXIT_CODE=3 ;; esac + check_vmcore_creation_status exit $EXIT_CODE ;; reload) @@ -1826,8 +1887,12 @@ main() reset_crashkernel_for_installed_kernel "$2" fi ;; + test) + shift + kdump_test "$@" + ;; *) - dinfo $"Usage: $0 {estimate|start|stop|status|restart|reload|rebuild|reset-crashkernel|propagate|showmem}" + dinfo $"Usage: $0 {estimate|start|stop|status|restart|reload|rebuild|reset-crashkernel|propagate|showmem|test}" exit 1 ;; esac diff --git a/kdumpctl.8 b/kdumpctl.8 index 29a6119..f6a7070 100644 --- a/kdumpctl.8 +++ b/kdumpctl.8 @@ -70,6 +70,16 @@ Note: The memory requirements for kdump varies heavily depending on the used hardware and system configuration. Thus the recommended crashkernel might not work for your specific setup. Please test if kdump works after resetting the crashkernel value. 
+.TP +.I test [--force] +Test kdump by actually triggering a system crash and dump, and check whether a +vmcore can really be generated successfully based on the current config and +environment. After the system reboots back to normal, check the test result +with "kdumpctl status". + +If the optional parameter [--force] is provided, there will be no interactive +confirmation before triggering the system crash. Although dangerous, this option +is meant for automated testing. .SH "SEE ALSO" .BR kdump.conf (5), diff --git a/mkdumprd b/mkdumprd index af0006d..0126d60 100644 --- a/mkdumprd +++ b/mkdumprd @@ -61,9 +61,10 @@ add_dracut_sshkey() # caller should ensure $1 is valid and mounted in 1st kernel to_mount() { - local _target=$1 _fstype=$2 _options=$3 _sed_cmd _new_mntpoint _pdev + local _target=$1 _fstype=$2 _options=$3 _new_mntpoint=$4 + local _sed_cmd _pdev - _new_mntpoint=$(get_kdump_mntpoint_from_target "$_target") + _new_mntpoint="${_new_mntpoint:-$(get_kdump_mntpoint_from_target "$_target")}" _fstype="${_fstype:-$(get_fs_type_from_target "$_target")}" _options="${_options:-$(get_mntopt_from_target "$_target")}" _options="${_options:-defaults}" @@ -474,6 +475,17 @@ if [[ -d /sys/module/nvme ]]; then add_dracut_arg "--add-drivers" "nvme" fi +status_target=$(get_target_from_path $(dirname "$VMCORE_CREATION_STATUS")) + +if [[ $(get_root_fs_device) != "$status_target" ]]; then + new_mntpoint=$(echo /vmcorestatus/$(get_mntpoint_from_target "$status_target") \ + | tr -s "/") + add_mount "$status_target" "" "" "$new_mntpoint" +elif ! is_fadump_capable && \ + ! [[ ${dracut_args[@]} == *"$(kdump_get_persistent_dev $status_target)"* ]]; then + add_mount "$status_target" +fi + dracut "${dracut_args[@]}" "$@" _rc=$?