diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5898f16 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +SOURCES/eppic-d84c354.tar.gz +SOURCES/kexec-tools-2.0.22.tar.xz +SOURCES/makedumpfile-1.6.9.tar.gz diff --git a/.kexec-tools.metadata b/.kexec-tools.metadata new file mode 100644 index 0000000..5909283 --- /dev/null +++ b/.kexec-tools.metadata @@ -0,0 +1,3 @@ +b902add474c63fe34c4cdf2eba882d7f8675e04e SOURCES/eppic-d84c354.tar.gz +d23d055fb98f7fa68a2888b52f11530ba3b3a206 SOURCES/kexec-tools-2.0.22.tar.xz +f07e6ce86b1e2e20b27137c26f08cd15e5460aca SOURCES/makedumpfile-1.6.9.tar.gz diff --git a/SOURCES/60-kdump.install b/SOURCES/60-kdump.install new file mode 100755 index 0000000..5b0e021 --- /dev/null +++ b/SOURCES/60-kdump.install @@ -0,0 +1,36 @@ +#!/usr/bin/bash + +COMMAND="$1" +KERNEL_VERSION="$2" +KDUMP_INITRD_DIR_ABS="$3" +KERNEL_IMAGE="$4" + +if ! [[ ${KERNEL_INSTALL_MACHINE_ID-x} ]]; then + exit 0 +fi + +if [[ -d "$KDUMP_INITRD_DIR_ABS" ]]; then + KDUMP_INITRD="initrdkdump" +else + # If `KDUMP_BOOTDIR` is not writable, then the kdump + # initrd must have been placed at `/var/lib/kdump` + if [[ ! -w "/boot" ]]; then + KDUMP_INITRD_DIR_ABS="/var/lib/kdump" + else + KDUMP_INITRD_DIR_ABS="/boot" + fi + KDUMP_INITRD="initramfs-${KERNEL_VERSION}kdump.img" +fi + +ret=0 +case "$COMMAND" in + add) + # Do nothing, kdump initramfs is strictly host only + # and managed by kdump service + ;; + remove) + rm -f -- "$KDUMP_INITRD_DIR_ABS/$KDUMP_INITRD" + ret=$? + ;; +esac +exit $ret diff --git a/SOURCES/92-crashkernel.install b/SOURCES/92-crashkernel.install new file mode 100755 index 0000000..7613bc6 --- /dev/null +++ b/SOURCES/92-crashkernel.install @@ -0,0 +1,143 @@ +#!/usr/bin/bash + +COMMAND="$1" +KERNEL_VERSION="$2" +KDUMP_INITRD_DIR_ABS="$3" +KERNEL_IMAGE="$4" + +grub_etc_default="/etc/default/grub" + +ver_lt() { + [[ "$(echo -e "$1\n$2" | sort -V)" == $1$'\n'* ]] && [[ $1 != "$2" ]] +} + +# Read crashkernel= value in /etc/default/grub +get_grub_etc_ck() { + [[ -e $grub_etc_default ]] && \ + sed -n -e "s/^GRUB_CMDLINE_LINUX=.*\(crashkernel=[^\ \"]*\)[\ \"].*$/\1/p" $grub_etc_default +} + +# Read crashkernel.default value of specified kernel +get_ck_default() { + ck_file="/usr/lib/modules/$1/crashkernel.default" + [[ -f "$ck_file" ]] && cat "$ck_file" +} + +# Iterate installed kernels, find the kernel with the highest version that has a +# valid crashkernel.default file, exclude current installing/removing kernel +# +# $1: a string representing a crashkernel= cmdline. If given, will also check the +# content of crashkernel.default, only crashkernel.default with the same value will match +get_highest_ck_default_kver() { + for kernel in $(find /usr/lib/modules -maxdepth 1 -mindepth 1 -printf "%f\n" | sort --version-sort -r); do + [[ $kernel == "$KERNEL_VERSION" ]] && continue + [[ -s "/usr/lib/modules/$kernel/crashkernel.default" ]] || continue + + echo "$kernel" + return 0 + done + + return 1 +} + +set_grub_ck() { + sed -i -e "s/^\(GRUB_CMDLINE_LINUX=.*\)crashkernel=[^\ \"]*\([\ \"].*\)$/\1$1\2/" "$grub_etc_default" +} + +# Set specified kernel's crashkernel cmdline value +set_kernel_ck() { + kernel=$1 + ck_cmdline=$2 + + entry=$(grubby --info ALL | grep "^kernel=.*$kernel") + entry=${entry#kernel=} + entry=${entry#\"} + entry=${entry%\"} + + if [[ -z "$entry" ]]; then + echo "$0: failed to find boot entry for kernel $kernel" + return 1 + fi + + [[ -f /etc/zipl.conf ]] && zipl_arg="--zipl" + grubby --args "$ck_cmdline" --update-kernel "$entry" $zipl_arg + [[ $zipl_arg ]] && zipl > /dev/null +} + +case "$COMMAND" in +add) + # - If current boot kernel is using default crashkernel value, update + # installing kernel's crashkernel value to its default value, + # - If intalling a higher version kernel, and /etc/default/grub's + # crashkernel value is using default value, update it to installing + # kernel's default value. + inst_ck_default=$(get_ck_default "$KERNEL_VERSION") + # If installing kernel doesn't have crashkernel.default, just exit. + [[ -z "$inst_ck_default" ]] && exit 0 + + boot_kernel=$(uname -r) + boot_ck_cmdline=$(sed -n -e "s/^.*\(crashkernel=\S*\).*$/\1/p" /proc/cmdline) + highest_ck_default_kver=$(get_highest_ck_default_kver) + highest_ck_default=$(get_ck_default "$highest_ck_default_kver") + + # Try update /etc/default/grub if present, else grub2-mkconfig could + # override crashkernel value. + grub_etc_ck=$(get_grub_etc_ck) + if [[ -n "$grub_etc_ck" ]]; then + if [[ -z "$highest_ck_default_kver" ]]; then + # None of installed kernel have a crashkernel.default, + # check for 'crashkernel=auto' in case of legacy kernel + [[ "$grub_etc_ck" == "crashkernel=auto" ]] && \ + set_grub_ck "$inst_ck_default" + else + # There is a valid crashkernel.default, check if installing kernel + # have a higher version and grub config is using default value + ver_lt "$highest_ck_default_kver" "$KERNEL_VERSION" && \ + [[ "$grub_etc_ck" == "$highest_ck_default" ]] && \ + [[ "$grub_etc_ck" != "$inst_ck_default" ]] && \ + set_grub_ck "$inst_ck_default" + fi + fi + + # Exit if crashkernel is not used in current cmdline + [[ -z $boot_ck_cmdline ]] && exit 0 + + # Get current boot kernel's default value + boot_ck_default=$(get_ck_default "$boot_kernel") + if [[ $boot_ck_cmdline == "crashkernel=auto" ]]; then + # Legacy RHEL kernel defaults to "auto" + boot_ck_default="$boot_ck_cmdline" + fi + + # If boot kernel doesn't have a crashkernel.default, check + # if it's using any installed kernel's crashkernel.default + if [[ -z $boot_ck_default ]]; then + [[ $(get_highest_ck_default_kver "$boot_ck_cmdline") ]] && boot_ck_default="$boot_ck_cmdline" + fi + + # If boot kernel is using a default crashkernel, update + # installing kernel's crashkernel to new default value + if [[ "$boot_ck_cmdline" != "$inst_ck_default" ]] && [[ "$boot_ck_cmdline" == "$boot_ck_default" ]]; then + set_kernel_ck "$KERNEL_VERSION" "$inst_ck_default" + fi + ;; + +remove) + # If grub default value is upgraded when this kernel was installed, try downgrade it + grub_etc_ck=$(get_grub_etc_ck) + [[ $grub_etc_ck ]] || exit 0 + + removing_ck_conf=$(get_ck_default "$KERNEL_VERSION") + [[ $removing_ck_conf ]] || exit 0 + + highest_ck_default_kver=$(get_highest_ck_default_kver) || exit 0 + highest_ck_default=$(get_ck_default "$highest_ck_default_kver") + [[ $highest_ck_default ]] || exit 0 + + if ver_lt "$highest_ck_default_kver" "$KERNEL_VERSION"; then + if [[ $grub_etc_ck == "$removing_ck_conf" ]] && [[ $grub_etc_ck != "$highest_ck_default" ]]; then + set_grub_ck "$highest_ck_default" + fi + fi + ;; +esac diff --git a/SOURCES/98-kexec.rules b/SOURCES/98-kexec.rules new file mode 100644 index 0000000..b73b701 --- /dev/null +++ b/SOURCES/98-kexec.rules @@ -0,0 +1,16 @@ +SUBSYSTEM=="cpu", ACTION=="add", GOTO="kdump_reload" +SUBSYSTEM=="cpu", ACTION=="remove", GOTO="kdump_reload" +SUBSYSTEM=="memory", ACTION=="online", GOTO="kdump_reload" +SUBSYSTEM=="memory", ACTION=="offline", GOTO="kdump_reload" + +GOTO="kdump_reload_end" + +LABEL="kdump_reload" + +# If kdump is not loaded, calling kdump-udev-throttle will end up +# doing nothing, but systemd-run will always generate extra logs for +# each call, so trigger the kdump-udev-throttler only if kdump +# service is active to avoid unnecessary logs +RUN+="/bin/sh -c '/usr/bin/systemctl is-active kdump.service || exit 0; /usr/bin/systemd-run --quiet --no-block /usr/lib/udev/kdump-udev-throttler'" + +LABEL="kdump_reload_end" diff --git a/SOURCES/98-kexec.rules.ppc64 b/SOURCES/98-kexec.rules.ppc64 new file mode 100644 index 0000000..a1c00a9 --- /dev/null +++ b/SOURCES/98-kexec.rules.ppc64 @@ -0,0 +1,22 @@ +SUBSYSTEM=="cpu", ACTION=="online", GOTO="kdump_reload_cpu" +SUBSYSTEM=="memory", ACTION=="online", GOTO="kdump_reload_mem" +SUBSYSTEM=="memory", ACTION=="offline", GOTO="kdump_reload_mem" + +GOTO="kdump_reload_end" + +# If kdump is not loaded, calling kdump-udev-throttle will end up +# doing nothing, but systemd-run will always generate extra logs for +# each call, so trigger the kdump-udev-throttler only if kdump +# service is active to avoid unnecessary logs + +LABEL="kdump_reload_mem" + +RUN+="/bin/sh -c '/usr/bin/systemctl is-active kdump.service || exit 0; /usr/bin/systemd-run --quiet --no-block /usr/lib/udev/kdump-udev-throttler'" + +GOTO="kdump_reload_end" + +LABEL="kdump_reload_cpu" + +RUN+="/bin/sh -c '/usr/bin/systemctl is-active kdump.service || exit 0; ! test -f /sys/kernel/fadump_enabled || cat /sys/kernel/fadump_enabled | grep 0 || exit 0; /usr/bin/systemd-run --quiet --no-block /usr/lib/udev/kdump-udev-throttler'" + +LABEL="kdump_reload_end" diff --git a/SOURCES/crashkernel-howto.txt b/SOURCES/crashkernel-howto.txt new file mode 100644 index 0000000..4abd090 --- /dev/null +++ b/SOURCES/crashkernel-howto.txt @@ -0,0 +1,204 @@ +Introduction +============ + +This document describes features the kexec-tools package provides for setting +and estimating the crashkernel value. + +Kdump lives in a pre-reserved chunk of memory, and the size of the reserved +memory is specified by the `crashkernel=` kernel parameter. It's hard to +estimate an accurate `crashkernel=` value, so it's always recommended to test +kdump after you updated the `crashkernel=` value or changed the dump target. + + +Default crashkernel value +========================= + +Latest kernel packages includes a `crashkernel.default` file installed in kernel +modules folder, available as: + + /usr/lib/modules//crashkernel.default + +The content of the file will be taken as the default value of 'crashkernel=', or +take this file as a reference for setting crashkernel value manually. + + +New installed system +==================== + +Anaconda is the OS installer which sets all the kernel boot cmdline on a new +installed system. If kdump is set enabled during Anaconda installation, Anaconda +will use the `crashkernel.default` file as the default `crashkernel=` value on +the new installed system. + +Users can also override the value during Anaconda installation manually. + + +Auto update of crashkernel boot parameter +========================================= + +Following context in this section assumes all kernel packages have a +`crashkernel.default` file bundled, which is true for the latest official kernel +packages. For kexec-tools behavior with a kernel that doesn't have a +`crashkernel.default` file, please refer to the “Custom Kernel” section of this +doc. + +When `crashkernel=` is using the default value, kexec-tools will need to update +the `crashkernel=` value of new installed kernels, since the default value may +change in new kernel packages. + +kexec-tools does so by adding a kernel installation hook, which gets triggered +every time a new kernel is installed, so kexec-tools can do necessary checks and +updates. + + +Supported Bootloaders +--------------------- + +This auto update only works with GRUB2 and ZIPL, as kexec-tools heavily depends +on `grubby`. If other boot loaders are used, the user will have to update the +`crashkernel=` value manually. + + +Updating kernel package +----------------------- + +When a new version of package kernel is released in the official repository, the +package will always come with a `crashkernel.default` file bundled. Kexec-tools +will act with following rules: + +If current boot kernel is using the default `crashkernel=` boot param value from +its `crashkernel.default` file, then kexec-tools will update new installed +kernel’s `crashkernel=` boot param using the value from the new installed +kernel’s `crashkernel.default` file. This ensures `crashkernel=` is always using +the latest default value. + +If current boot kernel's `crashkernel=` value is set to a non-default value, the +new installed kernel simply inherits this value. + +On systems using GRUB2 as the bootloader, each kernel has its own boot entry, +making it possible to set different `crashkernel=` boot param values for +different kernels. So kexec-tools won’t touch any already installed kernel's +boot param, only new installed kernel's `crashkernel=` boot param value will be +updated. + +But some utilities like `grub2-mkconfig` and `grubby` can override all boot +entry's boot params with the boot params value from the GRUB config file +`/etc/defaults/grub`, so kexec-tools will also update the GRUB config file in +case old `crashkernel=` value overrides new installed kernel’s boot param. + + +Downgrading kernel package +-------------------------- + +When upgrading a kernel package, kexec-tools may update the `crashkernel=` value +in GRUB2 config file to the new value. So when downgrading the kernel package, +kexec-tools will also try to revert that update by setting GRUB2 config file’s +`crashkernel=` value back to the default value in the older kernel package. This +will only occur when the GRUB2 config file is using the default `crashkernel=` +value. + + +Custom kernel +============= + +To make auto crashkernel update more robust, kexec-tools will try to keep +tracking the default 'crashkernel=` value with kernels that don’t have a +`crashkernel.default` file, such kernels are referred to as “custom kernel” in +this doc. This is only a best-effort support to make it easier debugging and +testing the system. + +When installing a custom kernel that doesn’t have a `crashkernel.default` file, +the `crashkernel=` value will be simply inherited from the current boot kernel. + +When installing a new official kernel package and current boot kernel is a +custom kernel, since the boot kernel doesn’t have a `crashkernel.default` file, +kexec-tools will iterate installed kernels and check if the boot kernel +inherited the default value from any other existing kernels’ +`crashkernel.default` file. If a matching `crashkernel.default` file is found, +kexec-tools will update the new installed kernel `crashkernel=` boot param using +the value from the new installed kernel’s `crashkernel.default` file, ensures +the auto crashkernel value update won’t break over one or two custom kernel +installations. + +It is possible that the auto crashkernel value update will fail when custom +kernels are used. One example is a custom kernel inheriting the default +`crashkernel=` value from an older official kernel package, but later that +kernel package is uninstalled. So when booted with the custom kernel, +kexec-tools can't determine if the boot kernel is inheriting a default +`crashkernel=` value from any official build. In such a case, please refer to +the "Reset crashkernel to default value" section of this doc. + + +Reset crashkernel to default value +================================== + +kexec-tools only perform the auto update of crashkernel value when it can +confirm the boot kernel's crashkernel value is using its corresponding default +value or inherited from any installed kernel. + +kexec-tools may fail to determine if the boot kernel is using default +crashkernel value in some use cases: +- kexec-tools package is absent during a kernel package upgrade, and the new + kernel package’s `crashkernel.default` value has changed. +- Custom kernel is used and the kernel it inherits `crashkernel=` value from is + uninstalled. + +So it's recommended to reset the crashkernel value if users have uninstalled +kexec-tools or using a custom kernel. + +Reset using kdumpctl +-------------------- + +To make it easier to reset the `crashkernel=` kernel cmdline to this default +value properly, `kdumpctl` also provides a sub-command: + + `kdumpctl reset-crashkernel []` + +This command will read from the `crashkernel.default` file and reset +bootloader's kernel cmdline to the default value. It will also update bootloader +config if the bootloader has a standalone config file. User will have to reboot +the machine after this command to make it take effect. + +Reset manually +-------------- + +To reset the crashkernel value manually, it's recommended to use utils like +`grubby`. A one liner script for resetting `crashkernel=` value of all installed +kernels to current boot kernel's crashkernel.default` is: + + grubby --update-kernel ALL --args "$(cat /usr/lib/modules/$(uname -r)/crashkernel.default)" + +Estimate crashkernel +==================== + +The best way to estimate a usable crashkernel value is by testing kdump +manually. And you can set crashkernel to a large value, then adjust the +crashkernel value to an acceptable value gradually. + +`kdumpctl` also provides a sub-command for doing rough estimating without +triggering kdump: + + `kdumpctl estimate` + +The output will be like this: + +``` + Encrypted kdump target requires extra memory, assuming using the keyslot with minimun memory requirement + + Reserved crashkernel: 256M + Recommended crashkernel: 655M + + Kernel image size: 47M + Kernel modules size: 12M + Initramfs size: 19M + Runtime reservation: 64M + LUKS required size: 512M + Large modules: + xfs: 1892352 + nouveau: 2318336 + WARNING: Current crashkernel size is lower than recommended size 655M. +``` + +It will generate a summary report about the estimated memory consumption +of each component of kdump. The value may not be accurate enough, but +would be a good start for finding a suitable crashkernel value. diff --git a/SOURCES/dracut-early-kdump-module-setup.sh b/SOURCES/dracut-early-kdump-module-setup.sh new file mode 100755 index 0000000..b25d6b5 --- /dev/null +++ b/SOURCES/dracut-early-kdump-module-setup.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +. /etc/sysconfig/kdump + +KDUMP_KERNEL="" +KDUMP_INITRD="" + +check() { + if [ ! -f /etc/sysconfig/kdump ] || [ ! -f /lib/kdump/kdump-lib.sh ]\ + || [ -n "${IN_KDUMP}" ] + then + return 1 + fi + return 255 +} + +depends() { + echo "base shutdown" + return 0 +} + +prepare_kernel_initrd() { + . /lib/kdump/kdump-lib.sh + + prepare_kdump_bootinfo + + # $kernel is a variable from dracut + if [ "$KDUMP_KERNELVER" != $kernel ]; then + dwarn "Using kernel version '$KDUMP_KERNELVER' for early kdump," \ + "but the initramfs is generated for kernel version '$kernel'" + fi +} + +install() { + prepare_kernel_initrd + if [ ! -f "$KDUMP_KERNEL" ]; then + derror "Could not find required kernel for earlykdump," \ + "earlykdump will not work!" + return 1 + fi + if [ ! -f "$KDUMP_INITRD" ]; then + derror "Could not find required kdump initramfs for earlykdump," \ + "please ensure kdump initramfs is generated first," \ + "earlykdump will not work!" + return 1 + fi + + inst_multiple tail find cut dirname hexdump + inst_simple "/etc/sysconfig/kdump" + inst_binary "/usr/sbin/kexec" + inst_binary "/usr/bin/gawk" "/usr/bin/awk" + inst_binary "/usr/bin/logger" "/usr/bin/logger" + inst_binary "/usr/bin/printf" "/usr/bin/printf" + inst_script "/lib/kdump/kdump-lib.sh" "/lib/kdump-lib.sh" + inst_script "/lib/kdump/kdump-logger.sh" "/lib/kdump-logger.sh" + inst_hook cmdline 00 "$moddir/early-kdump.sh" + inst_binary "$KDUMP_KERNEL" + inst_binary "$KDUMP_INITRD" + + ln_r "$KDUMP_KERNEL" "/boot/kernel-earlykdump" + ln_r "$KDUMP_INITRD" "/boot/initramfs-earlykdump" + + chmod -x "${initdir}/$KDUMP_KERNEL" +} diff --git a/SOURCES/dracut-early-kdump.sh b/SOURCES/dracut-early-kdump.sh new file mode 100755 index 0000000..129841e --- /dev/null +++ b/SOURCES/dracut-early-kdump.sh @@ -0,0 +1,88 @@ +#! /bin/sh + +KEXEC=/sbin/kexec +standard_kexec_args="-p" + +EARLY_KDUMP_INITRD="" +EARLY_KDUMP_KERNEL="" +EARLY_KDUMP_CMDLINE="" +EARLY_KDUMP_KERNELVER="" +EARLY_KEXEC_ARGS="" + +. /etc/sysconfig/kdump +. /lib/dracut-lib.sh +. /lib/kdump-lib.sh +. /lib/kdump-logger.sh + +#initiate the kdump logger +dlog_init +if [ $? -ne 0 ]; then + echo "failed to initiate the kdump logger." + exit 1 +fi + +prepare_parameters() +{ + EARLY_KDUMP_CMDLINE=$(prepare_cmdline "${KDUMP_COMMANDLINE}" "${KDUMP_COMMANDLINE_REMOVE}" "${KDUMP_COMMANDLINE_APPEND}") + EARLY_KDUMP_KERNEL="/boot/kernel-earlykdump" + EARLY_KDUMP_INITRD="/boot/initramfs-earlykdump" +} + +early_kdump_load() +{ + check_kdump_feasibility + if [ $? -ne 0 ]; then + return 1 + fi + + if is_fadump_capable; then + dwarn "WARNING: early kdump doesn't support fadump." + return 1 + fi + + check_current_kdump_status + if [ $? == 0 ]; then + return 1 + fi + + prepare_parameters + + EARLY_KEXEC_ARGS=$(prepare_kexec_args "${KEXEC_ARGS}") + + if is_secure_boot_enforced; then + dinfo "Secure Boot is enabled. Using kexec file based syscall." + EARLY_KEXEC_ARGS="$EARLY_KEXEC_ARGS -s" + fi + + # Here, only output the messages, but do not save these messages + # to a file because the target disk may not be mounted yet, the + # earlykdump is too early. + ddebug "earlykdump: $KEXEC ${EARLY_KEXEC_ARGS} $standard_kexec_args \ + --command-line=$EARLY_KDUMP_CMDLINE --initrd=$EARLY_KDUMP_INITRD \ + $EARLY_KDUMP_KERNEL" + + $KEXEC ${EARLY_KEXEC_ARGS} $standard_kexec_args \ + --command-line="$EARLY_KDUMP_CMDLINE" \ + --initrd=$EARLY_KDUMP_INITRD $EARLY_KDUMP_KERNEL + if [ $? == 0 ]; then + dinfo "kexec: loaded early-kdump kernel" + return 0 + else + derror "kexec: failed to load early-kdump kernel" + return 1 + fi +} + +set_early_kdump() +{ + if getargbool 0 rd.earlykdump; then + dinfo "early-kdump is enabled." + early_kdump_load + else + dinfo "early-kdump is disabled." + fi + + return 0 +} + +set_early_kdump diff --git a/SOURCES/dracut-fadump-init-fadump.sh b/SOURCES/dracut-fadump-init-fadump.sh new file mode 100755 index 0000000..94a3751 --- /dev/null +++ b/SOURCES/dracut-fadump-init-fadump.sh @@ -0,0 +1,48 @@ +#!/bin/sh +export PATH=/usr/bin:/usr/sbin +export SYSTEMD_IN_INITRD=lenient + +[ -e /proc/mounts ] || + (mkdir -p /proc && mount -t proc -o nosuid,noexec,nodev proc /proc) + +grep -q '^sysfs /sys sysfs' /proc/mounts || + (mkdir -p /sys && mount -t sysfs -o nosuid,noexec,nodev sysfs /sys) + +grep -q '^none / ' /proc/mounts || grep -q '^rootfs / ' /proc/mounts && ROOTFS_IS_RAMFS=1 + +if [ -f /proc/device-tree/rtas/ibm,kernel-dump ] || [ -f /proc/device-tree/ibm,opal/dump/mpipl-boot ]; then + mkdir /newroot + mount -t ramfs ramfs /newroot + + if [ $ROOTFS_IS_RAMFS ]; then + for FILE in $(ls -A /fadumproot/); do + mv /fadumproot/$FILE /newroot/ + done + exec switch_root /newroot /init + else + mkdir /newroot/sys /newroot/proc /newroot/dev /newroot/run /newroot/oldroot + + grep -q '^devtmpfs /dev devtmpfs' /proc/mounts && mount --move /dev /newroot/dev + grep -q '^tmpfs /run tmpfs' /proc/mounts && mount --move /run /newroot/run + mount --move /sys /newroot/sys + mount --move /proc /newroot/proc + + cp --reflink=auto --sparse=auto --preserve=mode,timestamps,links -dfr /fadumproot/. /newroot/ + cd /newroot && pivot_root . oldroot + + loop=1 + while [ $loop ]; do + unset loop + while read -r _ mp _; do + case $mp in + /oldroot/*) umount -d "$mp" && loop=1 ;; + esac + done > /tmp/dd_progress_file 2>&1 || return 1 + sync + + dinfo "saving vmcore complete" + return 0 +} + +dump_ssh() +{ + local _ret=0 + local _exitcode=0 _exitcode2=0 + local _opt="-i $1 -o BatchMode=yes -o StrictHostKeyChecking=yes" + local _dir="$KDUMP_PATH/$HOST_IP-$DATEDIR" + local _host=$2 + local _vmcore="vmcore" + local _ipv6_addr="" _username="" + + dinfo "saving to $_host:$_dir" + + cat /var/lib/random-seed > /dev/urandom + ssh -q $_opt $_host mkdir -p $_dir || return 1 + + save_vmcore_dmesg_ssh ${DMESG_COLLECTOR} ${_dir} "${_opt}" $_host + save_opalcore_ssh ${_dir} "${_opt}" $_host + + dinfo "saving vmcore" + + if is_ipv6_address "$_host"; then + _username=${_host%@*} + _ipv6_addr="[${_host#*@}]" + fi + + if [ "${CORE_COLLECTOR%%[[:blank:]]*}" = "scp" ]; then + if [ -n "$_username" ] && [ -n "$_ipv6_addr" ]; then + scp -q $_opt /proc/vmcore "$_username@$_ipv6_addr:$_dir/vmcore-incomplete" + else + scp -q $_opt /proc/vmcore "$_host:$_dir/vmcore-incomplete" + fi + _exitcode=$? + else + $CORE_COLLECTOR /proc/vmcore | ssh $_opt $_host "umask 0077 && dd bs=512 of=$_dir/vmcore-incomplete" + _exitcode=$? + _vmcore="vmcore.flat" + fi + + if [ $_exitcode -eq 0 ]; then + ssh $_opt $_host "mv $_dir/vmcore-incomplete $_dir/$_vmcore" + _exitcode2=$? + if [ $_exitcode2 -ne 0 ]; then + derror "moving vmcore failed, _exitcode:$_exitcode2" + else + dinfo "saving vmcore complete" + fi + else + derror "saving vmcore failed, _exitcode:$_exitcode" + fi + + dinfo "saving the $KDUMP_LOG_FILE to $_host:$_dir/" + save_log + if [ -n "$_username" ] && [ -n "$_ipv6_addr" ]; then + scp -q $_opt $KDUMP_LOG_FILE "$_username@$_ipv6_addr:$_dir/" + else + scp -q $_opt $KDUMP_LOG_FILE "$_host:$_dir/" + fi + _ret=$? + if [ $_ret -ne 0 ]; then + derror "saving log file failed, _exitcode:$_ret" + fi + + if [ $_exitcode -ne 0 ] || [ $_exitcode2 -ne 0 ];then + return 1 + fi + + return 0 +} + +save_opalcore_ssh() { + local _path=$1 + local _opts="$2" + local _location=$3 + local _user_name="" _ipv6addr="" + + ddebug "_path=$_path _opts=$_opts _location=$_location" + + if [ ! -f $OPALCORE ]; then + # Check if we are on an old kernel that uses a different path + if [ -f /sys/firmware/opal/core ]; then + OPALCORE="/sys/firmware/opal/core" + else + return 0 + fi + fi + + if is_ipv6_address "$_host"; then + _user_name=${_location%@*} + _ipv6addr="[${_location#*@}]" + fi + + dinfo "saving opalcore:$OPALCORE to $_location:$_path" + + if [ -n "$_user_name" ] && [ -n "$_ipv6addr" ]; then + scp $_opts $OPALCORE $_user_name@$_ipv6addr:$_path/opalcore-incomplete + else + scp $_opts $OPALCORE $_location:$_path/opalcore-incomplete + fi + if [ $? -ne 0 ]; then + derror "saving opalcore failed" + return 1 + fi + + ssh $_opts $_location mv $_path/opalcore-incomplete $_path/opalcore + dinfo "saving opalcore complete" + return 0 +} + +save_vmcore_dmesg_ssh() { + local _dmesg_collector=$1 + local _path=$2 + local _opts="$3" + local _location=$4 + + dinfo "saving vmcore-dmesg.txt to $_location:$_path" + $_dmesg_collector /proc/vmcore | ssh $_opts $_location "umask 0077 && dd of=$_path/vmcore-dmesg-incomplete.txt" + _exitcode=$? + + if [ $_exitcode -eq 0 ]; then + ssh -q $_opts $_location mv $_path/vmcore-dmesg-incomplete.txt $_path/vmcore-dmesg.txt + dinfo "saving vmcore-dmesg.txt complete" + else + derror "saving vmcore-dmesg.txt failed" + fi +} + +get_host_ip() +{ + local _host + if is_nfs_dump_target || is_ssh_dump_target + then + kdumpnic=$(getarg kdumpnic=) + [ -z "$kdumpnic" ] && derror "failed to get kdumpnic!" && return 1 + _host=`ip addr show dev $kdumpnic|grep '[ ]*inet'` + [ $? -ne 0 ] && derror "wrong kdumpnic: $kdumpnic" && return 1 + _host=`echo $_host | head -n 1 | cut -d' ' -f2` + _host="${_host%%/*}" + [ -z "$_host" ] && derror "wrong kdumpnic: $kdumpnic" && return 1 + HOST_IP=$_host + fi + return 0 +} + +read_kdump_conf() +{ + if [ ! -f "$KDUMP_CONF" ]; then + derror "$KDUMP_CONF not found" + return + fi + + get_kdump_confs + + # rescan for add code for dump target + while read config_opt config_val; + do + # remove inline comments after the end of a directive. + case "$config_opt" in + dracut_args) + config_val=$(get_dracut_args_target "$config_val") + if [ -n "$config_val" ]; then + config_val=$(get_mntpoint_from_target "$config_val") + add_dump_code "dump_fs $config_val" + fi + ;; + ext[234]|xfs|btrfs|minix|nfs) + config_val=$(get_mntpoint_from_target "$config_val") + add_dump_code "dump_fs $config_val" + ;; + raw) + add_dump_code "dump_raw $config_val" + ;; + ssh) + add_dump_code "dump_ssh $SSH_KEY_LOCATION $config_val" + ;; + esac + done <<< "$(read_strip_comments $KDUMP_CONF)" +} + +fence_kdump_notify() +{ + if [ -n "$FENCE_KDUMP_NODES" ]; then + $FENCE_KDUMP_SEND $FENCE_KDUMP_ARGS $FENCE_KDUMP_NODES & + fi +} + +read_kdump_conf +fence_kdump_notify + +get_host_ip +if [ $? -ne 0 ]; then + derror "get_host_ip exited with non-zero status!" + exit 1 +fi + +if [ -z "$DUMP_INSTRUCTION" ]; then + add_dump_code "dump_fs $NEWROOT" +fi + +do_kdump_pre +if [ $? -ne 0 ]; then + derror "kdump_pre script exited with non-zero status!" + do_final_action + # During systemd service to reboot the machine, stop this shell script running + exit 1 +fi +make_trace_mem "kdump saving vmcore" '1:shortmem' '2+:mem' '3+:slab' +do_dump +DUMP_RETVAL=$? + +do_kdump_post $DUMP_RETVAL +if [ $? -ne 0 ]; then + derror "kdump_post script exited with non-zero status!" +fi + +if [ $DUMP_RETVAL -ne 0 ]; then + exit 1 +fi + +do_final_action diff --git a/SOURCES/dracut-module-setup.sh b/SOURCES/dracut-module-setup.sh new file mode 100755 index 0000000..c6d6fee --- /dev/null +++ b/SOURCES/dracut-module-setup.sh @@ -0,0 +1,1079 @@ +#!/bin/bash + +kdump_module_init() { + if ! [[ -d "${initdir}/tmp" ]]; then + mkdir -p "${initdir}/tmp" + fi + + . /lib/kdump/kdump-lib.sh +} + +check() { + [[ $debug ]] && set -x + #kdumpctl sets this explicitly + if [ -z "$IN_KDUMP" ] || [ ! -f /etc/kdump.conf ] + then + return 1 + fi + return 0 +} + +depends() { + local _dep="base shutdown" + + kdump_module_init + + add_opt_module() { + [[ " $omit_dracutmodules " != *\ $1\ * ]] && _dep="$_dep $1" + } + + if is_squash_available; then + add_opt_module squash + else + dwarning "Required modules to build a squashed kdump image is missing!" + fi + + if is_wdt_active; then + add_opt_module watchdog + fi + + if is_ssh_dump_target; then + _dep="$_dep ssh-client" + fi + + if [ "$(uname -m)" = "s390x" ]; then + _dep="$_dep znet" + fi + + if [ -n "$( find /sys/devices -name drm )" ] || [ -d /sys/module/hyperv_fb ]; then + add_opt_module drm + fi + + if is_generic_fence_kdump || is_pcs_fence_kdump; then + _dep="$_dep network" + fi + + echo $_dep +} + +kdump_is_bridge() { + [ -d /sys/class/net/"$1"/bridge ] +} + +kdump_is_bond() { + [ -d /sys/class/net/"$1"/bonding ] +} + +kdump_is_team() { + [ -f /usr/bin/teamnl ] && teamnl $1 ports &> /dev/null +} + +kdump_is_vlan() { + [ -f /proc/net/vlan/"$1" ] +} + +# $1: netdev name +source_ifcfg_file() { + local ifcfg_file + + dwarning "Network Scripts are deprecated. You are encouraged to set up network by NetworkManager." + ifcfg_file=$(get_ifcfg_filename $1) + if [ -f "${ifcfg_file}" ]; then + . ${ifcfg_file} + else + dwarning "The ifcfg file of $1 is not found!" + fi +} + +# $1: nmcli connection show output +kdump_setup_dns() { + local _netdev="$1" + local _nm_show_cmd="$2" + local _nameserver _dns _tmp array + local _dnsfile=${initdir}/etc/cmdline.d/42dns.conf + + _tmp=$(get_nmcli_value_by_field "$_nm_show_cmd" "IP4.DNS") + array=(${_tmp//|/ }) + if [[ ${array[@]} ]]; then + for _dns in "${array[@]}" + do + echo "nameserver=$_dns" >> "$_dnsfile" + done + else + dwarning "Failed to get DNS info via nmcli output. Now try sourcing ifcfg script" + source_ifcfg_file "$_netdev" + [ -n "$DNS1" ] && echo "nameserver=$DNS1" > "$_dnsfile" + [ -n "$DNS2" ] && echo "nameserver=$DNS2" >> "$_dnsfile" + fi + + while read content; + do + _nameserver=$(echo $content | grep ^nameserver) + [ -z "$_nameserver" ] && continue + + _dns=$(echo $_nameserver | cut -d' ' -f2) + [ -z "$_dns" ] && continue + + if [ ! -f $_dnsfile ] || [ ! $(cat $_dnsfile | grep -q $_dns) ]; then + echo "nameserver=$_dns" >> "$_dnsfile" + fi + done < "/etc/resolv.conf" +} + +# $1: repeat times +# $2: string to be repeated +# $3: separator +repeatedly_join_str() { + local _count="$1" + local _str="$2" + local _separator="$3" + local i _res + + if [[ "$_count" -le 0 ]]; then + echo -n "" + return + fi + + i=0 + _res="$_str" + ((_count--)) + + while [[ "$i" -lt "$_count" ]]; do + ((i++)) + _res="${_res}${_separator}${_str}" + done + echo -n "$_res" +} + +# $1: prefix +# $2: ipv6_flag="-6" indicates it's IPv6 +# Given a prefix, calculate the netmask (equivalent of "ipcalc -m") +# by concatenating three parts, +# 1) the groups with all bits set 1 +# 2) a group with partial bits set to 0 +# 3) the groups with all bits set to 0 +cal_netmask_by_prefix() { + local _prefix="$1" + local _ipv6_flag="$2" _ipv6 + local _bits_per_octet=8 + local _count _res _octets_per_group _octets_total _seperator _total_groups + local _max_group_value _max_group_value_repr _bits_per_group _tmp _zero_bits + + if [[ "$_ipv6_flag" == "-6" ]]; then + _ipv6=1 + else + _ipv6=0 + fi + + if [[ "$_prefix" -lt 0 || "$_prefix" -gt 128 ]] || \ + ( ((!_ipv6)) && [[ "$_prefix" -gt 32 ]] ); then + derror "Bad prefix:$_prefix for calculating netmask" + exit 1 + fi + + if ((_ipv6)); then + _octets_per_group=2 + _octets_total=16 + _seperator=":" + else + _octets_per_group=1 + _octets_total=4 + _seperator="." + fi + + _total_groups=$((_octets_total/_octets_per_group)) + _bits_per_group=$((_octets_per_group * _bits_per_octet)) + _max_group_value=$(((1 << _bits_per_group) - 1)) + + if ((_ipv6)); then + _max_group_value_repr=$(printf "%x" $_max_group_value) + else + _max_group_value_repr="$_max_group_value" + fi + + _count=$((_prefix/_octets_per_group/_bits_per_octet)) + _first_part=$(repeatedly_join_str "$_count" "$_max_group_value_repr" "$_seperator") + _res="$_first_part" + + _tmp=$((_octets_total*_bits_per_octet-_prefix)) + _zero_bits=$(expr $_tmp % $_bits_per_group) + if [[ "$_zero_bits" -ne 0 ]]; then + _second_part=$((_max_group_value >> _zero_bits << _zero_bits)) + if ((_ipv6)); then + _second_part=$(printf "%x" $_second_part) + fi + ((_count++)) + if [[ -z "$_first_part" ]]; then + _res="$_second_part" + else + _res="${_first_part}${_seperator}${_second_part}" + fi + fi + + _count=$((_total_groups-_count)) + if [[ "$_count" -eq 0 ]]; then + echo -n "$_res" + return + fi + + if ((_ipv6)) && [[ "$_count" -gt 1 ]] ; then + # use condensed notion for IPv6 + _third_part=":" + else + _third_part=$(repeatedly_join_str "$_count" "0" "$_seperator") + fi + + if [[ -z "$_res" ]] && ((!_ipv6)) ; then + echo -n "${_third_part}" + else + echo -n "${_res}${_seperator}${_third_part}" + fi +} + +#$1: netdev name +#$2: srcaddr +#if it use static ip echo it, or echo null +kdump_static_ip() { + local _netdev="$1" _srcaddr="$2" kdumpnic="$3" _ipv6_flag + local _netmask _gateway _ipaddr _target _nexthop _prefix + + _ipaddr=$(ip addr show dev $_netdev permanent | awk "/ $_srcaddr\/.* /{print \$2}") + + if is_ipv6_address $_srcaddr; then + _ipv6_flag="-6" + fi + + if [ -n "$_ipaddr" ]; then + _gateway=$(ip $_ipv6_flag route list dev $_netdev | \ + awk '/^default /{print $3}' | head -n 1) + + if [ "x" != "x"$_ipv6_flag ]; then + # _ipaddr="2002::56ff:feb6:56d5/64", _netmask is the number after "/" + _netmask=${_ipaddr#*\/} + _srcaddr="[$_srcaddr]" + _gateway="[$_gateway]" + else + _prefix=$(cut -d'/' -f2 <<< "$_ipaddr") + _netmask=$(cal_netmask_by_prefix "$_prefix" "$_ipv6_flag") + if [[ "$?" -ne 0 ]]; then + derror "Failed to calculate netmask for $_ipaddr" + exit 1 + fi + fi + echo -n "${_srcaddr}::${_gateway}:${_netmask}::" + fi + + /sbin/ip $_ipv6_flag route show | grep -v default |\ + grep ".*via.* $_netdev " | grep -v "^[[:space:]]*nexthop" |\ + while read _route; do + _target=`echo $_route | cut -d ' ' -f1` + _nexthop=`echo $_route | cut -d ' ' -f3` + if [ "x" != "x"$_ipv6_flag ]; then + _target="[$_target]" + _nexthop="[$_nexthop]" + fi + echo "rd.route=$_target:$_nexthop:$kdumpnic" + done >> ${initdir}/etc/cmdline.d/45route-static.conf + + kdump_handle_mulitpath_route $_netdev $_srcaddr $kdumpnic +} + +kdump_handle_mulitpath_route() { + local _netdev="$1" _srcaddr="$2" kdumpnic="$3" _ipv6_flag + local _target _nexthop _route _weight _max_weight _rule + + if is_ipv6_address $_srcaddr; then + _ipv6_flag="-6" + fi + + while IFS="" read _route; do + if [[ "$_route" =~ [[:space:]]+nexthop ]]; then + _route=$(echo "$_route" | sed -e 's/^[[:space:]]*//') + # Parse multipath route, using previous _target + [[ "$_target" == 'default' ]] && continue + [[ "$_route" =~ .*via.*\ $_netdev ]] || continue + + _weight=`echo "$_route" | cut -d ' ' -f7` + if [[ "$_weight" -gt "$_max_weight" ]]; then + _nexthop=`echo "$_route" | cut -d ' ' -f3` + _max_weight=$_weight + if [ "x" != "x"$_ipv6_flag ]; then + _rule="rd.route=[$_target]:[$_nexthop]:$kdumpnic" + else + _rule="rd.route=$_target:$_nexthop:$kdumpnic" + fi + fi + else + [[ -n "$_rule" ]] && echo "$_rule" + _target=`echo "$_route" | cut -d ' ' -f1` + _rule="" _max_weight=0 _weight=0 + fi + done >> ${initdir}/etc/cmdline.d/45route-static.conf\ + <<< "$(/sbin/ip $_ipv6_flag route show)" + + [[ -n $_rule ]] && echo $_rule >> ${initdir}/etc/cmdline.d/45route-static.conf +} + +kdump_get_mac_addr() { + cat /sys/class/net/$1/address +} + +#Bonding or team master modifies the mac address +#of its slaves, we should use perm address +kdump_get_perm_addr() { + local addr=$(ethtool -P $1 | sed -e 's/Permanent address: //') + if [ -z "$addr" ] || [ "$addr" = "00:00:00:00:00:00" ] + then + derror "Can't get the permanent address of $1" + else + echo "$addr" + fi +} + +# Prefix kernel assigned names with "kdump-". EX: eth0 -> kdump-eth0 +# Because kernel assigned names are not persistent between 1st and 2nd +# kernel. We could probably end up with eth0 being eth1, eth0 being +# eth1, and naming conflict happens. +kdump_setup_ifname() { + local _ifname + + # If ifname already has 'kdump-' prefix, we must be switching from + # fadump to kdump. Skip prefixing 'kdump-' in this case as adding + # another prefix may truncate the ifname. Since an ifname with + # 'kdump-' is already persistent, this should be fine. + if [[ $1 =~ eth* ]] && [[ ! $1 =~ ^kdump-* ]]; then + _ifname="kdump-$1" + else + _ifname="$1" + fi + + echo "$_ifname" +} + +kdump_setup_bridge() { + local _netdev=$1 + local _brif _dev _mac _kdumpdev + for _dev in `ls /sys/class/net/$_netdev/brif/`; do + _kdumpdev=$_dev + if kdump_is_bond "$_dev"; then + $(kdump_setup_bond "$_dev" "$(get_nmcli_connection_show_cmd_by_ifname "$_dev")") + if [[ $? != 0 ]]; then + exit 1 + fi + elif kdump_is_team "$_dev"; then + kdump_setup_team "$_dev" + elif kdump_is_vlan "$_dev"; then + kdump_setup_vlan "$_dev" + else + _mac=$(kdump_get_mac_addr $_dev) + _kdumpdev=$(kdump_setup_ifname $_dev) + echo -n " ifname=$_kdumpdev:$_mac" >> ${initdir}/etc/cmdline.d/41bridge.conf + fi + _brif+="$_kdumpdev," + done + echo " bridge=$_netdev:$(echo $_brif | sed -e 's/,$//')" >> ${initdir}/etc/cmdline.d/41bridge.conf +} + +# drauct takes bond=[::[:]] syntax to parse +# bond. For example: +# bond=bond0:eth0,eth1:mode=balance-rr +kdump_setup_bond() { + local _netdev="$1" + local _nm_show_cmd="$2" + local _dev _mac _slaves _kdumpdev _bondoptions + for _dev in `cat /sys/class/net/$_netdev/bonding/slaves`; do + _mac=$(kdump_get_perm_addr $_dev) + _kdumpdev=$(kdump_setup_ifname $_dev) + echo -n " ifname=$_kdumpdev:$_mac" >> ${initdir}/etc/cmdline.d/42bond.conf + _slaves+="$_kdumpdev," + done + echo -n " bond=$_netdev:$(echo $_slaves | sed 's/,$//')" >> ${initdir}/etc/cmdline.d/42bond.conf + + _bondoptions=$(get_nmcli_value_by_field "$_nm_show_cmd" "bond.options") + + if [[ -z "_bondoptions" ]]; then + dwarning "Failed to get bond configuration via nmlci output. Now try sourcing ifcfg script." + source_ifcfg_file $_netdev + _bondoptions="$(echo $BONDING_OPTS | xargs echo | tr " " ",")" + fi + + if [[ -z "_bondoptions" ]]; then + derror "Get empty bond options" + exit 1 + fi + + echo ":$_bondoptions" >> ${initdir}/etc/cmdline.d/42bond.conf +} + +kdump_setup_team() { + local _netdev=$1 + local _dev _mac _slaves _kdumpdev + for _dev in `teamnl $_netdev ports | awk -F':' '{print $2}'`; do + _mac=$(kdump_get_perm_addr $_dev) + _kdumpdev=$(kdump_setup_ifname $_dev) + echo -n " ifname=$_kdumpdev:$_mac" >> ${initdir}/etc/cmdline.d/44team.conf + _slaves+="$_kdumpdev," + done + echo " team=$_netdev:$(echo $_slaves | sed -e 's/,$//')" >> ${initdir}/etc/cmdline.d/44team.conf + #Buggy version teamdctl outputs to stderr! + #Try to use the latest version of teamd. + teamdctl "$_netdev" config dump > ${initdir}/tmp/$$-$_netdev.conf + if [ $? -ne 0 ] + then + derror "teamdctl failed." + exit 1 + fi + inst_dir /etc/teamd + inst_simple ${initdir}/tmp/$$-$_netdev.conf "/etc/teamd/$_netdev.conf" + rm -f ${initdir}/tmp/$$-$_netdev.conf +} + +kdump_setup_vlan() { + local _netdev=$1 + local _phydev="$(awk '/^Device:/{print $2}' /proc/net/vlan/"$_netdev")" + local _netmac="$(kdump_get_mac_addr $_phydev)" + local _kdumpdev + + #Just support vlan over bond and team + if kdump_is_bridge "$_phydev"; then + derror "Vlan over bridge is not supported!" + exit 1 + elif kdump_is_bond "$_phydev"; then + $(kdump_setup_bond "$_phydev" "$(get_nmcli_connection_show_cmd_by_ifname "$_phydev")") + if [[ $? != 0 ]]; then + exit 1 + fi + echo " vlan=$(kdump_setup_ifname $_netdev):$_phydev" > ${initdir}/etc/cmdline.d/43vlan.conf + else + _kdumpdev="$(kdump_setup_ifname $_phydev)" + echo " vlan=$(kdump_setup_ifname $_netdev):$_kdumpdev ifname=$_kdumpdev:$_netmac" > ${initdir}/etc/cmdline.d/43vlan.conf + fi +} + +# find online znet device +# return ifname (_netdev) +# code reaped from the list_configured function of +# https://github.com/hreinecke/s390-tools/blob/master/zconf/znetconf +find_online_znet_device() { + local CCWGROUPBUS_DEVICEDIR="/sys/bus/ccwgroup/devices" + local NETWORK_DEVICES d ifname ONLINE + + [ ! -d "$CCWGROUPBUS_DEVICEDIR" ] && return + NETWORK_DEVICES=$(find $CCWGROUPBUS_DEVICEDIR) + for d in $NETWORK_DEVICES + do + [ ! -f "$d/online" ] && continue + read ONLINE < $d/online + if [ $ONLINE -ne 1 ]; then + continue + fi + # determine interface name, if there (only for qeth and if + # device is online) + if [ -f $d/if_name ] + then + read ifname < $d/if_name + elif [ -d $d/net ] + then + ifname=$(ls $d/net/) + fi + [ -n "$ifname" ] && break + done + echo -n "$ifname" +} + +# setup s390 znet cmdline +# $1: netdev (ifname) +# $2: nmcli connection show output +kdump_setup_znet() { + local _netdev="$1" + local _nmcli_cmd="$2" + local s390_prefix="802-3-ethernet.s390-" + local _options="" + local NETTYPE + local SUBCHANNELS + + NETTYPE=$(get_nmcli_value_by_field "$_nmcli_cmd" "${s390_prefix}nettype") + SUBCHANNELS=$(get_nmcli_value_by_field "$_nmcli_cmd" "${s390_prefix}subchannels") + _options=$(get_nmcli_value_by_field "$_nmcli_cmd" "${s390_prefix}options") + + if [[ -z "$NETTYPE" || -z "$SUBCHANNELS" || -z "$_options" ]]; then + dwarning "Failed to get znet configuration via nmlci output. Now try sourcing ifcfg script." + source_ifcfg_file $_netdev + for i in $OPTIONS; do + _options=${_options},$i + done + fi + + if [[ -z "$NETTYPE" || -z "$SUBCHANNELS" || -z "$_options" ]]; then + exit 1 + fi + + echo rd.znet=${NETTYPE},${SUBCHANNELS},${_options} rd.znet_ifname=$_netdev:${SUBCHANNELS} > ${initdir}/etc/cmdline.d/30znet.conf +} + +kdump_get_ip_route() +{ + local _route=$(/sbin/ip -o route get to $1 2>&1) + [ $? != 0 ] && die "Bad kdump network destination: $1" + echo $_route +} + +kdump_get_ip_route_field() +{ + if `echo $1 | grep -q $2`; then + echo ${1##*$2} | cut -d ' ' -f1 + fi +} + +kdump_get_remote_ip() +{ + local _remote=$(get_remote_host $1) _remote_temp + if is_hostname $_remote; then + _remote_temp=`getent ahosts $_remote | grep -v : | head -n 1` + if [ -z "$_remote_temp" ]; then + _remote_temp=`getent ahosts $_remote | head -n 1` + fi + _remote=`echo $_remote_temp | cut -d' ' -f1` + fi + echo $_remote +} + +# Setup dracut to bring up network interface that enable +# initramfs accessing giving destination +# $1: destination host +kdump_install_net() { + local _destaddr _srcaddr _route _netdev _nm_show_cmd kdumpnic + local _static _proto _ip_conf _ip_opts _ifname_opts + local _znet_netdev _nm_show_cmd_znet + + _destaddr=$(kdump_get_remote_ip $1) + _route=$(kdump_get_ip_route $_destaddr) + _srcaddr=$(kdump_get_ip_route_field "$_route" "src") + _netdev=$(kdump_get_ip_route_field "$_route" "dev") + _nm_show_cmd=$(get_nmcli_connection_show_cmd_by_ifname "$_netdev") + _netmac=$(kdump_get_mac_addr $_netdev) + kdumpnic=$(kdump_setup_ifname $_netdev) + + _znet_netdev=$(find_online_znet_device) + if [[ -n "$_znet_netdev" ]]; then + _nm_show_cmd_znet=$(get_nmcli_connection_show_cmd_by_ifname "$_znet_netdev") + $(kdump_setup_znet "$_znet_netdev" "$_nm_show_cmd_znet") + if [[ $? != 0 ]]; then + derror "Failed to set up znet" + exit 1 + fi + fi + + _static=$(kdump_static_ip $_netdev $_srcaddr $kdumpnic) + if [ -n "$_static" ]; then + _proto=none + elif is_ipv6_address $_srcaddr; then + _proto=auto6 + else + _proto=dhcp + fi + + _ip_conf="${initdir}/etc/cmdline.d/40ip.conf" + _ip_opts=" ip=${_static}$kdumpnic:${_proto}" + + # dracut doesn't allow duplicated configuration for same NIC, even they're exactly the same. + # so we have to avoid adding duplicates + # We should also check /proc/cmdline for existing ip=xx arg. + # For example, iscsi boot will specify ip=xxx arg in cmdline. + if [ ! -f $_ip_conf ] || ! grep -q $_ip_opts $_ip_conf &&\ + ! grep -q "ip=[^[:space:]]*$_netdev" /proc/cmdline; then + echo "$_ip_opts" >> $_ip_conf + fi + + if kdump_is_bridge "$_netdev"; then + kdump_setup_bridge "$_netdev" + elif kdump_is_bond "$_netdev"; then + $(kdump_setup_bond "$_netdev" "$_nm_show_cmd") + if [[ $? != 0 ]]; then + exit 1 + fi + elif kdump_is_team "$_netdev"; then + kdump_setup_team "$_netdev" + elif kdump_is_vlan "$_netdev"; then + kdump_setup_vlan "$_netdev" + else + _ifname_opts=" ifname=$kdumpnic:$_netmac" + echo "$_ifname_opts" >> $_ip_conf + fi + + kdump_setup_dns "$_netdev" "$_nm_show_cmd" + + if [ ! -f ${initdir}/etc/cmdline.d/50neednet.conf ]; then + # network-manager module needs this parameter + echo "rd.neednet" >> ${initdir}/etc/cmdline.d/50neednet.conf + fi + + # Save netdev used for kdump as cmdline + # Whoever calling kdump_install_net() is setting up the default gateway, + # ie. bootdev/kdumpnic. So don't override the setting if calling + # kdump_install_net() for another time. For example, after setting eth0 as + # the default gate way for network dump, eth1 in the fence kdump path will + # call kdump_install_net again and we don't want eth1 to be the default + # gateway. + if [ ! -f ${initdir}/etc/cmdline.d/60kdumpnic.conf ] && + [ ! -f ${initdir}/etc/cmdline.d/70bootdev.conf ]; then + echo "kdumpnic=$kdumpnic" > ${initdir}/etc/cmdline.d/60kdumpnic.conf + echo "bootdev=$kdumpnic" > ${initdir}/etc/cmdline.d/70bootdev.conf + fi +} + +# install etc/kdump/pre.d and /etc/kdump/post.d +kdump_install_pre_post_conf() { + if [ -d /etc/kdump/pre.d ]; then + for file in /etc/kdump/pre.d/*; do + if [ -x "$file" ]; then + dracut_install $file + elif [ $file != "/etc/kdump/pre.d/*" ]; then + echo "$file is not executable" + fi + done + fi + + if [ -d /etc/kdump/post.d ]; then + for file in /etc/kdump/post.d/*; do + if [ -x "$file" ]; then + dracut_install $file + elif [ $file != "/etc/kdump/post.d/*" ]; then + echo "$file is not executable" + fi + done + fi +} + +default_dump_target_install_conf() +{ + local _target _fstype + local _mntpoint _save_path + + is_user_configured_dump_target && return + + _save_path=$(get_bind_mount_source $(get_save_path)) + _target=$(get_target_from_path $_save_path) + _mntpoint=$(get_mntpoint_from_target $_target) + + _fstype=$(get_fs_type_from_target $_target) + if is_fs_type_nfs $_fstype; then + kdump_install_net "$_target" + _fstype="nfs" + else + _target=$(kdump_get_persistent_dev $_target) + fi + + echo "$_fstype $_target" >> ${initdir}/tmp/$$-kdump.conf + + # don't touch the path under root mount + if [ "$_mntpoint" != "/" ]; then + _save_path=${_save_path##"$_mntpoint"} + fi + + #erase the old path line, then insert the parsed path + sed -i "/^path/d" ${initdir}/tmp/$$-kdump.conf + echo "path $_save_path" >> ${initdir}/tmp/$$-kdump.conf +} + +#install kdump.conf and what user specifies in kdump.conf +kdump_install_conf() { + local _opt _val _pdev + (read_strip_comments /etc/kdump.conf) > ${initdir}/tmp/$$-kdump.conf + + while read _opt _val; + do + # remove inline comments after the end of a directive. + case "$_opt" in + raw) + _pdev=$(persistent_policy="by-id" kdump_get_persistent_dev $_val) + sed -i -e "s#^$_opt[[:space:]]\+$_val#$_opt $_pdev#" ${initdir}/tmp/$$-kdump.conf + ;; + ext[234]|xfs|btrfs|minix) + _pdev=$(kdump_get_persistent_dev $_val) + sed -i -e "s#^$_opt[[:space:]]\+$_val#$_opt $_pdev#" ${initdir}/tmp/$$-kdump.conf + ;; + ssh|nfs) + kdump_install_net "$_val" + ;; + dracut_args) + if [[ $(get_dracut_args_fstype "$_val") = nfs* ]] ; then + kdump_install_net "$(get_dracut_args_target "$_val")" + fi + ;; + kdump_pre|kdump_post|extra_bins) + dracut_install $_val + ;; + core_collector) + dracut_install "${_val%%[[:blank:]]*}" + ;; + esac + done <<< "$(read_strip_comments /etc/kdump.conf)" + + kdump_install_pre_post_conf + + default_dump_target_install_conf + + kdump_configure_fence_kdump "${initdir}/tmp/$$-kdump.conf" + inst "${initdir}/tmp/$$-kdump.conf" "/etc/kdump.conf" + rm -f ${initdir}/tmp/$$-kdump.conf +} + +# Default sysctl parameters should suffice for kdump kernel. +# Remove custom configurations sysctl.conf & sysctl.d/* +remove_sysctl_conf() { + + # As custom configurations like vm.min_free_kbytes can lead + # to OOM issues in kdump kernel, avoid them + rm -f "${initdir}/etc/sysctl.conf" + rm -rf "${initdir}/etc/sysctl.d" + rm -rf "${initdir}/run/sysctl.d" + rm -rf "${initdir}/usr/lib/sysctl.d" +} + +kdump_iscsi_get_rec_val() { + + local result + + # The open-iscsi 742 release changed to using flat files in + # /var/lib/iscsi. + + result=$(/sbin/iscsiadm --show -m session -r ${1} | grep "^${2} = ") + result=${result##* = } + echo $result +} + +kdump_get_iscsi_initiator() { + local _initiator + local initiator_conf="/etc/iscsi/initiatorname.iscsi" + + [ -f "$initiator_conf" ] || return 1 + + while read _initiator; do + [ -z "${_initiator%%#*}" ] && continue # Skip comment lines + + case $_initiator in + InitiatorName=*) + initiator=${_initiator#InitiatorName=} + echo "rd.iscsi.initiator=${initiator}" + return 0;; + *) ;; + esac + done < ${initiator_conf} + + return 1 +} + +# Figure out iBFT session according to session type +is_ibft() { + [ "$(kdump_iscsi_get_rec_val $1 "node.discovery_type")" = fw ] +} + +kdump_setup_iscsi_device() { + local path=$1 + local tgt_name; local tgt_ipaddr; + local username; local password; local userpwd_str; + local username_in; local password_in; local userpwd_in_str; + local netroot_str ; local initiator_str; + local netroot_conf="${initdir}/etc/cmdline.d/50iscsi.conf" + local initiator_conf="/etc/iscsi/initiatorname.iscsi" + + dinfo "Found iscsi component $1" + + # Check once before getting explicit values, so we can bail out early, + # e.g. in case of pure-hardware(all-offload) iscsi. + if ! /sbin/iscsiadm -m session -r ${path} &>/dev/null ; then + return 1 + fi + + if is_ibft ${path}; then + return + fi + + # Remove software iscsi cmdline generated by 95iscsi, + # and let kdump regenerate here. + rm -f ${initdir}/etc/cmdline.d/95iscsi.conf + + tgt_name=$(kdump_iscsi_get_rec_val ${path} "node.name") + tgt_ipaddr=$(kdump_iscsi_get_rec_val ${path} "node.conn\[0\].address") + + # get and set username and password details + username=$(kdump_iscsi_get_rec_val ${path} "node.session.auth.username") + [ "$username" == "" ] && username="" + password=$(kdump_iscsi_get_rec_val ${path} "node.session.auth.password") + [ "$password" == "" ] && password="" + username_in=$(kdump_iscsi_get_rec_val ${path} "node.session.auth.username_in") + [ -n "$username" ] && userpwd_str="$username:$password" + + # get and set incoming username and password details + [ "$username_in" == "" ] && username_in="" + password_in=$(kdump_iscsi_get_rec_val ${path} "node.session.auth.password_in") + [ "$password_in" == "" ] && password_in="" + + [ -n "$username_in" ] && userpwd_in_str=":$username_in:$password_in" + + kdump_install_net "$tgt_ipaddr" + + # prepare netroot= command line + # FIXME: Do we need to parse and set other parameters like protocol, port + # iscsi_iface_name, netdev_name, LUN etc. + + if is_ipv6_address $tgt_ipaddr; then + tgt_ipaddr="[$tgt_ipaddr]" + fi + netroot_str="netroot=iscsi:${userpwd_str}${userpwd_in_str}@$tgt_ipaddr::::$tgt_name" + + [[ -f $netroot_conf ]] || touch $netroot_conf + + # If netroot target does not exist already, append. + if ! grep -q $netroot_str $netroot_conf; then + echo $netroot_str >> $netroot_conf + dinfo "Appended $netroot_str to $netroot_conf" + fi + + # Setup initator + initiator_str=$(kdump_get_iscsi_initiator) + [ $? -ne "0" ] && derror "Failed to get initiator name" && return 1 + + # If initiator details do not exist already, append. + if ! grep -q "$initiator_str" $netroot_conf; then + echo "$initiator_str" >> $netroot_conf + dinfo "Appended "$initiator_str" to $netroot_conf" + fi +} + +kdump_check_iscsi_targets () { + # If our prerequisites are not met, fail anyways. + type -P iscsistart >/dev/null || return 1 + + kdump_check_setup_iscsi() ( + local _dev + _dev=$1 + + [[ -L /sys/dev/block/$_dev ]] || return + cd "$(readlink -f /sys/dev/block/$_dev)" + until [[ -d sys || -d iscsi_session ]]; do + cd .. + done + [[ -d iscsi_session ]] && kdump_setup_iscsi_device "$PWD" + ) + + [[ $hostonly ]] || [[ $mount_needs ]] && { + for_each_host_dev_and_slaves_all kdump_check_setup_iscsi + } +} + +# hostname -a is deprecated, do it by ourself +get_alias() { + local ips + local entries + local alias_set + + ips=$(hostname -I) + for ip in $ips + do + # in /etc/hosts, alias can come at the 2nd column + entries=$(grep $ip /etc/hosts | awk '{ $1=""; print $0 }') + if [ $? -eq 0 ]; then + alias_set="$alias_set $entries" + fi + done + + echo $alias_set +} + +is_localhost() { + local hostnames=$(hostname -A) + local shortnames=$(hostname -A -s) + local aliasname=$(get_alias) + local nodename=$1 + + hostnames="$hostnames $shortnames $aliasname" + + for name in ${hostnames}; do + if [ "$name" == "$nodename" ]; then + return 0 + fi + done + return 1 +} + +# retrieves fence_kdump nodes from Pacemaker cluster configuration +get_pcs_fence_kdump_nodes() { + local nodes + + pcs cluster sync > /dev/null 2>&1 && pcs cluster cib-upgrade > /dev/null 2>&1 + # get cluster nodes from cluster cib, get interface and ip address + nodelist=`pcs cluster cib | xmllint --xpath "/cib/status/node_state/@uname" -` + + # nodelist is formed as 'uname="node1" uname="node2" ... uname="nodeX"' + # we need to convert each to node1, node2 ... nodeX in each iteration + for node in ${nodelist}; do + # convert $node from 'uname="nodeX"' to 'nodeX' + eval $node + nodename=$uname + # Skip its own node name + if is_localhost $nodename; then + continue + fi + nodes="$nodes $nodename" + done + + echo $nodes +} + +# retrieves fence_kdump args from config file +get_pcs_fence_kdump_args() { + if [ -f $FENCE_KDUMP_CONFIG_FILE ]; then + . $FENCE_KDUMP_CONFIG_FILE + echo $FENCE_KDUMP_OPTS + fi +} + +get_generic_fence_kdump_nodes() { + local filtered + local nodes + + nodes=$(get_option_value "fence_kdump_nodes") + for node in ${nodes}; do + # Skip its own node name + if is_localhost $node; then + continue + fi + filtered="$filtered $node" + done + echo $filtered +} + +# setup fence_kdump in cluster +# setup proper network and install needed files +kdump_configure_fence_kdump () { + local kdump_cfg_file=$1 + local nodes + local args + + if is_generic_fence_kdump; then + nodes=$(get_generic_fence_kdump_nodes) + + elif is_pcs_fence_kdump; then + nodes=$(get_pcs_fence_kdump_nodes) + + # set appropriate options in kdump.conf + echo "fence_kdump_nodes $nodes" >> ${kdump_cfg_file} + + args=$(get_pcs_fence_kdump_args) + if [ -n "$args" ]; then + echo "fence_kdump_args $args" >> ${kdump_cfg_file} + fi + + else + # fence_kdump not configured + return 1 + fi + + # setup network for each node + for node in ${nodes}; do + kdump_install_net $node + done + + dracut_install /etc/hosts + dracut_install /etc/nsswitch.conf + dracut_install $FENCE_KDUMP_SEND +} + +# Install a random seed used to feed /dev/urandom +# By the time kdump service starts, /dev/uramdom is already fed by systemd +kdump_install_random_seed() { + local poolsize=`cat /proc/sys/kernel/random/poolsize` + + if [ ! -d ${initdir}/var/lib/ ]; then + mkdir -p ${initdir}/var/lib/ + fi + + dd if=/dev/urandom of=${initdir}/var/lib/random-seed \ + bs=$poolsize count=1 2> /dev/null +} + +kdump_install_systemd_conf() { + local failure_action=$(get_option_value "failure_action") + + # Kdump turns out to require longer default systemd mount timeout + # than 1st kernel(90s by default), we use default 300s for kdump. + grep -r "^[[:space:]]*DefaultTimeoutStartSec=" ${initdir}/etc/systemd/system.conf* &>/dev/null + if [ $? -ne 0 ]; then + mkdir -p ${initdir}/etc/systemd/system.conf.d + echo "[Manager]" > ${initdir}/etc/systemd/system.conf.d/kdump.conf + echo "DefaultTimeoutStartSec=300s" >> ${initdir}/etc/systemd/system.conf.d/kdump.conf + fi + + # Forward logs to console directly, and don't read Kmsg, this avoids + # unneccessary memory consumption and make console output more useful. + # Only do so for non fadump image. + mkdir -p ${initdir}/etc/systemd/journald.conf.d + echo "[Journal]" > ${initdir}/etc/systemd/journald.conf.d/kdump.conf + echo "Storage=volatile" >> ${initdir}/etc/systemd/journald.conf.d/kdump.conf + echo "ReadKMsg=no" >> ${initdir}/etc/systemd/journald.conf.d/kdump.conf + echo "ForwardToConsole=yes" >> ${initdir}/etc/systemd/journald.conf.d/kdump.conf +} + +install() { + kdump_module_init + kdump_install_conf + remove_sysctl_conf + + if is_ssh_dump_target; then + kdump_install_random_seed + fi + dracut_install -o /etc/adjtime /etc/localtime + inst "$moddir/monitor_dd_progress" "/kdumpscripts/monitor_dd_progress" + chmod +x ${initdir}/kdumpscripts/monitor_dd_progress + inst "/bin/dd" "/bin/dd" + inst "/bin/tail" "/bin/tail" + inst "/bin/date" "/bin/date" + inst "/bin/sync" "/bin/sync" + inst "/bin/cut" "/bin/cut" + inst "/bin/head" "/bin/head" + inst "/bin/awk" "/bin/awk" + inst "/bin/sed" "/bin/sed" + inst "/sbin/makedumpfile" "/sbin/makedumpfile" + inst "/sbin/vmcore-dmesg" "/sbin/vmcore-dmesg" + inst "/usr/bin/printf" "/sbin/printf" + inst "/usr/bin/logger" "/sbin/logger" + inst "/usr/bin/chmod" "/sbin/chmod" + inst "/lib/kdump/kdump-lib.sh" "/lib/kdump-lib.sh" + inst "/lib/kdump/kdump-lib-initramfs.sh" "/lib/kdump-lib-initramfs.sh" + inst "/lib/kdump/kdump-logger.sh" "/lib/kdump-logger.sh" + inst "$moddir/kdump.sh" "/usr/bin/kdump.sh" + inst "$moddir/kdump-capture.service" "$systemdsystemunitdir/kdump-capture.service" + systemctl -q --root "$initdir" add-wants initrd.target kdump-capture.service + inst "$moddir/kdump-error-handler.sh" "/usr/bin/kdump-error-handler.sh" + # Replace existing emergency service and emergency target + cp "$moddir/kdump-emergency.service" "$initdir/$systemdsystemunitdir/emergency.service" + cp "$moddir/kdump-emergency.target" "$initdir/$systemdsystemunitdir/emergency.target" + # Also redirect dracut-emergency to kdump error handler + ln_r "$systemdsystemunitdir/emergency.service" "$systemdsystemunitdir/dracut-emergency.service" + + # Check for all the devices and if any device is iscsi, bring up iscsi + # target. Ideally all this should be pushed into dracut iscsi module + # at some point of time. + kdump_check_iscsi_targets + + kdump_install_systemd_conf + + # nfs/ssh dump will need to get host ip in second kernel and need to call 'ip' tool, see get_host_ip for more detail + if is_nfs_dump_target || is_ssh_dump_target; then + inst "ip" + fi + + # For the lvm type target under kdump, in /etc/lvm/lvm.conf we can + # safely replace "reserved_memory=XXXX"(default value is 8192) with + # "reserved_memory=1024" to lower memory pressure under kdump. We do + # it unconditionally here, if "/etc/lvm/lvm.conf" doesn't exist, it + # actually does nothing. + sed -i -e \ + 's/\(^[[:space:]]*reserved_memory[[:space:]]*=\)[[:space:]]*[[:digit:]]*/\1 1024/' \ + ${initdir}/etc/lvm/lvm.conf &>/dev/null + + # Save more memory by dropping switch root capability + dracut_no_switch_root +} diff --git a/SOURCES/dracut-monitor_dd_progress b/SOURCES/dracut-monitor_dd_progress new file mode 100644 index 0000000..e139d33 --- /dev/null +++ b/SOURCES/dracut-monitor_dd_progress @@ -0,0 +1,28 @@ +#!/bin/sh + +SRC_FILE_MB=$1 + +while true +do + DD_PID=`pidof dd` + if [ -n "$DD_PID" ]; then + break + fi +done + +while true +do + sleep 5 + if [ ! -d /proc/$DD_PID ]; then + break + fi + + kill -s USR1 $DD_PID + CURRENT_SIZE=`tail -n 1 /tmp/dd_progress_file | sed "s/[^0-9].*//g"` + [ -n "$CURRENT_SIZE" ] && { + CURRENT_MB=$(($CURRENT_SIZE / 1048576)) + echo -e "Copied $CURRENT_MB MB / $SRC_FILE_MB MB\r" + } +done + +rm -f /tmp/dd_progress_file diff --git a/SOURCES/early-kdump-howto.txt b/SOURCES/early-kdump-howto.txt new file mode 100644 index 0000000..68b23c7 --- /dev/null +++ b/SOURCES/early-kdump-howto.txt @@ -0,0 +1,95 @@ +Early Kdump HOWTO + +Introduction +------------ + +Early kdump is a mechanism to make kdump operational earlier than normal kdump +service. The kdump service starts early enough for general crash cases, but +there are some cases where it has no chance to make kdump operational in boot +sequence, such as detecting devices and starting early services. If you hit +such a case, early kdump may allow you to get more information of it. + +Early kdump is implemented as a dracut module. It adds a kernel (vmlinuz) and +initramfs for kdump to your system's initramfs in order to load them as early +as possible. After that, if you provide "rd.earlykdump" in kernel command line, +then in the initramfs, early kdump will load those files like the normal kdump +service. This is disabled by default. + +For the normal kdump service, it can check whether the early kdump has loaded +the crash kernel and initramfs. It has no conflict with the early kdump. + +How to configure early kdump +---------------------------- + +We assume if you're reading this document, you should already have kexec-tools +installed. + +You can rebuild the initramfs with earlykdump support with below steps: + +1. start kdump service to make sure kdump initramfs is created. + + # systemctl start kdump + + NOTE: If a crash occurs during boot process, early kdump captures a vmcore + and reboot the system by default, so the system might go into crash loop. + You can avoid such a crash loop by adding the following settings, which + power off the system after dump capturing, to kdump.conf in advance: + + final_action poweroff + failure_action poweroff + + For the failure_action, you can choose anything other than "reboot". + +2. rebuild system initramfs with earlykdump support. + + # dracut --force --add earlykdump + + NOTE: Recommend to backup the original system initramfs before performing + this step to put it back if something happens during boot-up. + +3. add rd.earlykdump in grub kernel command line. + +After making said changes, reboot your system to take effect. Of course, if you +want to disable early kdump, you can simply remove "rd.earlykdump" from kernel +boot parameters in grub, and reboot system like above. + +Once the boot is completed, you can check the status of the early kdump support +on the command prompt: + + # journalctl -b | grep early-kdump + +Then, you will see some useful logs, for example: + +- if early kdump is successful. + +Mar 09 09:57:56 localhost dracut-cmdline[190]: early-kdump is enabled. +Mar 09 09:57:56 localhost dracut-cmdline[190]: kexec: loaded early-kdump kernel + +- if early kdump is disabled. + +Mar 09 10:02:47 localhost dracut-cmdline[189]: early-kdump is disabled. + +Notes +----- + +- The size of early kdump initramfs will be large because it includes vmlinuz + and kdump initramfs. + +- Early kdump inherits the settings of normal kdump, so any changes that + caused normal kdump rebuilding also require rebuilding the system initramfs + to make sure that the changes take effect for early kdump. Therefore, after + the rebuilding of kdump initramfs is completed, provide a prompt message to + tell the fact. + +- If you install an updated kernel and reboot the system with it, the early + kdump will be disabled by default. To enable it with the new kernel, you + need to take the above steps again. + +Limitation +---------- + +- At present, early kdump doesn't support fadump. + +- Early kdump loads a crash kernel and initramfs at the beginning of the + process in system's initramfs, so a crash at earlier than that (e.g. in + kernel initialization) cannot be captured even with the early kdump. diff --git a/SOURCES/fadump-howto.txt b/SOURCES/fadump-howto.txt new file mode 100644 index 0000000..111586c --- /dev/null +++ b/SOURCES/fadump-howto.txt @@ -0,0 +1,351 @@ +Firmware assisted dump (fadump) HOWTO + +Introduction + +Firmware assisted dump is a new feature in the 3.4 mainline kernel supported +only on powerpc architecture. The goal of firmware-assisted dump is to enable +the dump of a crashed system, and to do so from a fully-reset system, and to +minimize the total elapsed time until the system is back in production use. A +complete documentation on implementation can be found at +Documentation/powerpc/firmware-assisted-dump.txt in upstream linux kernel tree +from 3.4 version and above. + +Please note that the firmware-assisted dump feature is only available on Power6 +and above systems with recent firmware versions. + +Overview + +Fadump + +Fadump is a robust kernel crash dumping mechanism to get reliable kernel crash +dump with assistance from firmware. This approach does not use kexec, instead +firmware assists in booting the kdump kernel while preserving memory contents. +Unlike kdump, the system is fully reset, and loaded with a fresh copy of the +kernel. In particular, PCI and I/O devices are reinitialized and are in a +clean, consistent state. This second kernel, often called a capture kernel, +boots with very little memory and captures the dump image. + +The first kernel registers the sections of memory with the Power firmware for +dump preservation during OS initialization. These registered sections of memory +are reserved by the first kernel during early boot. When a system crashes, the +Power firmware fully resets the system, preserves all the system memory +contents, save the low memory (boot memory of size larger of 5% of system +RAM or 256MB) of RAM to the previous registered region. It will also save +system registers, and hardware PTE's. + +Fadump is supported only on ppc64 platform. The standard kernel and capture +kernel are one and the same on ppc64. + +If you're reading this document, you should already have kexec-tools +installed. If not, you install it via the following command: + + # yum install kexec-tools + +Fadump Operational Flow: + +Like kdump, fadump also exports the ELF formatted kernel crash dump through +/proc/vmcore. Hence existing kdump infrastructure can be used to capture fadump +vmcore. The idea is to keep the functionality transparent to end user. From +user perspective there is no change in the way kdump init script works. + +However, unlike kdump, fadump does not pre-load kdump kernel and initrd into +reserved memory, instead it always uses default OS initrd during second boot +after crash. Hence, for fadump, we rebuild the new kdump initrd and replace it +with default initrd. Before replacing existing default initrd we take a backup +of original default initrd for user's reference. The dracut package has been +enhanced to rebuild the default initrd with vmcore capture steps. The initrd +image is rebuilt as per the configuration in /etc/kdump.conf file. + +The control flow of fadump works as follows: +01. System panics. +02. At the crash, kernel informs power firmware that kernel has crashed. +03. Firmware takes the control and reboots the entire system preserving + only the memory (resets all other devices). +04. The reboot follows the normal booting process (non-kexec). +05. The boot loader loads the default kernel and initrd from /boot +06. The default initrd loads and runs /init +07. dracut-kdump.sh script present in fadump aware default initrd checks if + '/proc/device-tree/rtas/ibm,kernel-dump' file exists before executing + steps to capture vmcore. + (This check will help to bypass the vmcore capture steps during normal boot + process.) +09. Captures dump according to /etc/kdump.conf +10. Is dump capture successful (yes goto 12, no goto 11) +11. Perform the failure action specified in /etc/kdump.conf + (The default failure action is reboot, if unspecified) +12. Perform the final action specified in /etc/kdump.conf + (The default final action is reboot, if unspecified) + + +How to configure fadump: + +Again, we assume if you're reading this document, you should already have +kexec-tools installed. If not, you install it via the following command: + + # yum install kexec-tools + +Make the kernel to be configured with FADump as the default boot entry, if +it isn't already: + + # grubby --set-default=/boot/vmlinuz- + +Boot into the kernel to be configured for FADump. To be able to do much of +anything interesting in the way of debug analysis, you'll also need to install +the kernel-debuginfo package, of the same arch as your running kernel, and the +crash utility: + + # yum --enablerepo=\*debuginfo install kernel-debuginfo.$(uname -m) crash + +Next up, we need to modify some boot parameters to enable firmware assisted +dump. With the help of grubby, it's very easy to append "fadump=on" to the end +of your kernel boot parameters. To reserve the appropriate amount of memory +for boot memory preservation, pass 'crashkernel=X' kernel cmdline parameter. +For the recommended value of X, see 'FADump Memory Requirements' section. + + # grubby --args="fadump=on crashkernel=6G" --update-kernel=/boot/vmlinuz-`uname -r` + +By default, FADump reserved memory will be initialized as CMA area to make the +memory available through CMA allocator on the production kernel. We can opt out +of this, making reserved memory unavailable to production kernel, by booting the +linux kernel with 'fadump=nocma' instead of 'fadump=on'. + +The term 'boot memory' means size of the low memory chunk that is required for +a kernel to boot successfully when booted with restricted memory. By default, +the boot memory size will be the larger of 5% of system RAM or 256MB. +Alternatively, user can also specify boot memory size through boot parameter +'fadump_reserve_mem=' which will override the default calculated size. Use this +option if default boot memory size is not sufficient for second kernel to boot +successfully. + +After making said changes, reboot your system, so that the specified memory is +reserved and left untouched by the normal system. Take note that the output of +'free -m' will show X MB less memory than without this parameter, which is +expected. If you see OOM (Out Of Memory) error messages while loading capture +kernel, then you should bump up the memory reservation size. + +Now that you've got that reserved memory region set up, you want to turn on +the kdump init script: + + # systemctl enable kdump.service + +Then, start up kdump as well: + + # systemctl start kdump.service + +This should turn on the firmware assisted functionality in kernel by +echo'ing 1 to /sys/kernel/fadump_registered, leaving the system ready +to capture a vmcore upon crashing. For journaling filesystems like XFS an +additional step is required to ensure bootloader does not pick the +older initrd (without vmcore capture scripts): + + * If /boot is a separate partition, run the below commands as the root user, + or as a user with CAP_SYS_ADMIN rights: + + # fsfreeze -f + # fsfreeze -u + + * If /boot is not a separate partition, reboot the system. + +After reboot check if the kdump service is up and running with: + + # systemctl status kdump.service + +To test out whether FADump is configured properly, you can force-crash your +system by echo'ing a 'c' into /proc/sysrq-trigger: + + # echo c > /proc/sysrq-trigger + +You should see some panic output, followed by the system reset and booting into +fresh copy of kernel. When default initrd loads and runs /init, vmcore should +be copied out to disk (by default, in /var/crash//vmcore), +then the system rebooted back into your normal kernel. + +Once back to your normal kernel, you can use the previously installed crash +kernel in conjunction with the previously installed kernel-debuginfo to +perform postmortem analysis: + + # crash /usr/lib/debug/lib/modules/2.6.17-1.2621.el5/vmlinux + /var/crash/2006-08-23-15:34/vmcore + + crash> bt + +and so on... + +Saving vmcore-dmesg.txt +----------------------- +Kernel log bufferes are one of the most important information available +in vmcore. Now before saving vmcore, kernel log bufferes are extracted +from /proc/vmcore and saved into a file vmcore-dmesg.txt. After +vmcore-dmesg.txt, vmcore is saved. Destination disk and directory for +vmcore-dmesg.txt is same as vmcore. Note that kernel log buffers will +not be available if dump target is raw device. + +FADump Memory Requirements: + + System Memory Recommended memory +--------------------- ---------------------- + 4 GB - 16 GB : 768 MB + 16 GB - 64 GB : 1024 MB + 64 GB - 128 GB : 2 GB + 128 GB - 1 TB : 4 GB + 1 TB - 2 TB : 6 GB + 2 TB - 4 TB : 12 GB + 4 TB - 8 TB : 20 GB + 8 TB - 16 TB : 36 GB + 16 TB - 32 TB : 64 GB + 32 TB - 64 TB : 128 GB + 64 TB & above : 180 GB + +Things to remember: + +1) The memory required to boot capture Kernel is a moving target that depends + on many factors like hardware attached to the system, kernel and modules in + use, packages installed and services enabled, there is no one-size-fits-all. + But the above recommendations are based on system memory. So, the above + recommendations for FADump come with a few assumptions, based on available + system memory, about the resources the system could have. So, please take + the recommendations with a pinch of salt and remember to try capturing dump + a few times to confirm that the system is configured successfully with dump + capturing support. + +2) Though the memory requirements for FADump seem high, this memory is not + completely set aside but made available for userspace applications to use, + through the CMA allocator. + +3) As the same initrd is used for booting production kernel as well as capture + kernel and with dump being captured in a restricted memory environment, few + optimizations (like not inclding network dracut module, disabling multipath + and such) are applied while building the initrd. In case, the production + environment needs these optimizations to be avoided, dracut_args option in + /etc/kdump.conf file could be leveraged. For example, if a user wishes for + network module to be included in the initrd, adding the below entry in + /etc/kdump.conf file and restarting kdump service would take care of it. + + dracut_args --add "network" + +4) If FADump is configured to capture vmcore to a remote dump target using SSH + or NFS protocol, the network interface is renamed to kdump- + if is generic, for example, *eth#, or net#. This problem + occurs because the vmcore capture scripts in the initial RAM disk (initrd) + add the kdump- prefix to the network interface name to secure persistent + naming. As the same initrd is used for production kernel boot, the interface + name is changed for the production kernel too. + +Dump Triggering methods: + +This section talks about the various ways, other than a Kernel Panic, in which +fadump can be triggered. The following methods assume that fadump is configured +on your system, with the scripts enabled as described in the section above. + +1) AltSysRq C + +FAdump can be triggered with the combination of the 'Alt','SysRq' and 'C' +keyboard keys. Please refer to the following link for more details: + +https://fedoraproject.org/wiki/QA/Sysrq + +In addition, on PowerPC boxes, fadump can also be triggered via Hardware +Management Console(HMC) using 'Ctrl', 'O' and 'C' keyboard keys. + +2) Kernel OOPs + +If we want to generate a dump everytime the Kernel OOPses, we can achieve this +by setting the 'Panic On OOPs' option as follows: + + # echo 1 > /proc/sys/kernel/panic_on_oops + +3) PowerPC specific methods: + +On IBM PowerPC machines, issuing a soft reset invokes the XMON debugger(if +XMON is configured). To configure XMON one needs to compile the kernel with +the CONFIG_XMON and CONFIG_XMON_DEFAULT options, or by compiling with +CONFIG_XMON and booting the kernel with xmon=on option. + +Following are the ways to remotely issue a soft reset on PowerPC boxes, which +would drop you to XMON. Pressing a 'X' (capital alphabet X) followed by an +'Enter' here will trigger the dump. + +3.1) HMC + +Hardware Management Console(HMC) available on Power4 and Power5 machines allow +partitions to be reset remotely. This is specially useful in hang situations +where the system is not accepting any keyboard inputs. + +Once you have HMC configured, the following steps will enable you to trigger +fadump via a soft reset: + +On Power4 + Using GUI + + * In the right pane, right click on the partition you wish to dump. + * Select "Operating System->Reset". + * Select "Soft Reset". + * Select "Yes". + + Using HMC Commandline + + # reset_partition -m -p -t soft + +On Power5 + Using GUI + + * In the right pane, right click on the partition you wish to dump. + * Select "Restart Partition". + * Select "Dump". + * Select "OK". + + Using HMC Commandline + + # chsysstate -m -n -o dumprestart -r lpar + +3.2) Blade Management Console for Blade Center + +To initiate a dump operation, go to Power/Restart option under "Blade Tasks" in +the Blade Management Console. Select the corresponding blade for which you want +to initate the dump and then click "Restart blade with NMI". This issues a +system reset and invokes xmon debugger. + + +Advanced Setups & Failure action: + +Kdump and fadump exhibit similar behavior in terms of setup & failure action. +For fadump advanced setup related information see section "Advanced Setups" in +"kexec-kdump-howto.txt" document. Refer to "Failure action" section in "kexec- +kdump-howto.txt" document for fadump failure action related information. + +Compression and filtering + +Refer "Compression and filtering" section in "kexec-kdump-howto.txt" document. +Compression and filtering are same for kdump & fadump. + + +Notes on rootfs mount: +Dracut is designed to mount rootfs by default. If rootfs mounting fails it +will refuse to go on. So fadump leaves rootfs mounting to dracut currently. +We make the assumtion that proper root= cmdline is being passed to dracut +initramfs for the time being. If you need modify "KDUMP_COMMANDLINE=" in +/etc/sysconfig/kdump, you will need to make sure that appropriate root= +options are copied from /proc/cmdline. In general it is best to append +command line options using "KDUMP_COMMANDLINE_APPEND=" instead of replacing +the original command line completely. + +How to disable FADump: + +Remove "fadump=on"/"fadump=nocma" from kernel cmdline parameters OR replace +it with "fadump=off" kernel cmdline parameter: + + # grubby --update-kernel=/boot/vmlinuz-`uname -r` --remove-args="fadump=on" +or + # grubby --update-kernel=/boot/vmlinuz-`uname -r` --remove-args="fadump=nocma" +OR + # grubby --update-kernel=/boot/vmlinuz-`uname -r` --args="fadump=off" + +Remove "crashkernel=" from kernel cmdline parameters: + + # grubby --update-kernel=/boot/vmlinuz-`uname -r` --remove-args="crashkernel" + +If KDump is to be used as the dump capturing mechanism, reset the crashkernel parameter: + + # kdumpctl reset-crashkernel `uname -r` + +Reboot the system for the settings to take effect. diff --git a/SOURCES/kdump-dep-generator.sh b/SOURCES/kdump-dep-generator.sh new file mode 100644 index 0000000..f48c8f6 --- /dev/null +++ b/SOURCES/kdump-dep-generator.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +# More details about systemd generator: +# http://www.freedesktop.org/wiki/Software/systemd/Generators/ + +. /usr/lib/kdump/kdump-lib.sh +. /usr/lib/kdump/kdump-logger.sh + +# If invokded with no arguments for testing purpose, output to /tmp to +# avoid overriding the existing. +dest_dir="/tmp" + +if [ -n "$1" ]; then + dest_dir=$1 +fi + +systemd_dir=/usr/lib/systemd/system +kdump_wants=$dest_dir/kdump.service.wants + +if is_ssh_dump_target; then + mkdir -p $kdump_wants + ln -sf $systemd_dir/network-online.target $kdump_wants/ +fi diff --git a/SOURCES/kdump-in-cluster-environment.txt b/SOURCES/kdump-in-cluster-environment.txt new file mode 100644 index 0000000..de1eb5e --- /dev/null +++ b/SOURCES/kdump-in-cluster-environment.txt @@ -0,0 +1,91 @@ +Kdump-in-cluster-environment HOWTO + +Introduction + +Kdump is a kexec based crash dumping mechansim for Linux. This docuement +illustrate how to configure kdump in cluster environment to allow the kdump +crash recovery service complete without being preempted by traditional power +fencing methods. + +Overview + +Kexec/Kdump + +Details about Kexec/Kdump are available in Kexec-Kdump-howto file and will not +be described here. + +fence_kdump + +fence_kdump is an I/O fencing agent to be used with the kdump crash recovery +service. When the fence_kdump agent is invoked, it will listen for a message +from the failed node that acknowledges that the failed node is executing the +kdump crash kernel. Note that fence_kdump is not a replacement for traditional +fencing methods. The fence_kdump agent can only detect that a node has entered +the kdump crash recovery service. This allows the kdump crash recovery service +complete without being preempted by traditional power fencing methods. + +fence_kdump_send + +fence_kdump_send is a utility used to send messages that acknowledge that the +node itself has entered the kdump crash recovery service. The fence_kdump_send +utility is typically run in the kdump kernel after a cluster node has +encountered a kernel panic. Once the cluster node has entered the kdump crash +recovery service, fence_kdump_send will periodically send messages to all +cluster nodes. When the fence_kdump agent receives a valid message from the +failed nodes, fencing is complete. + +How to configure Pacemaker cluster environment: + +If we want to use kdump in Pacemaker cluster environment, fence-agents-kdump +should be installed in every nodes in the cluster. You can achieve this via +the following command: + + # yum install -y fence-agents-kdump + +Next is to add kdump_fence to the cluster. Assuming that the cluster consists +of three nodes, they are node1, node2 and node3, and use Pacemaker to perform +resource management and pcs as cli configuration tool. + +With pcs it is easy to add a stonith resource to the cluster. For example, add +a stonith resource named mykdumpfence with fence type of fence_kdump via the +following commands: + + # pcs stonith create mykdumpfence fence_kdump \ + pcmk_host_check=static-list pcmk_host_list="node1 node2 node3" + # pcs stonith update mykdumpfence pcmk_monitor_action=metadata --force + # pcs stonith update mykdumpfence pcmk_status_action=metadata --force + # pcs stonith update mykdumpfence pcmk_reboot_action=off --force + +Then enable stonith + # pcs property set stonith-enabled=true + +How to configure kdump: + +Actually there are two ways how to configure fence_kdump support: + +1) Pacemaker based clusters + If you have successfully configured fence_kdump in Pacemaker, there is + no need to add some special configuration in kdump. So please refer to + Kexec-Kdump-howto file for more information. + +2) Generic clusters + For other types of clusters there are two configuration options in + kdump.conf which enables fence_kdump support: + + fence_kdump_nodes + Contains list of cluster node(s) separated by space to send + fence_kdump notification to (this option is mandatory to enable + fence_kdump) + + fence_kdump_args + Command line arguments for fence_kdump_send (it can contain + all valid arguments except hosts to send notification to) + + These options will most probably be configured by your cluster software, + so please refer to your cluster documentation how to enable fence_kdump + support. + +Please be aware that these two ways cannot be combined and 2) has precedence +over 1). It means that if fence_kdump is configured using fence_kdump_nodes +and fence_kdump_args options in kdump.conf, Pacemaker configuration is not +used even if it exists. diff --git a/SOURCES/kdump-lib-initramfs.sh b/SOURCES/kdump-lib-initramfs.sh new file mode 100755 index 0000000..319f9a0 --- /dev/null +++ b/SOURCES/kdump-lib-initramfs.sh @@ -0,0 +1,293 @@ +# These variables and functions are useful in 2nd kernel + +. /lib/kdump-lib.sh +. /lib/kdump-logger.sh + +KDUMP_PATH="/var/crash" +KDUMP_LOG_FILE="/run/initramfs/kexec-dmesg.log" +CORE_COLLECTOR="" +DEFAULT_CORE_COLLECTOR="makedumpfile -l --message-level 7 -d 31" +DMESG_COLLECTOR="/sbin/vmcore-dmesg" +FAILURE_ACTION="systemctl reboot -f" +DATEDIR=`date +%Y-%m-%d-%T` +HOST_IP='127.0.0.1' +DUMP_INSTRUCTION="" +SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa" +KDUMP_SCRIPT_DIR="/kdumpscripts" +DD_BLKSIZE=512 +FINAL_ACTION="systemctl reboot -f" +KDUMP_CONF="/etc/kdump.conf" +KDUMP_PRE="" +KDUMP_POST="" +NEWROOT="/sysroot" +OPALCORE="/sys/firmware/opal/mpipl/core" + +#initiate the kdump logger +dlog_init +if [ $? -ne 0 ]; then + echo "failed to initiate the kdump logger." + exit 1 +fi + +get_kdump_confs() +{ + local config_opt config_val + + while read config_opt config_val; + do + # remove inline comments after the end of a directive. + case "$config_opt" in + path) + KDUMP_PATH="$config_val" + ;; + core_collector) + [ -n "$config_val" ] && CORE_COLLECTOR="$config_val" + ;; + sshkey) + if [ -f "$config_val" ]; then + SSH_KEY_LOCATION=$config_val + fi + ;; + kdump_pre) + KDUMP_PRE="$config_val" + ;; + kdump_post) + KDUMP_POST="$config_val" + ;; + fence_kdump_args) + FENCE_KDUMP_ARGS="$config_val" + ;; + fence_kdump_nodes) + FENCE_KDUMP_NODES="$config_val" + ;; + failure_action|default) + case $config_val in + shell) + FAILURE_ACTION="kdump_emergency_shell" + ;; + reboot) + FAILURE_ACTION="systemctl reboot -f && exit" + ;; + halt) + FAILURE_ACTION="halt && exit" + ;; + poweroff) + FAILURE_ACTION="systemctl poweroff -f && exit" + ;; + dump_to_rootfs) + FAILURE_ACTION="dump_to_rootfs" + ;; + esac + ;; + final_action) + case $config_val in + reboot) + FINAL_ACTION="systemctl reboot -f" + ;; + halt) + FINAL_ACTION="halt" + ;; + poweroff) + FINAL_ACTION="systemctl poweroff -f" + ;; + esac + ;; + esac + done <<< "$(read_strip_comments $KDUMP_CONF)" + + if [ -z "$CORE_COLLECTOR" ]; then + CORE_COLLECTOR="$DEFAULT_CORE_COLLECTOR" + if is_ssh_dump_target || is_raw_dump_target; then + CORE_COLLECTOR="$CORE_COLLECTOR -F" + fi + fi +} + +# store the kexec kernel log to a file. +save_log() +{ + dmesg -T > $KDUMP_LOG_FILE + + if command -v journalctl > /dev/null; then + journalctl -ab >> $KDUMP_LOG_FILE + fi + chmod 600 $KDUMP_LOG_FILE +} + +# dump_fs +dump_fs() +{ + local _exitcode + local _mp=$1 + local _op=$(get_mount_info OPTIONS target $_mp -f) + ddebug "dump_fs _mp=$_mp _opts=$_op" + + if ! is_mounted "$_mp"; then + dinfo "dump path \"$_mp\" is not mounted, trying to mount..." + mount --target $_mp + if [ $? -ne 0 ]; then + derror "failed to dump to \"$_mp\", it's not a mount point!" + return 1 + fi + fi + + # Remove -F in makedumpfile case. We don't want a flat format dump here. + [[ $CORE_COLLECTOR = *makedumpfile* ]] && CORE_COLLECTOR=`echo $CORE_COLLECTOR | sed -e "s/-F//g"` + + local _dump_path=$(echo "$_mp/$KDUMP_PATH/$HOST_IP-$DATEDIR/" | tr -s /) + + dinfo "saving to $_dump_path" + + # Only remount to read-write mode if the dump target is mounted read-only. + if [[ "$_op" = "ro"* ]]; then + dinfo "Remounting the dump target in rw mode." + mount -o remount,rw $_mp || return 1 + fi + + mkdir -p $_dump_path || return 1 + + save_vmcore_dmesg_fs ${DMESG_COLLECTOR} "$_dump_path" + save_opalcore_fs "$_dump_path" + + dinfo "saving vmcore" + $CORE_COLLECTOR /proc/vmcore $_dump_path/vmcore-incomplete + _exitcode=$? + if [ $_exitcode -eq 0 ]; then + mv $_dump_path/vmcore-incomplete $_dump_path/vmcore + sync + dinfo "saving vmcore complete" + else + derror "saving vmcore failed, _exitcode:$_exitcode" + fi + + dinfo "saving the $KDUMP_LOG_FILE to $_dump_path/" + save_log + mv $KDUMP_LOG_FILE $_dump_path/ + if [ $_exitcode -ne 0 ]; then + return 1 + fi + + # improper kernel cmdline can cause the failure of echo, we can ignore this kind of failure + return 0 +} + +save_vmcore_dmesg_fs() { + local _dmesg_collector=$1 + local _path=$2 + + dinfo "saving vmcore-dmesg.txt to ${_path}" + $_dmesg_collector /proc/vmcore > ${_path}/vmcore-dmesg-incomplete.txt + _exitcode=$? + if [ $_exitcode -eq 0 ]; then + mv ${_path}/vmcore-dmesg-incomplete.txt ${_path}/vmcore-dmesg.txt + chmod 600 ${_path}/vmcore-dmesg.txt + + # Make sure file is on disk. There have been instances where later + # saving vmcore failed and system rebooted without sync and there + # was no vmcore-dmesg.txt available. + sync + dinfo "saving vmcore-dmesg.txt complete" + else + if [ -f ${_path}/vmcore-dmesg-incomplete.txt ]; then + chmod 600 ${_path}/vmcore-dmesg-incomplete.txt + fi + derror "saving vmcore-dmesg.txt failed" + fi +} + +save_opalcore_fs() { + local _path=$1 + + if [ ! -f $OPALCORE ]; then + # Check if we are on an old kernel that uses a different path + if [ -f /sys/firmware/opal/core ]; then + OPALCORE="/sys/firmware/opal/core" + else + return 0 + fi + fi + + dinfo "saving opalcore:$OPALCORE to ${_path}/opalcore" + cp $OPALCORE ${_path}/opalcore + if [ $? -ne 0 ]; then + derror "saving opalcore failed" + return 1 + fi + + sync + dinfo "saving opalcore complete" + return 0 +} + +dump_to_rootfs() +{ + + if [[ $(systemctl status dracut-initqueue | sed -n "s/^\s*Active: \(\S*\)\s.*$/\1/p") == "inactive" ]]; then + dinfo "Trying to bring up initqueue for rootfs mount" + systemctl start dracut-initqueue + fi + + dinfo "Clean up dead systemd services" + systemctl cancel + dinfo "Waiting for rootfs mount, will timeout after 90 seconds" + systemctl start --no-block sysroot.mount + + _loop=0 + while [ $_loop -lt 90 ] && ! is_mounted /sysroot; do + sleep 1 + _loop=$((_loop + 1)) + done + + if ! is_mounted /sysroot; then + derror "Failed to mount rootfs" + return + fi + + ddebug "NEWROOT=$NEWROOT" + dump_fs $NEWROOT +} + +kdump_emergency_shell() +{ + ddebug "Switching to kdump emergency shell..." + + [ -f /etc/profile ] && . /etc/profile + export PS1='kdump:${PWD}# ' + + . /lib/dracut-lib.sh + if [ -f /dracut-state.sh ]; then + . /dracut-state.sh 2>/dev/null + fi + + source_conf /etc/conf.d + + type plymouth >/dev/null 2>&1 && plymouth quit + + source_hook "emergency" + while read _tty rest; do + ( + echo + echo + echo 'Entering kdump emergency mode.' + echo 'Type "journalctl" to view system logs.' + echo 'Type "rdsosreport" to generate a sosreport, you can then' + echo 'save it elsewhere and attach it to a bug report.' + echo + echo + ) > /dev/$_tty + done < /proc/consoles + sh -i -l + /bin/rm -f -- /.console_lock +} + +do_failure_action() +{ + dinfo "Executing failure action $FAILURE_ACTION" + eval $FAILURE_ACTION +} + +do_final_action() +{ + dinfo "Executing final action $FINAL_ACTION" + eval $FINAL_ACTION +} diff --git a/SOURCES/kdump-lib.sh b/SOURCES/kdump-lib.sh new file mode 100755 index 0000000..861f6e4 --- /dev/null +++ b/SOURCES/kdump-lib.sh @@ -0,0 +1,1089 @@ +#!/bin/sh +# +# Kdump common variables and functions +# + +DEFAULT_PATH="/var/crash/" +FENCE_KDUMP_CONFIG_FILE="/etc/sysconfig/fence_kdump" +FENCE_KDUMP_SEND="/usr/libexec/fence_kdump_send" +FADUMP_ENABLED_SYS_NODE="/sys/kernel/fadump_enabled" + +is_fadump_capable() +{ + # Check if firmware-assisted dump is enabled + # if no, fallback to kdump check + if [ -f $FADUMP_ENABLED_SYS_NODE ]; then + rc=`cat $FADUMP_ENABLED_SYS_NODE` + [ $rc -eq 1 ] && return 0 + fi + return 1 +} + +is_squash_available() { + for kmodule in squashfs overlay loop; do + if [ -z "$KDUMP_KERNELVER" ]; then + modprobe --dry-run $kmodule &>/dev/null || return 1 + else + modprobe -S $KDUMP_KERNELVER --dry-run $kmodule &>/dev/null || return 1 + fi + done +} + +perror_exit() { + derror "$@" + exit 1 +} + +is_fs_type_nfs() +{ + [ "$1" = "nfs" ] || [ "$1" = "nfs4" ] +} + +is_ssh_dump_target() +{ + grep -q "^ssh[[:blank:]].*@" /etc/kdump.conf +} + +is_nfs_dump_target() +{ + if grep -q "^nfs" /etc/kdump.conf; then + return 0; + fi + + if is_fs_type_nfs $(get_dracut_args_fstype "$(grep "^dracut_args .*\-\-mount" /etc/kdump.conf)"); then + return 0 + fi + + local _save_path=$(get_save_path) + local _target=$(get_target_from_path $_save_path) + local _fstype=$(get_fs_type_from_target $_target) + + if is_fs_type_nfs $_fstype; then + return 0 + fi + + return 1 +} + +is_raw_dump_target() +{ + grep -q "^raw" /etc/kdump.conf +} + +is_fs_dump_target() +{ + egrep -q "^ext[234]|^xfs|^btrfs|^minix" /etc/kdump.conf +} + +strip_comments() +{ + echo $@ | sed -e 's/\(.*\)#.*/\1/' +} + +# Read from kdump config file stripping all comments +read_strip_comments() +{ + # strip heading spaces, and print any content starting with + # neither space or #, and strip everything after # + sed -n -e "s/^\s*\([^# \t][^#]\+\).*/\1/gp" $1 +} + +# Check if fence kdump is configured in Pacemaker cluster +is_pcs_fence_kdump() +{ + # no pcs or fence_kdump_send executables installed? + type -P pcs > /dev/null || return 1 + [ -x $FENCE_KDUMP_SEND ] || return 1 + + # fence kdump not configured? + (pcs cluster cib | grep 'type="fence_kdump"') &> /dev/null || return 1 +} + +# Check if fence_kdump is configured using kdump options +is_generic_fence_kdump() +{ + [ -x $FENCE_KDUMP_SEND ] || return 1 + + grep -q "^fence_kdump_nodes" /etc/kdump.conf +} + +to_dev_name() { + local dev="${1//\"/}" + + case "$dev" in + UUID=*) + dev=`blkid -U "${dev#UUID=}"` + ;; + LABEL=*) + dev=`blkid -L "${dev#LABEL=}"` + ;; + esac + echo $dev +} + +is_user_configured_dump_target() +{ + grep -E -q "^ext[234]|^xfs|^btrfs|^minix|^raw|^nfs|^ssh" /etc/kdump.conf || is_mount_in_dracut_args; +} + +get_user_configured_dump_disk() +{ + local _target + + _target=$(egrep "^ext[234]|^xfs|^btrfs|^minix|^raw" /etc/kdump.conf 2>/dev/null |awk '{print $2}') + [ -n "$_target" ] && echo $_target && return + + _target=$(get_dracut_args_target "$(grep "^dracut_args .*\-\-mount" /etc/kdump.conf)") + [ -b "$_target" ] && echo $_target +} + +get_root_fs_device() +{ + findmnt -k -f -n -o SOURCE / +} + +get_save_path() +{ + local _save_path=$(awk '$1 == "path" {print $2}' /etc/kdump.conf) + [ -z "$_save_path" ] && _save_path=$DEFAULT_PATH + + # strip the duplicated "/" + echo $_save_path | tr -s / +} + +get_block_dump_target() +{ + local _target _path + + if is_ssh_dump_target || is_nfs_dump_target; then + return + fi + + _target=$(get_user_configured_dump_disk) + [ -n "$_target" ] && echo $(to_dev_name $_target) && return + + # Get block device name from local save path + _path=$(get_save_path) + _target=$(get_target_from_path $_path) + [ -b "$_target" ] && echo $(to_dev_name $_target) +} + +is_dump_to_rootfs() +{ + grep -E "^(failure_action|default)[[:space:]]dump_to_rootfs" /etc/kdump.conf >/dev/null +} + +get_failure_action_target() +{ + local _target + + if is_dump_to_rootfs; then + # Get rootfs device name + _target=$(get_root_fs_device) + [ -b "$_target" ] && echo $(to_dev_name $_target) && return + # Then, must be nfs root + echo "nfs" + fi +} + +# Get kdump targets(including root in case of dump_to_rootfs). +get_kdump_targets() +{ + local _target _root + local kdump_targets + + _target=$(get_block_dump_target) + if [ -n "$_target" ]; then + kdump_targets=$_target + elif is_ssh_dump_target; then + kdump_targets="ssh" + else + kdump_targets="nfs" + fi + + # Add the root device if dump_to_rootfs is specified. + _root=$(get_failure_action_target) + if [ -n "$_root" -a "$kdump_targets" != "$_root" ]; then + kdump_targets="$kdump_targets $_root" + fi + + echo "$kdump_targets" +} + +# Return the bind mount source path, return the path itself if it's not bind mounted +# Eg. if /path/to/src is bind mounted to /mnt/bind, then: +# /mnt/bind -> /path/to/src, /mnt/bind/dump -> /path/to/src/dump +# +# findmnt uses the option "-v, --nofsroot" to exclusive the [/dir] +# in the SOURCE column for bind-mounts, then if $_src equals to +# $_src_nofsroot, the mountpoint is not bind mounted directory. +# +# Below is just an example for mount info +# /dev/mapper/atomicos-root[/ostree/deploy/rhel-atomic-host/var], if the +# directory is bind mounted. The former part represents the device path, rest +# part is the bind mounted directory which quotes by bracket "[]". +get_bind_mount_source() +{ + local _mnt=$(df $1 | tail -1 | awk '{print $NF}') + local _path=${1#$_mnt} + + local _src=$(get_mount_info SOURCE target $_mnt -f) + local _opt=$(get_mount_info OPTIONS target $_mnt -f) + local _fstype=$(get_mount_info FSTYPE target $_mnt -f) + + # bind mount in fstab + if [[ -d "$_src" ]] && [[ "$_fstype" = none ]] && (echo "$_opt" | grep -q "\bbind\b"); then + echo $_src$_path && return + fi + + # direct mount + local _src_nofsroot=$(get_mount_info SOURCE target $_mnt -v -f) + if [[ $_src_nofsroot = $_src ]]; then + echo $_mnt$_path && return + fi + + local _fsroot=${_src#$_src_nofsroot[} + _fsroot=${_fsroot%]} + _mnt=$(get_mount_info TARGET source $_src_nofsroot -f) + + # for btrfs, _fsroot will also contain the subvol value as well, strip it + if [[ "$_fstype" = btrfs ]]; then + local _subvol + _subvol=${_opt#*subvol=} + _subvol=${_subvol%,*} + _fsroot=${_fsroot#$_subvol} + fi + echo $_mnt$_fsroot$_path +} + +# Return the current underlaying device of a path, ignore bind mounts +get_target_from_path() +{ + local _target + + _target=$(df $1 2>/dev/null | tail -1 | awk '{print $1}') + [[ "$_target" == "/dev/root" ]] && [[ ! -e /dev/root ]] && _target=$(get_root_fs_device) + echo $_target +} + +is_mounted() +{ + findmnt -k -n $1 &>/dev/null +} + +get_mount_info() +{ + local _info_type=$1 _src_type=$2 _src=$3; shift 3 + local _info=$(findmnt -k -n -r -o $_info_type --$_src_type $_src $@) + + [ -z "$_info" ] && [ -e "/etc/fstab" ] && _info=$(findmnt -s -n -r -o $_info_type --$_src_type $_src $@) + + echo $_info +} + +get_fs_type_from_target() +{ + get_mount_info FSTYPE source $1 -f +} + +get_mntopt_from_target() +{ + get_mount_info OPTIONS source $1 -f +} +# Find the general mount point of a dump target, not the bind mount point +get_mntpoint_from_target() +{ + # Expcilitly specify --source to findmnt could ensure non-bind mount is returned + get_mount_info TARGET source $1 -f +} + +# Get the path where the target will be mounted in kdump kernel +# $1: kdump target device +get_kdump_mntpoint_from_target() +{ + local _mntpoint=$(get_mntpoint_from_target $1) + + # mount under /sysroot if dump to root disk or mount under + # mount under /kdumproot if dump target is not mounted in first kernel + # mount under /kdumproot/$_mntpoint in other cases in 2nd kernel. + # systemd will be in charge to umount it. + if [ -z "$_mntpoint" ];then + _mntpoint="/kdumproot" + else + if [ "$_mntpoint" = "/" ];then + _mntpoint="/sysroot" + else + _mntpoint="/kdumproot/$_mntpoint" + fi + fi + + # strip duplicated "/" + echo $_mntpoint | tr -s "/" +} + +# get_option_value +# retrieves value of option defined in kdump.conf +get_option_value() { + strip_comments `grep "^$1[[:space:]]\+" /etc/kdump.conf | tail -1 | cut -d\ -f2-` +} + +kdump_get_persistent_dev() { + local dev="${1//\"/}" + + case "$dev" in + UUID=*) + dev=`blkid -U "${dev#UUID=}"` + ;; + LABEL=*) + dev=`blkid -L "${dev#LABEL=}"` + ;; + esac + echo $(get_persistent_dev "$dev") +} + +is_atomic() +{ + grep -q "ostree" /proc/cmdline +} + +is_ipv6_address() +{ + echo $1 | grep -q ":" +} + +# get ip address or hostname from nfs/ssh config value +get_remote_host() +{ + local _config_val=$1 + + # ipv6 address in kdump.conf is around with "[]", + # factor out the ipv6 address + _config_val=${_config_val#*@} + _config_val=${_config_val%:/*} + _config_val=${_config_val#[} + _config_val=${_config_val%]} + echo $_config_val +} + +is_hostname() +{ + local _hostname=`echo $1 | grep ":"` + + if [ -n "$_hostname" ]; then + return 1 + fi + echo $1 | grep -q "[a-zA-Z]" +} + +# Copied from "/etc/sysconfig/network-scripts/network-functions" +get_hwaddr() +{ + if [ -f "/sys/class/net/${1}/address" ]; then + awk '{ print toupper($0) }' < /sys/class/net/${1}/address + elif [ -d "/sys/class/net/${1}" ]; then + LC_ALL= LANG= ip -o link show ${1} 2>/dev/null | \ + awk '{ print toupper(gensub(/.*link\/[^ ]* ([[:alnum:]:]*).*/, + "\\1", 1)); }' + fi +} + + +# Get value by a field using "nmcli -g" +# +# "nmcli --get-values" allows us to retrive value(s) by field, for example, +# nmcli --get-values connection show /org/freedesktop/NetworkManager/ActiveConnection/1 +# returns the following value for the corresponding field respectively, +# Field Value +# IP4.DNS "10.19.42.41 | 10.11.5.19 | 10.5.30.160" +# 802-3-ethernet.s390-subchannels "" +# bond.options "mode=balance-rr" +get_nmcli_value_by_field() +{ + local _nm_show_cmd=$1 + local _field=$2 + + local val=$(LANG=C nmcli --get-values $_field $_nm_show_cmd) + + echo -n "$val" +} + +# Get nmcli connection apath (a D-Bus active connection path ) by ifname +# +# apath is used for nmcli connection operations, e.g. +# $ nmcli connection show $apath +get_nmcli_connection_apath_by_ifname() +{ + local _ifname=$1 + local _nm_show_cmd="device show $_ifname" + + local _apath=$(get_nmcli_value_by_field "$_nm_show_cmd" "GENERAL.CON-PATH") + + echo -n "$_apath" +} + +# Get nmcli connection show cmd by ifname +# +# "$_apath" is supposed to not contain any chracter that +# need to be escapded, e.g. space. Otherwise get_nmcli_value_by_field +# would fail. +get_nmcli_connection_show_cmd_by_ifname() +{ + local _ifname="$1" + local _apath=$(get_nmcli_connection_apath_by_ifname "$_ifname") + local _nm_show_cmd="connection show $_apath" + + echo -n "$_nm_show_cmd" +} + +get_ifcfg_by_device() +{ + grep -E -i -l "^[[:space:]]*DEVICE=\"*${1}\"*[[:space:]]*$" \ + /etc/sysconfig/network-scripts/ifcfg-* 2>/dev/null | head -1 +} + +get_ifcfg_by_hwaddr() +{ + grep -E -i -l "^[[:space:]]*HWADDR=\"*${1}\"*[[:space:]]*$" \ + /etc/sysconfig/network-scripts/ifcfg-* 2>/dev/null | head -1 +} + +get_ifcfg_by_uuid() +{ + grep -E -i -l "^[[:space:]]*UUID=\"*${1}\"*[[:space:]]*$" \ + /etc/sysconfig/network-scripts/ifcfg-* 2>/dev/null | head -1 +} + +get_ifcfg_by_name() +{ + grep -E -i -l "^[[:space:]]*NAME=\"*${1}\"*[[:space:]]*$" \ + /etc/sysconfig/network-scripts/ifcfg-* 2>/dev/null | head -1 +} + +is_nm_running() +{ + [ "$(LANG=C nmcli -t --fields running general status 2>/dev/null)" = "running" ] +} + +is_nm_handling() +{ + LANG=C nmcli -t --fields device,state dev status 2>/dev/null \ + | grep -q "^\(${1}:connected\)\|\(${1}:connecting.*\)$" +} + +# $1: netdev name +get_ifcfg_nmcli() +{ + local nm_uuid nm_name + local ifcfg_file + + # Get the active nmcli config name of $1 + if is_nm_running && is_nm_handling "${1}" ; then + # The configuration "uuid" and "name" generated by nm is wrote to + # the ifcfg file as "UUID=" and "NAME=". + nm_uuid=$(LANG=C nmcli -t --fields uuid,device c show --active 2>/dev/null \ + | grep "${1}" | head -1 | cut -d':' -f1) + nm_name=$(LANG=C nmcli -t --fields name,device c show --active 2>/dev/null \ + | grep "${1}" | head -1 | cut -d':' -f1) + ifcfg_file=$(get_ifcfg_by_uuid "${nm_uuid}") + [ -z "${ifcfg_file}" ] && ifcfg_file=$(get_ifcfg_by_name "${nm_name}") + fi + + echo -n "${ifcfg_file}" +} + +# $1: netdev name +get_ifcfg_legacy() +{ + local ifcfg_file + + ifcfg_file="/etc/sysconfig/network-scripts/ifcfg-${1}" + [ -f "${ifcfg_file}" ] && echo -n "${ifcfg_file}" && return + + ifcfg_file=$(get_ifcfg_by_name "${1}") + [ -f "${ifcfg_file}" ] && echo -n "${ifcfg_file}" && return + + local hwaddr=$(get_hwaddr "${1}") + if [ -n "$hwaddr" ]; then + ifcfg_file=$(get_ifcfg_by_hwaddr "${hwaddr}") + [ -f "${ifcfg_file}" ] && echo -n "${ifcfg_file}" && return + fi + + ifcfg_file=$(get_ifcfg_by_device "${1}") + + echo -n "${ifcfg_file}" +} + +# $1: netdev name +# Return the ifcfg file whole name(including the path) of $1 if any. +get_ifcfg_filename() { + local ifcfg_file + + ifcfg_file=$(get_ifcfg_nmcli "${1}") + if [ -z "${ifcfg_file}" ]; then + ifcfg_file=$(get_ifcfg_legacy "${1}") + fi + + echo -n "${ifcfg_file}" +} + +# returns 0 when omission of a module is desired in dracut_args +# returns 1 otherwise +is_dracut_mod_omitted() { + local dracut_args dracut_mod=$1 + + set -- $(grep "^dracut_args" /etc/kdump.conf) + while [ $# -gt 0 ]; do + case $1 in + -o|--omit) + [[ " ${2//[^[:alnum:]]/ } " == *" $dracut_mod "* ]] && return 0 + esac + shift + done + + return 1 +} + +is_wdt_active() { + local active + + [ -d /sys/class/watchdog ] || return 1 + for dir in /sys/class/watchdog/*; do + [ -f "$dir/state" ] || continue + active=$(< "$dir/state") + [ "$active" = "active" ] && return 0 + done + return 1 +} + +# If "dracut_args" contains "--mount" information, use it +# directly without any check(users are expected to ensure +# its correctness). +is_mount_in_dracut_args() +{ + grep -q "^dracut_args .*\-\-mount" /etc/kdump.conf +} + +# If $1 contains dracut_args "--mount", return +get_dracut_args_fstype() +{ + echo $1 | grep "\-\-mount" | sed "s/.*--mount .\(.*\)/\1/" | cut -d' ' -f3 +} + +# If $1 contains dracut_args "--mount", return +get_dracut_args_target() +{ + echo $1 | grep "\-\-mount" | sed "s/.*--mount .\(.*\)/\1/" | cut -d' ' -f1 +} + +check_crash_mem_reserved() +{ + local mem_reserved + + mem_reserved=$(cat /sys/kernel/kexec_crash_size) + if [ $mem_reserved -eq 0 ]; then + derror "No memory reserved for crash kernel" + return 1 + fi + + return 0 +} + +check_kdump_feasibility() +{ + if [ ! -e /sys/kernel/kexec_crash_loaded ]; then + derror "Kdump is not supported on this kernel" + return 1 + fi + check_crash_mem_reserved + return $? +} + +check_current_kdump_status() +{ + if [ ! -f /sys/kernel/kexec_crash_loaded ];then + derror "Perhaps CONFIG_CRASH_DUMP is not enabled in kernel" + return 1 + fi + + rc=`cat /sys/kernel/kexec_crash_loaded` + if [ $rc == 1 ]; then + return 0 + else + return 1 + fi +} + +# remove_cmdline_param [] ... [] +# Remove a list of kernel parameters from a given kernel cmdline and print the result. +# For each "arg" in the removing params list, "arg" and "arg=xxx" will be removed if exists. +remove_cmdline_param() +{ + local cmdline=$1 + shift + + for arg in $@; do + cmdline=`echo $cmdline | \ + sed -e "s/\b$arg=[^ ]*//g" \ + -e "s/^$arg\b//g" \ + -e "s/[[:space:]]$arg\b//g" \ + -e "s/\s\+/ /g"` + done + echo $cmdline +} + +# +# This function returns the "apicid" of the boot +# cpu (cpu 0) if present. +# +get_bootcpu_apicid() +{ + awk ' \ + BEGIN { CPU = "-1"; } \ + $1=="processor" && $2==":" { CPU = $NF; } \ + CPU=="0" && /^apicid/ { print $NF; } \ + ' \ + /proc/cpuinfo +} + +# +# append_cmdline +# This function appends argument "$2=$3" to string ($1) if not already present. +# +append_cmdline() +{ + local cmdline=$1 + local newstr=${cmdline/$2/""} + + # unchanged str implies argument wasn't there + if [ "$cmdline" == "$newstr" ]; then + cmdline="${cmdline} ${2}=${3}" + fi + + echo $cmdline +} + +# This function check iomem and determines if we have more than +# 4GB of ram available. Returns 1 if we do, 0 if we dont +need_64bit_headers() +{ + return `tail -n 1 /proc/iomem | awk '{ split ($1, r, "-"); \ + print (strtonum("0x" r[2]) > strtonum("0xffffffff")); }'` +} + +# Check if secure boot is being enforced. +# +# Per Peter Jones, we need check efivar SecureBoot-$(the UUID) and +# SetupMode-$(the UUID), they are both 5 bytes binary data. The first four +# bytes are the attributes associated with the variable and can safely be +# ignored, the last bytes are one-byte true-or-false variables. If SecureBoot +# is 1 and SetupMode is 0, then secure boot is being enforced. +# +# Assume efivars is mounted at /sys/firmware/efi/efivars. +is_secure_boot_enforced() +{ + local secure_boot_file setup_mode_file + local secure_boot_byte setup_mode_byte + + # On powerpc, os-secureboot-enforcing DT property indicates whether secureboot + # is enforced. Return success, if it is found. + if [ -f /proc/device-tree/ibm,secureboot/os-secureboot-enforcing ]; then + return 0 + fi + + # Detect secure boot on x86 and arm64 + secure_boot_file=$(find /sys/firmware/efi/efivars -name SecureBoot-* 2>/dev/null) + setup_mode_file=$(find /sys/firmware/efi/efivars -name SetupMode-* 2>/dev/null) + + if [ -f "$secure_boot_file" ] && [ -f "$setup_mode_file" ]; then + secure_boot_byte=$(hexdump -v -e '/1 "%d\ "' $secure_boot_file|cut -d' ' -f 5) + setup_mode_byte=$(hexdump -v -e '/1 "%d\ "' $setup_mode_file|cut -d' ' -f 5) + + if [ "$secure_boot_byte" = "1" ] && [ "$setup_mode_byte" = "0" ]; then + return 0 + fi + fi + + # Detect secure boot on s390x + if [[ -e "/sys/firmware/ipl/secure" && "$(cat /sys/firmware/ipl/secure)" == "1" ]]; then + return 0 + fi + + return 1 +} + +# +# prepare_kexec_args +# This function prepares kexec argument. +# +prepare_kexec_args() +{ + local kexec_args=$1 + local found_elf_args + + ARCH=`uname -m` + if [ "$ARCH" == "i686" -o "$ARCH" == "i386" ] + then + need_64bit_headers + if [ $? == 1 ] + then + found_elf_args=`echo $kexec_args | grep elf32-core-headers` + if [ -n "$found_elf_args" ] + then + dwarn "Warning: elf32-core-headers overrides correct elf64 setting" + else + kexec_args="$kexec_args --elf64-core-headers" + fi + else + found_elf_args=`echo $kexec_args | grep elf64-core-headers` + if [ -z "$found_elf_args" ] + then + kexec_args="$kexec_args --elf32-core-headers" + fi + fi + fi + echo $kexec_args +} + +# +# Detect initrd and kernel location, results are stored in global enviromental variables: +# KDUMP_BOOTDIR, KDUMP_KERNELVER, KDUMP_KERNEL, DEFAULT_INITRD, and KDUMP_INITRD +# +# Expectes KDUMP_BOOTDIR, KDUMP_IMG, KDUMP_IMG_EXT, KDUMP_KERNELVER to be loaded from config already +# and will prefer already set values so user can specify custom kernel/initramfs location +# +prepare_kdump_bootinfo() +{ + local boot_imglist boot_dirlist boot_initrdlist curr_kver="$(uname -r)" + local machine_id + + if [ -z "$KDUMP_KERNELVER" ]; then + KDUMP_KERNELVER="$(uname -r)" + fi + + read machine_id < /etc/machine-id + boot_dirlist=${KDUMP_BOOTDIR:-"/boot /boot/efi /efi /"} + boot_imglist="$KDUMP_IMG-$KDUMP_KERNELVER$KDUMP_IMG_EXT $machine_id/$KDUMP_KERNELVER/$KDUMP_IMG" + + # Use BOOT_IMAGE as reference if possible, strip the GRUB root device prefix in (hd0,gpt1) format + local boot_img="$(cat /proc/cmdline | sed "s/^BOOT_IMAGE=\((\S*)\)\?\(\S*\) .*/\2/")" + if [ -n "$boot_img" ]; then + boot_imglist="$boot_img $boot_imglist" + fi + + for dir in $boot_dirlist; do + for img in $boot_imglist; do + if [ -f "$dir/$img" ]; then + KDUMP_KERNEL=$(echo $dir/$img | tr -s '/') + break 2 + fi + done + done + + if ! [ -e "$KDUMP_KERNEL" ]; then + derror "Failed to detect kdump kernel location" + return 1 + fi + + # Set KDUMP_BOOTDIR to where kernel image is stored + KDUMP_BOOTDIR=$(dirname $KDUMP_KERNEL) + + # Default initrd should just stay aside of kernel image, try to find it in KDUMP_BOOTDIR + boot_initrdlist="initramfs-$KDUMP_KERNELVER.img initrd" + for initrd in $boot_initrdlist; do + if [ -f "$KDUMP_BOOTDIR/$initrd" ]; then + defaut_initrd_base="$initrd" + DEFAULT_INITRD="$KDUMP_BOOTDIR/$defaut_initrd_base" + break + fi + done + + # Create kdump initrd basename from default initrd basename + # initramfs-5.7.9-200.fc32.x86_64.img => initramfs-5.7.9-200.fc32.x86_64kdump.img + # initrd => initrdkdump + if [[ -z "$defaut_initrd_base" ]]; then + kdump_initrd_base=initramfs-${KDUMP_KERNELVER}kdump.img + elif [[ $defaut_initrd_base == *.* ]]; then + kdump_initrd_base=${defaut_initrd_base%.*}kdump.${DEFAULT_INITRD##*.} + else + kdump_initrd_base=${defaut_initrd_base}kdump + fi + + # Place kdump initrd in `/var/lib/kdump` if `KDUMP_BOOTDIR` not writable + if [[ ! -w "$KDUMP_BOOTDIR" ]];then + var_target_initrd_dir="/var/lib/kdump" + mkdir -p "$var_target_initrd_dir" + KDUMP_INITRD="$var_target_initrd_dir/$kdump_initrd_base" + else + KDUMP_INITRD="$KDUMP_BOOTDIR/$kdump_initrd_base" + fi +} + +get_watchdog_drvs() +{ + local _wdtdrvs _drv _dir + + for _dir in /sys/class/watchdog/*; do + # device/modalias will return driver of this device + [[ -f "$_dir/device/modalias" ]] || continue + _drv=$(< "$_dir/device/modalias") + _drv=$(modprobe --set-version "$KDUMP_KERNELVER" -R $_drv 2>/dev/null) + for i in $_drv; do + if ! [[ " $_wdtdrvs " == *" $i "* ]]; then + _wdtdrvs="$_wdtdrvs $i" + fi + done + done + + echo $_wdtdrvs +} + +# +# prepare_cmdline +# This function performs a series of edits on the command line. +# Store the final result in global $KDUMP_COMMANDLINE. +prepare_cmdline() +{ + local cmdline id + + if [ -z "$1" ]; then + cmdline=$(cat /proc/cmdline) + else + cmdline="$1" + fi + + # These params should always be removed + cmdline=$(remove_cmdline_param "$cmdline" crashkernel panic_on_warn) + # These params can be removed configurably + cmdline=$(remove_cmdline_param "$cmdline" "$2") + + # Always remove "root=X", as we now explicitly generate all kinds + # of dump target mount information including root fs. + # + # We do this before KDUMP_COMMANDLINE_APPEND, if one really cares + # about it(e.g. for debug purpose), then can pass "root=X" using + # KDUMP_COMMANDLINE_APPEND. + cmdline=$(remove_cmdline_param "$cmdline" root) + + # With the help of "--hostonly-cmdline", we can avoid some interitage. + cmdline=$(remove_cmdline_param "$cmdline" rd.lvm.lv rd.luks.uuid rd.dm.uuid rd.md.uuid fcoe) + + # Remove netroot, rd.iscsi.initiator and iscsi_initiator since + # we get duplicate entries for the same in case iscsi code adds + # it as well. + cmdline=$(remove_cmdline_param "$cmdline" netroot rd.iscsi.initiator iscsi_initiator) + + cmdline="${cmdline} $3" + + id=$(get_bootcpu_apicid) + if [ ! -z ${id} ] ; then + cmdline=$(append_cmdline "${cmdline}" disable_cpu_apicid ${id}) + fi + + # If any watchdog is used, set it's pretimeout to 0. pretimeout let + # watchdog panic the kernel first, and reset the system after the + # panic. If the system is already in kdump, panic is not helpful + # and only increase the chance of watchdog failure. + for i in $(get_watchdog_drvs); do + cmdline+=" $i.pretimeout=0" + + if [[ $i == hpwdt ]]; then + # hpwdt have a special parameter kdumptimeout, is's only suppose + # to be set to non-zero in first kernel. In kdump, non-zero + # value could prevent the watchdog from resetting the system. + cmdline+=" $i.kdumptimeout=0" + fi + done + + echo ${cmdline} +} + +#get system memory size in the unit of GB +get_system_size() +{ + result=$(cat /proc/iomem | grep "System RAM" | awk -F ":" '{ print $1 }' | tr [:lower:] [:upper:] | paste -sd+) + result="+$result" + # replace '-' with '+0x' and '+' with '-0x' + sum=$( echo $result | sed -e 's/-/K0x/g' | sed -e 's/+/-0x/g' | sed -e 's/K/+/g' ) + size=$(printf "%d\n" $(($sum))) + let size=$size/1024/1024/1024 + + echo $size +} + +get_recommend_size() +{ + local mem_size=$1 + local _ck_cmdline=$2 + local OLDIFS="$IFS" + + last_sz="" + last_unit="" + + start=${_ck_cmdline: :1} + if [ $mem_size -lt $start ]; then + echo "0M" + return + fi + IFS=',' + for i in $_ck_cmdline; do + end=$(echo $i | awk -F "-" '{ print $2 }' | awk -F ":" '{ print $1 }') + recommend=$(echo $i | awk -F "-" '{ print $2 }' | awk -F ":" '{ print $2 }') + size=${end: : -1} + unit=${end: -1} + if [ $unit == 'T' ]; then + let size=$size*1024 + fi + if [ $mem_size -lt $size ]; then + echo $recommend + IFS="$OLDIFS" + return + fi + done + IFS="$OLDIFS" +} + +# return recommended size based on current system RAM size +# $1: kernel version, if not set, will defaults to `uname -r` +kdump_get_arch_recommend_size() +{ + local kernel=$1 arch + + if ! [ -r "/proc/iomem" ] ; then + echo "Error, can not access /proc/iomem." + return 1 + fi + + [ -z "$kernel" ] && kernel=$(uname -r) + ck_cmdline=$(cat "/usr/lib/modules/$kernel/crashkernel.default" 2>/dev/null) + + if [ -n "$ck_cmdline" ]; then + ck_cmdline=${ck_cmdline#crashkernel=} + else + arch=$(lscpu | grep Architecture | awk -F ":" '{ print $2 }' | tr '[:lower:]' '[:upper:]') + if [ "$arch" = "X86_64" ] || [ "$arch" = "S390X" ]; then + ck_cmdline="1G-4G:160M,4G-64G:192M,64G-1T:256M,1T-:512M" + elif [ "$arch" = "AARCH64" ]; then + ck_cmdline="2G-:448M" + elif [ "$arch" = "PPC64LE" ]; then + if is_fadump_capable; then + ck_cmdline="4G-16G:768M,16G-64G:1G,64G-128G:2G,128G-1T:4G,1T-2T:6G,2T-4T:12G,4T-8T:20G,8T-16T:36G,16T-32T:64G,32T-64T:128G,64T-:180G" + else + ck_cmdline="2G-4G:384M,4G-16G:512M,16G-64G:1G,64G-128G:2G,128G-:4G" + fi + fi + fi + + ck_cmdline=$(echo $ck_cmdline | sed -e 's/-:/-102400T:/g') + sys_mem=$(get_system_size) + + get_recommend_size "$sys_mem" "$ck_cmdline" +} + +# Print all underlying crypt devices of a block device +# print nothing if device is not on top of a crypt device +# $1: the block device to be checked in maj:min format +get_luks_crypt_dev() +{ + [[ -b /dev/block/$1 ]] || return 1 + + local _type=$(eval "$(blkid -u filesystem,crypto -o export -- /dev/block/$1); echo \$TYPE") + [[ $_type == "crypto_LUKS" ]] && echo $1 + + for _x in /sys/dev/block/$1/slaves/*; do + [[ -f $_x/dev ]] || continue + [[ $_x/subsystem -ef /sys/class/block ]] || continue + get_luks_crypt_dev "$(< "$_x/dev")" + done +} + +# kdump_get_maj_min +# Prints the major and minor of a device node. +# Example: +# $ get_maj_min /dev/sda2 +# 8:2 +kdump_get_maj_min() { + local _majmin + _majmin="$(stat -L -c '%t:%T' "$1" 2> /dev/null)" + printf "%s" "$((0x${_majmin%:*})):$((0x${_majmin#*:}))" +} + +get_all_kdump_crypt_dev() +{ + local _dev _crypt + + for _dev in $(get_block_dump_target); do + _crypt=$(get_luks_crypt_dev $(kdump_get_maj_min "$_dev")) + [[ -n "$_crypt" ]] && echo $_crypt + done +} + +check_vmlinux() +{ + # Use readelf to check if it's a valid ELF + readelf -h $1 &>/dev/null || return 1 +} + +get_vmlinux_size() +{ + local size=0 + + while read _type _offset _virtaddr _physaddr _fsize _msize _flg _aln; do + size=$(( $size + $_msize )) + done <<< $(readelf -l -W $1 | grep "^ LOAD" 2>/dev/stderr) + + echo $size +} + +try_decompress() +{ + # The obscure use of the "tr" filter is to work around older versions of + # "grep" that report the byte offset of the line instead of the pattern. + + # Try to find the header ($1) and decompress from here + for pos in `tr "$1\n$2" "\n$2=" < "$4" | grep -abo "^$2"` + do + if ! type -P $3 > /dev/null; then + ddebug "Signiature detected but '$3' is missing, skip this decompressor" + break + fi + + pos=${pos%%:*} + tail -c+$pos "$img" | $3 > $5 2> /dev/null + if check_vmlinux $5; then + ddebug "Kernel is extracted with '$3'" + return 0 + fi + done + + return 1 +} + +# Borrowed from linux/scripts/extract-vmlinux +get_kernel_size() +{ + # Prepare temp files: + local img=$1 tmp=$(mktemp /tmp/vmlinux-XXX) + trap "rm -f $tmp" 0 + + # Try to check if it's a vmlinux already + check_vmlinux $img && get_vmlinux_size $img && return 0 + + # That didn't work, so retry after decompression. + try_decompress '\037\213\010' xy gunzip $img $tmp || \ + try_decompress '\3757zXZ\000' abcde unxz $img $tmp || \ + try_decompress 'BZh' xy bunzip2 $img $tmp || \ + try_decompress '\135\0\0\0' xxx unlzma $img $tmp || \ + try_decompress '\211\114\132' xy 'lzop -d' $img $tmp || \ + try_decompress '\002!L\030' xxx 'lz4 -d' $img $tmp || \ + try_decompress '(\265/\375' xxx unzstd $img $tmp + + # Finally check for uncompressed images or objects: + [[ $? -eq 0 ]] && get_vmlinux_size $tmp && return 0 + + # Fallback to use iomem + local _size=0 + for _seg in $(cat /proc/iomem | grep -E "Kernel (code|rodata|data|bss)" | cut -d ":" -f 1); do + _size=$(( $_size + 0x${_seg#*-} - 0x${_seg%-*} )) + done + echo $_size +} diff --git a/SOURCES/kdump-logger.sh b/SOURCES/kdump-logger.sh new file mode 100755 index 0000000..370e5e8 --- /dev/null +++ b/SOURCES/kdump-logger.sh @@ -0,0 +1,348 @@ +#!/bin/bash +# +# This comes from the dracut-logger.sh +# +# The logger defined 4 logging levels: +# - ddebug (4) +# The DEBUG Level designates fine-grained informational events that are most +# useful to debug an application. +# - dinfo (3) +# The INFO level designates informational messages that highlight the +# progress of the application at coarse-grained level. +# - dwarn (2) +# The WARN level designates potentially harmful situations. +# - derror (1) +# The ERROR level designates error events that might still allow the +# application to continue running. +# +# Logging is controlled by following global variables: +# - @var kdump_stdloglvl - logging level to standard error (console output) +# - @var kdump_sysloglvl - logging level to syslog (by logger command) +# - @var kdump_kmsgloglvl - logging level to /dev/kmsg (only for boot-time) +# +# If any of the variables is not set, the function dlog_init() sets it to default: +# - In the first kernel: +# - @var kdump_stdloglvl = 3 (info) +# - @var kdump_sysloglvl = 0 (no logging) +# - @var kdump_kmsgloglvl = 0 (no logging) +# +# -In the second kernel: +# - @var kdump_stdloglvl = 0 (no logging) +# - @var kdump_sysloglvl = 3 (info) +# - @var kdump_kmsgloglvl = 0 (no logging) +# +# First of all you have to start with dlog_init() function which initializes +# required variables. Don't call any other logging function before that one! +# + +# Define vairables for the log levels in this module. +kdump_stdloglvl="" +kdump_sysloglvl="" +kdump_kmsgloglvl="" + +# The dracut-lib.sh is only available in the second kernel, and it won't +# be used in the first kernel because the dracut-lib.sh is invisible in +# the first kernel. +if [ -f /lib/dracut-lib.sh ]; then + . /lib/dracut-lib.sh +fi + +# @brief Get the log level from kernel command line. +# @retval 1 if something has gone wrong +# @retval 0 on success. +# +get_kdump_loglvl() +{ + (type -p getarg) && kdump_sysloglvl=$(getarg rd.kdumploglvl) + [ -z "$kdump_sysloglvl" ] && return 1; + + (type -p isdigit) && isdigit $kdump_sysloglvl + [ $? -ne 0 ] && return 1; + + return 0 +} + +# @brief Check the log level. +# @retval 1 if something has gone wrong +# @retval 0 on success. +# +check_loglvl() +{ + case "$1" in + 0|1|2|3|4) + return 0 + ;; + *) + return 1 + ;; + esac +} + +# @brief Initializes Logger. +# @retval 1 if something has gone wrong +# @retval 0 on success. +# +dlog_init() { + local ret=0; local errmsg + + if [ -s /proc/vmcore ];then + get_kdump_loglvl + if [ $? -ne 0 ];then + logger -t "kdump[$$]" -p warn -- "Kdump is using the default log level(3)." + kdump_sysloglvl=3 + fi + kdump_stdloglvl=0 + kdump_kmsgloglvl=0 + else + kdump_stdloglvl=$KDUMP_STDLOGLVL + kdump_sysloglvl=$KDUMP_SYSLOGLVL + kdump_kmsgloglvl=$KDUMP_KMSGLOGLVL + fi + + [ -z "$kdump_stdloglvl" ] && kdump_stdloglvl=3 + [ -z "$kdump_sysloglvl" ] && kdump_sysloglvl=0 + [ -z "$kdump_kmsgloglvl" ] && kdump_kmsgloglvl=0 + + for loglvl in "$kdump_stdloglvl" "$kdump_kmsgloglvl" "$kdump_sysloglvl"; do + check_loglvl "$loglvl" + if [ $? -ne 0 ]; then + echo "Illegal log level: $kdump_stdloglvl $kdump_kmsgloglvl $kdump_sysloglvl" + return 1 + fi + done + + # Skip initialization if it's already done. + [ -n "$kdump_maxloglvl" ] && return 0 + + if [[ $UID -ne 0 ]]; then + kdump_kmsgloglvl=0 + kdump_sysloglvl=0 + fi + + if [[ $kdump_sysloglvl -gt 0 ]]; then + if [[ -d /run/systemd/journal ]] \ + && type -P systemd-cat &>/dev/null \ + && systemctl --quiet is-active systemd-journald.socket &>/dev/null; then + readonly _systemdcatfile="/var/tmp/systemd-cat" + mkfifo "$_systemdcatfile" &>/dev/null + readonly _dlogfd=15 + systemd-cat -t 'kdump' --level-prefix=true <"$_systemdcatfile" & + exec 15>"$_systemdcatfile" + elif ! [ -S /dev/log -a -w /dev/log ] || ! command -v logger >/dev/null; then + # We cannot log to syslog, so turn this facility off. + kdump_kmsgloglvl=$kdump_sysloglvl + kdump_sysloglvl=0 + ret=1 + errmsg="No '/dev/log' or 'logger' included for syslog logging" + fi + fi + + local lvl; local maxloglvl_l=0 + for lvl in $kdump_stdloglvl $kdump_sysloglvl $kdump_kmsgloglvl; do + [[ $lvl -gt $maxloglvl_l ]] && maxloglvl_l=$lvl + done + readonly kdump_maxloglvl=$maxloglvl_l + export kdump_maxloglvl + + if [[ $kdump_stdloglvl -lt 4 ]] && [[ $kdump_kmsgloglvl -lt 4 ]] && [[ $kdump_sysloglvl -lt 4 ]]; then + unset ddebug + ddebug() { :; }; + fi + + if [[ $kdump_stdloglvl -lt 3 ]] && [[ $kdump_kmsgloglvl -lt 3 ]] && [[ $kdump_sysloglvl -lt 3 ]]; then + unset dinfo + dinfo() { :; }; + fi + + if [[ $kdump_stdloglvl -lt 2 ]] && [[ $kdump_kmsgloglvl -lt 2 ]] && [[ $kdump_sysloglvl -lt 2 ]]; then + unset dwarn + dwarn() { :; }; + unset dwarning + dwarning() { :; }; + fi + + if [[ $kdump_stdloglvl -lt 1 ]] && [[ $kdump_kmsgloglvl -lt 1 ]] && [[ $kdump_sysloglvl -lt 1 ]]; then + unset derror + derror() { :; }; + fi + + [ -n "$errmsg" ] && derror "$errmsg" + + return $ret +} + +## @brief Converts numeric level to logger priority defined by POSIX.2. +# +# @param lvl Numeric logging level in range from 1 to 4. +# @retval 1 if @a lvl is out of range. +# @retval 0 if @a lvl is correct. +# @result Echoes logger priority. +_lvl2syspri() { + case "$1" in + 1) echo error;; + 2) echo warning;; + 3) echo info;; + 4) echo debug;; + *) return 1;; + esac +} + +## @brief Converts logger numeric level to syslog log level +# +# @param lvl Numeric logging level in range from 1 to 4. +# @retval 1 if @a lvl is out of range. +# @retval 0 if @a lvl is correct. +# @result Echoes kernel console numeric log level +# +# Conversion is done as follows: +# +# +# none -> LOG_EMERG (0) +# none -> LOG_ALERT (1) +# none -> LOG_CRIT (2) +# ERROR(1) -> LOG_ERR (3) +# WARN(2) -> LOG_WARNING (4) +# none -> LOG_NOTICE (5) +# INFO(3) -> LOG_INFO (6) +# DEBUG(4) -> LOG_DEBUG (7) +# +# +# @see /usr/include/sys/syslog.h +_dlvl2syslvl() { + local lvl + + case "$1" in + 1) lvl=3;; + 2) lvl=4;; + 3) lvl=6;; + 4) lvl=7;; + *) return 1;; + esac + + # The number is constructed by multiplying the facility by 8 and then + # adding the level. + # About The Syslog Protocol, please refer to the RFC5424 for more details. + echo $((24+$lvl)) +} + +## @brief Prints to stderr, to syslog and/or /dev/kmsg given message with +# given level (priority). +# +# @param lvl Numeric logging level. +# @param msg Message. +# @retval 0 It's always returned, even if logging failed. +# +# @note This function is not supposed to be called manually. Please use +# dinfo(), ddebug(), or others instead which wrap this one. +# +# This is core logging function which logs given message to standard error +# and/or syslog (with POSIX shell command logger) and/or to /dev/kmsg. +# The format is following: +# +# X: some message +# +# where @c X is the first letter of logging level. See module description for +# details on that. +# +# Message to syslog is sent with tag @c kdump. Priorities are mapped as +# following: +# - @c ERROR to @c error +# - @c WARN to @c warning +# - @c INFO to @c info +# - @c DEBUG to @c debug +_do_dlog() { + local lvl="$1"; shift + local msg="$*" + + [[ $lvl -le $kdump_stdloglvl ]] && printf -- 'kdump: %s\n' "$msg" >&2 + + if [[ $lvl -le $kdump_sysloglvl ]]; then + if [[ "$_dlogfd" ]]; then + printf -- "<%s>%s\n" "$(($(_dlvl2syslvl $lvl) & 7))" "$msg" >&$_dlogfd + else + logger -t "kdump[$$]" -p $(_lvl2syspri $lvl) -- "$msg" + fi + fi + + [[ $lvl -le $kdump_kmsgloglvl ]] && \ + echo "<$(_dlvl2syslvl $lvl)>kdump[$$] $msg" >/dev/kmsg +} + +## @brief Internal helper function for _do_dlog() +# +# @param lvl Numeric logging level. +# @param msg Message. +# @retval 0 It's always returned, even if logging failed. +# +# @note This function is not supposed to be called manually. Please use +# dinfo(), ddebug(), or others instead which wrap this one. +# +# This function calls _do_dlog() either with parameter msg, or if +# none is given, it will read standard input and will use every line as +# a message. +# +# This enables: +# dwarn "This is a warning" +# echo "This is a warning" | dwarn +dlog() { + [ -z "$kdump_maxloglvl" ] && return 0 + [[ $1 -le $kdump_maxloglvl ]] || return 0 + + if [[ $# -gt 1 ]]; then + _do_dlog "$@" + else + while read line || [ -n "$line" ]; do + _do_dlog "$1" "$line" + done + fi +} + +## @brief Logs message at DEBUG level (4) +# +# @param msg Message. +# @retval 0 It's always returned, even if logging failed. +ddebug() { + set +x + dlog 4 "$@" + [ -n "$debug" ] && set -x || : +} + +## @brief Logs message at INFO level (3) +# +# @param msg Message. +# @retval 0 It's always returned, even if logging failed. +dinfo() { + set +x + dlog 3 "$@" + [ -n "$debug" ] && set -x || : +} + +## @brief Logs message at WARN level (2) +# +# @param msg Message. +# @retval 0 It's always returned, even if logging failed. +dwarn() { + set +x + dlog 2 "$@" + [ -n "$debug" ] && set -x || : +} + +## @brief It's an alias to dwarn() function. +# +# @param msg Message. +# @retval 0 It's always returned, even if logging failed. +dwarning() { + set +x + dwarn "$@" + [ -n "$debug" ] && set -x || : +} + +## @brief Logs message at ERROR level (1) +# +# @param msg Message. +# @retval 0 It's always returned, even if logging failed. +derror() { + set +x + dlog 1 "$@" + [ -n "$debug" ] && set -x || : +} diff --git a/SOURCES/kdump-udev-throttler b/SOURCES/kdump-udev-throttler new file mode 100755 index 0000000..cd77a31 --- /dev/null +++ b/SOURCES/kdump-udev-throttler @@ -0,0 +1,42 @@ +#!/bin/bash +# This util helps to reduce the workload of kdump service restarting +# on udev event. When hotplugging memory / CPU, multiple udev +# events may be triggered concurrently, and obviously, we don't want +# to restart kdump service for each event. + +# This script will be called by udev, and make sure kdump service is +# restart after all events we are watching are settled. + +# On each call, this script will update try to aquire the $throttle_lock +# The first instance acquired the file lock will keep waiting for events +# to settle and then reload kdump. Other instances will just exit +# In this way, we can make sure kdump service is restarted immediately +# and for exactly once after udev events are settled. + +throttle_lock="/var/lock/kdump-udev-throttle" + +exec 9>$throttle_lock +if [ $? -ne 0 ]; then + echo "Failed to create the lock file! Fallback to non-throttled kdump service restart" + /bin/kdumpctl reload + exit 1 +fi + +flock -n 9 +if [ $? -ne 0 ]; then + echo "Throttling kdump restart for concurrent udev event" + exit 0 +fi + +# Wait for at least 1 second, at most 4 seconds for udev to settle +# Idealy we will have a less than 1 second lag between udev events settle +# and kdump reload +sleep 1 && udevadm settle --timeout 3 + +# Release the lock, /bin/kdumpctl will block and make the process +# holding two locks at the same time and we might miss some events +exec 9>&- + +/bin/kdumpctl reload + +exit 0 diff --git a/SOURCES/kdump.conf b/SOURCES/kdump.conf new file mode 100644 index 0000000..dea2e94 --- /dev/null +++ b/SOURCES/kdump.conf @@ -0,0 +1,185 @@ +# This file contains a series of commands to perform (in order) in the kdump +# kernel after a kernel crash in the crash kernel(1st kernel) has happened. +# +# Directives in this file are only applicable to the kdump initramfs, and have +# no effect once the root filesystem is mounted and the normal init scripts are +# processed. +# +# Currently, only one dump target and path can be specified. If the dumping to +# the configured target fails, the failure action which can be configured via +# the "failure_action" directive will be performed. +# +# Supported options: +# +# raw +# - Will dd /proc/vmcore into . +# Use persistent device names for partition devices, +# such as /dev/vg/. +# +# nfs +# - Will mount nfs to , and copy /proc/vmcore to +# //%HOST-%DATE/, supports DNS. +# +# ssh +# - Will save /proc/vmcore to :/%HOST-%DATE/, +# supports DNS. +# NOTE: make sure the user has write permissions on the server. +# +# sshkey +# - Will use the sshkey to do ssh dump. +# Specify the path of the ssh key to use when dumping +# via ssh. The default value is /root/.ssh/kdump_id_rsa. +# +# +# - Will mount -t , and copy +# /proc/vmcore to //%HOST_IP-%DATE/. +# NOTE: can be a device node, label or uuid. +# It's recommended to use persistent device names +# such as /dev/vg/. +# Otherwise it's suggested to use label or uuid. +# +# path +# - "path" represents the file system path in which vmcore +# will be saved. If a dump target is specified in +# kdump.conf, then "path" is relative to the specified +# dump target. +# +# Interpretation of "path" changes a bit if the user didn't +# specify any dump target explicitly in kdump.conf. In this +# case, "path" represents the absolute path from root. The +# dump target and adjusted path are arrived at automatically +# depending on what's mounted in the current system. +# +# Ignored for raw device dumps. If unset, will use the default +# "/var/crash". +# +# core_collector +# - This allows you to specify the command to copy +# the vmcore. The default is makedumpfile, which on +# some architectures can drastically reduce vmcore size. +# See /sbin/makedumpfile --help for a list of options. +# Note that the -i and -g options are not needed here, +# as the initrd will automatically be populated with a +# config file appropriate for the running kernel. +# The default core_collector for raw/ssh dump is: +# "makedumpfile -F -l --message-level 7 -d 31". +# The default core_collector for other targets is: +# "makedumpfile -l --message-level 7 -d 31". +# +# "makedumpfile -F" will create a flattened vmcore. +# You need to use "makedumpfile -R" to rearrange the dump data to +# a normal dumpfile readable with analysis tools. For example: +# "makedumpfile -R vmcore < vmcore.flat". +# +# For core_collector format details, you can refer to +# kexec-kdump-howto.txt or kdump.conf manpage. +# +# kdump_post +# - This directive allows you to run a executable binary +# or script after the vmcore dump process terminates. +# The exit status of the current dump process is fed to +# the executable binary or script as its first argument. +# All files under /etc/kdump/post.d are collectively sorted +# and executed in lexical order, before binary or script +# specified kdump_post parameter is executed. +# +# kdump_pre +# - Works like the "kdump_post" directive, but instead of running +# after the dump process, runs immediately before it. +# Exit status of this binary is interpreted as follows: +# 0 - continue with dump process as usual +# non 0 - run the final action (reboot/poweroff/halt) +# All files under /etc/kdump/pre.d are collectively sorted and +# executed in lexical order, after binary or script specified +# kdump_pre parameter is executed. +# Even if the binary or script in /etc/kdump/pre.d directory +# returns non 0 exit status, the processing is continued. +# +# extra_bins +# - This directive allows you to specify additional binaries or +# shell scripts to be included in the kdump initrd. +# Generally they are useful in conjunction with a kdump_post +# or kdump_pre binary or script which depends on these extra_bins. +# +# extra_modules +# - This directive allows you to specify extra kernel modules +# that you want to be loaded in the kdump initrd. +# Multiple modules can be listed, separated by spaces, and any +# dependent modules will automatically be included. +# +# failure_action +# - Action to perform in case dumping fails. +# reboot: Reboot the system. +# halt: Halt the system. +# poweroff: Power down the system. +# shell: Drop to a bash shell. +# Exiting the shell reboots the system by default, +# or perform "final_action". +# dump_to_rootfs: Dump vmcore to rootfs from initramfs context and +# reboot by default or perform "final_action". +# Useful when non-root dump target is specified. +# The default option is "reboot". +# +# default +# - Same as the "failure_action" directive above, but this directive +# is obsolete and will be removed in the future. +# +# final_action +# - Action to perform in case dumping succeeds. Also performed +# when "shell" or "dump_to_rootfs" failure action finishes. +# Each action is same as the "failure_action" directive above. +# The default is "reboot". +# +# force_rebuild <0 | 1> +# - By default, kdump initrd will only be rebuilt when necessary. +# Specify 1 to force rebuilding kdump initrd every time when kdump +# service starts. +# +# force_no_rebuild <0 | 1> +# - By default, kdump initrd will be rebuilt when necessary. +# Specify 1 to bypass rebuilding of kdump initrd. +# +# force_no_rebuild and force_rebuild options are mutually +# exclusive and they should not be set to 1 simultaneously. +# +# override_resettable <0 | 1> +# - Usually an unresettable block device can't be a dump target. +# Specifying 1 when you want to dump even though the block +# target is unresettable +# By default, it is 0, which will not try dumping destined to fail. +# +# dracut_args +# - Pass extra dracut options when rebuilding kdump initrd. +# +# fence_kdump_args +# - Command line arguments for fence_kdump_send (it can contain +# all valid arguments except hosts to send notification to). +# +# fence_kdump_nodes +# - List of cluster node(s) except localhost, separated by spaces, +# to send fence_kdump notifications to. +# (this option is mandatory to enable fence_kdump). +# + +#raw /dev/vg/lv_kdump +#ext4 /dev/vg/lv_kdump +#ext4 LABEL=/boot +#ext4 UUID=03138356-5e61-4ab3-b58e-27507ac41937 +#nfs my.server.com:/export/tmp +#nfs [2001:db8::1:2:3:4]:/export/tmp +#ssh user@my.server.com +#ssh user@2001:db8::1:2:3:4 +#sshkey /root/.ssh/kdump_id_rsa +path /var/crash +core_collector makedumpfile -l --message-level 7 -d 31 +#core_collector scp +#kdump_post /var/crash/scripts/kdump-post.sh +#kdump_pre /var/crash/scripts/kdump-pre.sh +#extra_bins /usr/bin/lftp +#extra_modules gfs2 +#failure_action shell +#force_rebuild 1 +#force_no_rebuild 1 +#dracut_args --omit-drivers "cfg80211 snd" --add-drivers "ext2 ext3" +#fence_kdump_args -p 7410 -f auto -c 0 -i 10 +#fence_kdump_nodes node1 node2 diff --git a/SOURCES/kdump.conf.5 b/SOURCES/kdump.conf.5 new file mode 100644 index 0000000..2c5a2bc --- /dev/null +++ b/SOURCES/kdump.conf.5 @@ -0,0 +1,375 @@ +.TH KDUMP.CONF 5 "07/23/2008" "kexec-tools" + +.SH NAME +kdump.conf \- configuration file for kdump kernel. + +.SH DESCRIPTION + +kdump.conf is a configuration file for the kdump kernel crash +collection service. + +kdump.conf provides post-kexec instructions to the kdump kernel. It is +stored in the initrd file managed by the kdump service. If you change +this file and do not want to reboot in order for the changes to take +effect, restart the kdump service to rebuild the initrd. + +For most configurations, you can simply review the examples provided +in the stock /etc/kdump.conf. + +.B NOTE: +For filesystem dumps the dump target must be mounted before building +kdump initramfs. + +kdump.conf only affects the behavior of the initramfs. Please read the +kdump operational flow section of kexec-kdump-howto.txt in the docs to better +understand how this configuration file affects the behavior of kdump. + +.SH OPTIONS + +.B raw +.RS +Will dd /proc/vmcore into . Use persistent device names for +partition devices, such as /dev/vg/. +.RE + +.B nfs +.RS +Will mount nfs to , and copy /proc/vmcore to //%HOST-%DATE/, +supports DNS. Note that a fqdn should be used as the server name in the +mount point. +.RE + +.B ssh +.RS +Will save /proc/vmcore through ssh pipe to :/%HOST-%DATE/, +supports DNS. NOTE: make sure user has necessary write permissions on +server and that a fqdn is used as the server name. +.RE + +.B sshkey +.RS +Specify the path of the ssh key to use when dumping via ssh. +The default value is /root/.ssh/kdump_id_rsa. +.RE + +.B +.RS +Will mount -t , and copy /proc/vmcore to +//%HOST_IP-%DATE/. NOTE: can be a device node, label +or uuid. It's recommended to use persistent device names such as +/dev/vg/. Otherwise it's suggested to use label or uuid. +.RE + +.B path +.RS +"path" represents the file system path in which vmcore will be saved. +If a dump target is specified in kdump.conf, then "path" is relative to the +specified dump target. +.PP +Interpretation of "path" changes a bit if the user didn't specify any dump +target explicitly in kdump.conf. In this case, "path" represents the +absolute path from root. The dump target and adjusted path are arrived +at automatically depending on what's mounted in the current system. +.PP +Ignored for raw device dumps. If unset, will use the default "/var/crash". +.RE + +.B core_collector +.RS +This allows you to specify the command to copy the vmcore. +The default is makedumpfile, which on some architectures can drastically reduce +core file size. See /sbin/makedumpfile --help for a list of options. +Note that the -i and -g options are not needed here, as the initrd +will automatically be populated with a config file appropriate +for the running kernel. +.PP +Note 1: About default core collector: +The default core_collector for raw/ssh dump is: +"makedumpfile -F -l --message-level 7 -d 31". +The default core_collector for other targets is: +"makedumpfile -l --message-level 7 -d 31". +Even if core_collector option is commented out in kdump.conf, makedumpfile +is the default core collector and kdump uses it internally. +If one does not want makedumpfile as default core_collector, then they +need to specify one using core_collector option to change the behavior. +.PP +Note 2: If "makedumpfile -F" is used then you will get a flattened format +vmcore.flat, you will need to use "makedumpfile -R" to rearrange the +dump data from standard input to a normal dumpfile (readable with analysis +tools). +ie. "makedumpfile -R vmcore < vmcore.flat" +.PP +Note 3: If specified core_collector simply copy the vmcore file to the +dump target (eg: cp, scp), the vmcore could be significantly large. +Please make sure the dump target has enough space, at leaset larger +than the system's RAM. + +.RE + +.B kdump_post +.RS +This directive allows you to run a specified executable +just after the vmcore dump process terminates. The exit +status of the current dump process is fed to the kdump_post +executable as its first argument($1). Executable can modify +it to indicate the new exit status of succeeding dump process, +.PP +All files under /etc/kdump/post.d are collectively sorted +and executed in lexical order, before binary or script +specified kdump_post parameter is executed. +.PP +Note that scripts written for use with this directive must use +the /bin/bash interpreter. +.RE + +.B kdump_pre +.RS +Works just like the "kdump_post" directive, but instead +of running after the dump process, runs immediately +before. Exit status of this binary is interpreted +as follows: +.PP +0 - continue with dump process as usual +.PP +non 0 - run the final action (reboot/poweroff/halt) +.PP +All files under /etc/kdump/pre.d are collectively sorted and +executed in lexical order, after binary or script specified +kdump_pre parameter is executed. +Even if the binary or script in /etc/kdump/pre.d directory +returns non 0 exit status, the processing is continued. +.PP +Note that scripts written for this directive must use +the /bin/bash interpreter. +.RE + +.B extra_bins +.RS +This directive allows you to specify additional +binaries or shell scripts you'd like to include in +your kdump initrd. Generally only useful in +conjunction with a kdump_post binary or script that +relies on other binaries or scripts. +.RE + +.B extra_modules +.RS +This directive allows you to specify extra kernel +modules that you want to be loaded in the kdump +initrd, typically used to set up access to +non-boot-path dump targets that might otherwise +not be accessible in the kdump environment. Multiple +modules can be listed, separated by spaces, and any +dependent modules will automatically be included. +.RE + +.B failure_action +.RS +Action to perform in case dumping to the intended target fails. The default is "reboot". +reboot: Reboot the system (this is what most people will want, as it returns the system +to a normal state). halt: Halt the system and lose the vmcore. poweroff: The system +will be powered down. shell: Drop to a shell session inside the initramfs, from which +you can manually perform additional recovery actions. Exiting this shell reboots the +system by default or performs "final_action". +Note: kdump uses bash as the default shell. dump_to_rootfs: If non-root dump +target is specified, the failure action can be set as dump_to_rootfs. That means when +dumping to target fails, dump vmcore to rootfs from initramfs context and reboot +by default or perform "final_action". +.RE + +.B default +.RS +Same as the "failure_action" directive above, but this directive is obsolete +and will be removed in the future. +.RE + +.B final_action +.RS +Action to perform in case dumping to the intended target succeeds. +Also performed when "shell" or "dump_to_rootfs" failure action finishes. +Each action is same as the "failure_action" directive above. +The default is "reboot". +.RE + +.B force_rebuild <0 | 1> +.RS +By default, kdump initrd will only be rebuilt when necessary. +Specify 1 to force rebuilding kdump initrd every time when kdump service starts. +.RE + +.B force_no_rebuild <0 | 1> +.RS +By default, kdump initrd will be rebuilt when necessary. +Specify 1 to bypass rebuilding of kdump initrd. + +.PP +force_no_rebuild and force_rebuild options are mutually exclusive and +they should not be set to 1 simultaneously. +.RE + +.B override_resettable <0 | 1> +.RS +Usually an unresettable block device can't be a dump target. Specifying 1 means +that even though the block target is unresettable, the user wants to try dumping anyway. +By default, it's set to 0, which will not try something destined to fail. +.RE + + +.B dracut_args +.RS +Kdump uses dracut to generate initramfs for second kernel. This option +allows a user to pass arguments to dracut directly. +.RE + + +.B fence_kdump_args +.RS +Command line arguments for fence_kdump_send (it can contain all valid +arguments except hosts to send notification to). +.RE + + +.B fence_kdump_nodes +.RS +List of cluster node(s) except localhost, separated by spaces, to send fence_kdump notification +to (this option is mandatory to enable fence_kdump). +.RE + + +.SH DEPRECATED OPTIONS + +.B net | +.RS +net option is replaced by nfs and ssh options. Use nfs or ssh options +directly. +.RE + +.B options