Blob Blame History Raw
#! /bin/sh
KEXEC=/sbin/kexec

KDUMP_KERNELVER=""
KDUMP_COMMANDLINE=""
KEXEC_ARGS=""
KDUMP_CONFIG_FILE="/etc/kdump.conf"
MKDUMPRD="/sbin/mkdumprd -f"
SAVE_PATH=/var/crash
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
DUMP_TARGET=""

. /lib/kdump/kdump-lib.sh

standard_kexec_args="-p"

if [ -f /etc/sysconfig/kdump ]; then
	. /etc/sysconfig/kdump
fi

single_instance_lock()
{
	exec 9>/var/lock/kdump
	flock 9
}

# remove_cmdline_param <kernel cmdline> <param1> [<param2>] ... [<paramN>]
# Remove a list of kernel parameters from a given kernel cmdline and print the result.
# For each "arg" in the removing params list, "arg" and "arg=xxx" will be removed if exists.
function remove_cmdline_param()
{
	local cmdline=$1
	shift

	for arg in $@; do
		cmdline=`echo $cmdline | \
			 sed -e "s/\b$arg=[^ ]*\b//g" \
			     -e "s/\b$arg\b//g" \
			     -e "s/\s\+/ /g"`
	done
	echo $cmdline
}

#
# This function returns the "initial apicid" of the
# boot cpu (cpu 0) if present.
#
function get_bootcpu_initial_apicid()
{
    awk '							\
	BEGIN { CPU = "-1"; }					\
	$1=="processor" && $2==":"	{ CPU = $NF; }		\
	CPU=="0" && /initial apicid/	{ print $NF; }		\
	'							\
	/proc/cpuinfo
}

#
# This function appends argument "$2=$3" to string ($1) if not already present.
#
function append_cmdline()
{
    local cmdline=$1
    local newstr=${cmdline/$2/""}

    # unchanged str implies argument wasn't there
    if [ "$cmdline" == "$newstr" ]; then
	cmdline="${cmdline} ${2}=${3}"
    fi

    echo $cmdline
}

# This function performs a series of edits on the command line
function prepare_cmdline()
{
	local cmdline;
	if [ -z "$KDUMP_COMMANDLINE" ]; then
		cmdline=`cat /proc/cmdline`
	else
		cmdline=${KDUMP_COMMANDLINE}
	fi
	cmdline=`remove_cmdline_param "$cmdline" crashkernel hugepages hugepagesz`


	cmdline="${cmdline} ${KDUMP_COMMANDLINE_APPEND}"

	local id=`get_bootcpu_initial_apicid`
	if [ ! -z ${id} ] ; then
		cmdline=`append_cmdline "${cmdline}" disable_cpu_apicid ${id}`
	fi

	echo $cmdline
}


function save_core()
{
	coredir="/var/crash/`date +"%Y-%m-%d-%H:%M"`"

	mkdir -p $coredir
	cp --sparse=always /proc/vmcore $coredir/vmcore-incomplete
	if [ $? == 0 ]; then
		mv $coredir/vmcore-incomplete $coredir/vmcore
		echo "saved a vmcore to $coredir"
	else
		echo "failed to save a vmcore to $coredir" >&2
	fi

	# pass the dmesg to Abrt tool if exists, in order
	# to collect the kernel oops message.
	# https://fedorahosted.org/abrt/
	if [ -x /usr/bin/dumpoops ]; then
		makedumpfile --dump-dmesg $coredir/vmcore $coredir/dmesg >/dev/null 2>&1
		dumpoops -d $coredir/dmesg >/dev/null 2>&1
		if [ $? == 0 ]; then
			echo "kernel oops has been collected by abrt tool"
		fi
	fi
}

function rebuild_initrd()
{
	$MKDUMPRD $kdump_initrd $kdump_kver
	if [ $? != 0 ]; then
		echo "mkdumprd: failed to make kdump initrd" >&2
		return 1
	fi
}

#$1: the files to be checked with IFS=' '
function check_exist()
{
	for file in $1; do
		if [ ! -f "$file" ]; then
			echo -n "Error: $file not found."; echo
			return 1
		fi
	done
}

#$1: the files to be checked with IFS=' '
function check_executable()
{
	for file in $1; do
		if [ ! -x "$file" ]; then
			echo -n "Error: $file is not executable."; echo
			return 1
		fi
	done
}

function check_config()
{
	local nr

	nr=$(awk 'BEGIN{cnt=0} /^raw|^ssh[[:blank:]]|^nfs|^ext[234]|^xfs|^btrfs|^minix/{cnt++} END{print cnt}' $KDUMP_CONFIG_FILE)
	[ $nr -gt 1 ] && {
		echo "More than one dump targets specified."
		return 1
	}

	while read config_opt config_val; do
               # remove inline comments after the end of a directive.                                                                      
               config_val=$(strip_comments $config_val)
		case "$config_opt" in
		\#* | "")
			;;
		raw|ext2|ext3|ext4|minix|btrfs|xfs|nfs|ssh|sshkey|path|core_collector|kdump_post|kdump_pre|extra_bins|extra_modules|default|force_rebuild|dracut_args)
			[ -z "$config_val" ] && {
				echo "Invalid kdump config value for option $config_opt."
				return 1;
			}
			;;
		net|options|link_delay|disk_timeout|debug_mem_level|blacklist)
			echo "Deprecated kdump config option: $config_opt. Refer to kdump.conf manpage for alternatives."
			return 1
			;;
		*)
			echo "Invalid kdump config option $config_opt"
			return 1;
			;;
		esac
	done < $KDUMP_CONFIG_FILE
	return 0
}

# check_fence_kdump <image timestamp>
# return 0 if fence_kdump is configured and kdump initrd needs to be rebuilt
function check_fence_kdump()
{
	local image_time=$1
	local cib_time

	is_fence_kdump || return 1

	cib_time=`pcs cluster cib | xmllint --xpath 'string(/cib/@cib-last-written)' - | \
		  xargs -0 date +%s --date`

	if [ -z $cib_time -o $cib_time -le $image_time ]; then
		return 1
	fi

	return 0
}

function check_rebuild()
{
	local extra_modules modified_files=""
	local _force_rebuild force_rebuild="0"

	if [ -z "$KDUMP_KERNELVER" ]; then
		kdump_kver=`uname -r`
	else
		kdump_kver=$KDUMP_KERNELVER
	fi

	kdump_kernel="${KDUMP_BOOTDIR}/${KDUMP_IMG}-${kdump_kver}${KDUMP_IMG_EXT}"
	kdump_initrd="${KDUMP_BOOTDIR}/initramfs-${kdump_kver}kdump.img"

	_force_rebuild=`grep ^force_rebuild $KDUMP_CONFIG_FILE 2>/dev/null`
	if [ $? -eq 0 ]; then
		force_rebuild=`echo $_force_rebuild | cut -d' '  -f2`
		if [ "$force_rebuild" != "0" ] && [ "$force_rebuild" != "1" ];then
			echo "Error: force_rebuild value is invalid"
			return 1
		fi
	fi

	#will rebuild every time if extra_modules are specified
	extra_modules=`grep ^extra_modules $KDUMP_CONFIG_FILE`
	[ -n "$extra_modules" ] && force_rebuild="1"

	#check to see if dependent files has been modified
	#since last build of the image file
	if [ -f $kdump_initrd ]; then
		image_time=`stat -c "%Y" $kdump_initrd 2>/dev/null`
	else
		image_time=0
	fi

	#also rebuild when cluster conf is changed and fence kdump is enabled.
	check_fence_kdump $image_time && modified_files="cluster-cib"

	EXTRA_BINS=`grep ^kdump_post $KDUMP_CONFIG_FILE | cut -d\  -f2`
	CHECK_FILES=`grep ^kdump_pre $KDUMP_CONFIG_FILE | cut -d\  -f2`
	EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
	CHECK_FILES=`grep ^extra_bins $KDUMP_CONFIG_FILE | cut -d\  -f2-`
	EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
	files="$KDUMP_CONFIG_FILE $kdump_kernel $EXTRA_BINS"

	if [ -f $FENCE_KDUMP_CONFIG ]; then
		files="$files $FENCE_KDUMP_CONFIG"
	fi

	check_exist "$files" && check_executable "$EXTRA_BINS"
	[ $? -ne 0 ] && return 1

	for file in $files; do
		time_stamp=`stat -c "%Y" $file`
		if [ "$time_stamp" -gt "$image_time" ]; then
			modified_files="$modified_files $file"
		fi
	done

	if [ $image_time -eq 0 ]; then
		echo  -n "No kdump initial ramdisk found."; echo
	elif [ "$force_rebuild" != "0" ]; then
		echo -n "Force rebuild $kdump_initrd"; echo
	elif [ -n "$modified_files" ]; then
		echo "Detected change(s) the following file(s):"
		echo -n "  "; echo "$modified_files" | sed 's/\s/\n  /g'
	else
		return 0
	fi

	echo "Rebuilding $kdump_initrd"
	rebuild_initrd
	return $?
}

# This function check iomem and determines if we have more than
# 4GB of ram available. Returns 1 if we do, 0 if we dont
function need_64bit_headers()
{
    return `tail -n 1 /proc/iomem | awk '{ split ($1, r, "-"); \
    print (strtonum("0x" r[2]) > strtonum("0xffffffff")); }'`
}

# Load the kdump kerel specified in /etc/sysconfig/kdump
# If none is specified, try to load a kdump kernel with the same version
# as the currently running kernel.
function load_kdump()
{
	MEM_RESERVED=$(cat /sys/kernel/kexec_crash_size)
	if [ $MEM_RESERVED -eq 0 ]
	then
		echo "No memory reserved for crash kernel." >&2
		return 1
	fi

	ARCH=`uname -m`
	if [ "$ARCH" == "i686" -o "$ARCH" == "i386" ]
	then

		need_64bit_headers
		if [ $? == 1 ]
		then
			FOUND_ELF_ARGS=`echo $KEXEC_ARGS | grep elf32-core-headers`
			if [ -n "$FOUND_ELF_ARGS" ]
			then
				echo -n "Warning: elf32-core-headers overrides correct elf64 setting"
				echo
			else	
				KEXEC_ARGS="$KEXEC_ARGS --elf64-core-headers"
			fi
		else
			FOUND_ELF_ARGS=`echo $KEXEC_ARGS | grep elf64-core-headers`
			if [ -z "$FOUND_ELF_ARGS" ]
			then
				KEXEC_ARGS="$KEXEC_ARGS --elf32-core-headers"
			fi
		fi
	fi

	KDUMP_COMMANDLINE=`prepare_cmdline`

	$KEXEC $KEXEC_ARGS $standard_kexec_args \
		--command-line="$KDUMP_COMMANDLINE" \
		--initrd=$kdump_initrd $kdump_kernel 2>/dev/null
	if [ $? == 0 ]; then
		echo "kexec: loaded kdump kernel"
		return 0
	else
		echo "kexec: failed to load kdump kernel" >&2
		return 1
	fi
}

function check_ssh_config()
{
	while read config_opt config_val; do
               # remove inline comments after the end of a directive.
               config_val=$(strip_comments $config_val)
		case "$config_opt" in
		sshkey)
			if [ -f "$config_val" ]; then
				# canonicalize the path
				SSH_KEY_LOCATION=$(/usr/bin/readlink -m $config_val)
			else
				echo "WARNING: '$config_val' doesn't exist, using default value '$SSH_KEY_LOCATION'"
			fi
			;;
		path)
			SAVE_PATH=$config_val
			;;
		ssh)
			DUMP_TARGET=$config_val
			;;
		*)
			;;
		esac
	done < $KDUMP_CONFIG_FILE

	#make sure they've configured kdump.conf for ssh dumps
	local SSH_TARGET=`echo -n $DUMP_TARGET | sed -n '/.*@/p'`
	if [ -z "$SSH_TARGET" ]; then
		return 1
	fi
	return 0
}

function check_ssh_target()
{
	local _ret
	ssh -q -i $SSH_KEY_LOCATION -o BatchMode=yes $DUMP_TARGET mkdir -p $SAVE_PATH
	_ret=$?
	if [ $_ret -ne 0 ]; then
		echo "Could not create $DUMP_TARGET:$SAVE_PATH, you probably need to run \"kdumpctl propagate\""  >&2
		return 1
	fi
	return 0
}

function propagate_ssh_key()
{
	check_ssh_config
	if [ $? -ne 0 ]; then
		echo "No ssh config specified in $KDUMP_CONFIG_FILE.  Can't propagate" >&2
		exit 1
	fi

	local KEYFILE=$SSH_KEY_LOCATION
	local errmsg="Failed to propagate ssh key"

	#Check to see if we already created key, if not, create it.
	if [ -f $KEYFILE ]; then
		echo "Using existing keys..."
	else
		echo -n "Generating new ssh keys... "
		/usr/bin/ssh-keygen -t rsa -f $KEYFILE -N "" 2>&1 > /dev/null
		echo "done."
	fi

	#now find the target ssh user and server to contact.
	SSH_USER=`echo $DUMP_TARGET | cut -d\  -f2 | cut -d@ -f1`
	SSH_SERVER=`echo $DUMP_TARGET | sed -e's/\(.*@\)\(.*$\)/\2/'`
 
	#now send the found key to the found server
	ssh-copy-id -i $KEYFILE $SSH_USER@$SSH_SERVER
	RET=$?
	if [ $RET == 0 ]; then
		echo $KEYFILE has been added to ~$SSH_USER/.ssh/authorized_keys on $SSH_SERVER
		return 0
	else
		echo $errmsg, $KEYFILE failed in transfer to $SSH_SERVER  >&2
		exit 1
	fi
		
}

function check_current_kdump_status()
{
	rc=`cat /sys/kernel/kexec_crash_loaded`
	if [ $rc == 1 ]; then
		return 0
	else
		return 1
	fi
}

function save_raw()
{
	local kdump_dir
	local raw_target

	raw_target=$(awk '$1 ~ /^raw$/ { print $2; }' $KDUMP_CONFIG_FILE)
	[ -z "$raw_target" ] && return 0
	[ -b "$raw_target" ] || {
		echo "raw partition $raw_target not found"
		return 1
	}
	kdump_dir=`grep ^path $KDUMP_CONFIG_FILE | cut -d' '  -f2-`
	if [ -z "${kdump_dir}" ]; then
		coredir="/var/crash/`date +"%Y-%m-%d-%H:%M"`"
	else
		coredir="${kdump_dir}/`date +"%Y-%m-%d-%H:%M"`"
	fi

	mkdir -p "$coredir"
	[ -d "$coredir" ] || {
		echo "failed to create $coredir"
		return 1
	}
	if makedumpfile -R $coredir/vmcore <$raw_target >/dev/null 2>&1; then
		# dump found
		echo "Dump saved to $coredir/vmcore"
		# wipe makedumpfile header
		dd if=/dev/zero of=$raw_target bs=1b count=1 2>/dev/null
	else
		rm -rf "$coredir"
	fi

	return 0
}

get_save_path() {
	local _save_path=$(grep "^path" /etc/kdump.conf|awk '{print $2}')
	if [ -z "$_save_path" ]; then
		_save_path="/var/crash"
	fi

	echo $_save_path
}

is_dump_target_configured() {
    local _target

    _target=$(egrep "^ext[234]|^xfs|^btrfs|^minix|^raw|^ssh|^nfs" /etc/kdump.conf)

     [ -n "$_target" ]
}

local_fs_dump_target()
{
	local _target

	_target=$(egrep "^ext[234]|^xfs|^btrfs|^minix" /etc/kdump.conf)
	if [ $? -eq 0 ]; then
		echo $_target|awk '{print $2}'
	fi
}

path_to_be_relabeled() {
	local _path _target _mnt="/" _rmnt

	if is_dump_target_configured; then
		_target=$(local_fs_dump_target)
		if [[ -n "$_target" ]]; then
			_mnt=$(findmnt -k -f -n -r -o TARGET $_target)
			if [ -z "$_mnt" ]; then
				return
			fi
		else
			return
		fi
	fi

	_path=$(get_save_path)
	# if $_path is masked by other mount, we will not relabel it.
	_rmnt=$(df $_mnt/$_path 2>/dev/null | tail -1 | awk '{ print $NF }')
	if [ "$_rmnt" == "$_mnt" ]; then
		echo $_mnt/$_path
	fi
}

selinux_relabel()
{
	local _path _i _attr

	_path=$(path_to_be_relabeled)
	if [ -z "$_path" ] || ! [ -d "$_path" ] ; then
		return
	fi

	for _i in $(find $_path); do
		_attr=$(getfattr -m "security.selinux" $_i 2>/dev/null)
		if [ -z "$_attr" ]; then
			restorecon $_i;
		fi
	done
}

# Check if secure boot is being enforced.
#
# Per Peter Jones, we need check efivar SecureBoot-$(the UUID) and
# SetupMode-$(the UUID), they are both 5 bytes binary data. The first four
# bytes are the attributes associated with the variable and can safely be
# ignored, the last bytes are one-byte true-or-false variables. If SecureBoot
# is 1 and SetupMode is 0, then secure boot is being enforced.
#
# SecureBoot-UUID won't always be set when securelevel is 1. For legacy-mode
# and uefi-without-seucre-enabled system, we can manually enable secure mode
# by writing "1" to securelevel. So check both efi var and secure mode is a 
# more sane way.
#
# Assume efivars is mounted at /sys/firmware/efi/efivars.
function is_secure_boot_enforced()
{
	local secure_boot_file setup_mode_file
	local secure_boot_byte setup_mode_byte

	secure_boot_file=$(find /sys/firmware/efi/efivars -name SecureBoot-* 2>/dev/null)
	setup_mode_file=$(find /sys/firmware/efi/efivars -name SetupMode-* 2>/dev/null)

	if [ -f "$secure_boot_file" ] && [ -f "$setup_mode_file" ]; then
		secure_boot_byte=$(hexdump -v -e '/1 "%d\ "' $secure_boot_file|cut -d' ' -f 5)
		setup_mode_byte=$(hexdump -v -e '/1 "%d\ "' $setup_mode_file|cut -d' ' -f 5)

		if [ "$secure_boot_byte" = "1" ] && [ "$setup_mode_byte" = "0" ]; then
			return 0
		fi
	fi

	return 1
}

# Check if secure mode is being enforced (securelevel =? 1)
function is_secure_mode_enforced()
{
	local secure_mode_byte

	secure_mode_byte=$(cat /sys/kernel/security/securelevel)

	if [ "$secure_mode_byte" = "1" ]; then
		return 0
	fi

	return 1
}

function check_kdump_feasibility()
{
	if is_secure_boot_enforced; then
		echo "Secure Boot is Enabled. Kdump service can't be started. Disable Secure Boot and retry"
		return 1;
	elif is_secure_mode_enforced; then
		echo "securelevel is set to 1 (Secure Mode). Kdump service can't be started."
		return 1
	fi

	if [ ! -e /sys/kernel/kexec_crash_loaded ]; then
		echo "Kdump is not supported on this kernel"
		return 1
	fi
}

function start()
{
	check_config
	if [ $? -ne 0 ]; then
		echo "Starting kdump: [FAILED]"
		return 1
	fi

	if sestatus 2>/dev/null | grep -q "SELinux status.*enabled"; then
		selinux_relabel
	fi
	save_raw
	if [ $? -ne 0 ]; then
		echo "Starting kdump: [FAILED]"
		return 1
	fi

	check_kdump_feasibility
	if [ $? -ne 0 ]; then
		echo "Starting kdump: [FAILED]"
		return 1
	fi

	check_current_kdump_status
	if [ $? == 0 ]; then
		echo "Kdump already running: [WARNING]"
		return 0
	fi

	if check_ssh_config; then
		if ! check_ssh_target; then
			echo "Starting kdump: [FAILED]"
			return 1
		fi
	fi

	check_rebuild
	if [ $? != 0 ]; then
		echo "Starting kdump: [FAILED]"
		return 1
	fi
	load_kdump
	if [ $? != 0 ]; then
		echo "Starting kdump: [FAILED]"
		return 1
	fi

	echo "Starting kdump: [OK]"
}

function stop()
{
	$KEXEC -p -u 2>/dev/null
	if [ $? == 0 ]; then
		echo "kexec: unloaded kdump kernel"
		echo "Stopping kdump: [OK]"
		return 0
	else
		echo "kexec: failed to unloaded kdump kernel"
		echo "Stopping kdump: [FAILED]"
		return 1
	fi
}

if [ ! -f "$KDUMP_CONFIG_FILE" ]; then
	echo "Error: No kdump config file found!"  >&2
	exit 1
fi

main ()
{
	case "$1" in
	  start)
		if [ -s /proc/vmcore ]; then
			save_core
			reboot
		else
			start
		fi
		;;
	  stop)
		stop
		;;
	  status)
		EXIT_CODE=0
		check_current_kdump_status
		case "$?" in
		  0)
			echo "Kdump is operational"
			EXIT_CODE=0
			;;
		  1)
			echo "Kdump is not operational"
			EXIT_CODE=3
			;;
		esac
		exit $EXIT_CODE
		;;
	  restart)
		stop
		start
		;;
	  condrestart)
		;;
	  propagate)
		propagate_ssh_key
		;;
	  *)
		echo $"Usage: $0 {start|stop|status|restart|propagate}"
		exit 1
	esac
}

# Other kdumpctl instances will block in queue, until this one exits
single_instance_lock

# To avoid fd 9 leaking, we invoke a subshell, close fd 9 and call main.
# So that fd isn't leaking when main is invoking a subshell.
(exec 9<&-; main $1)

exit $?