diff --git a/.kexec-tools.metadata b/.kexec-tools.metadata index e519eef..3927396 100644 --- a/.kexec-tools.metadata +++ b/.kexec-tools.metadata @@ -1,4 +1,4 @@ dcdb6d2488c8a31ae95563e2113860ae16256c8f SOURCES/eppic_030413.tar.gz -d903c5631b96913a4b31d4cc5df8aab4837e0e45 SOURCES/kexec-tools-po.tar.gz +6da479b880470fd57ecf040e0b1d1ffce21f168b SOURCES/kexec-tools-po-20131224.tgz f119507a92446bcda58b108fd7f61e6d9f187358 SOURCES/kexec-tools-2.0.4.tar.bz2 077dfb8fbf2f12b5efca385eb1fda55dee2096d5 SOURCES/makedumpfile-1.5.4.tar.gz diff --git a/SOURCES/dracut-kdump.sh b/SOURCES/dracut-kdump.sh index f38891d..d9e65ac 100755 --- a/SOURCES/dracut-kdump.sh +++ b/SOURCES/dracut-kdump.sh @@ -11,7 +11,7 @@ fi set -o pipefail KDUMP_PATH="/var/crash" CORE_COLLECTOR="" -DEFAULT_CORE_COLLECTOR="makedumpfile -c --message-level 1 -d 31" +DEFAULT_CORE_COLLECTOR="makedumpfile -l --message-level 1 -d 31" DMESG_COLLECTOR="/sbin/vmcore-dmesg" DEFAULT_ACTION="reboot -f" DATEDIR=`date +%Y.%m.%d-%T` @@ -287,6 +287,21 @@ read_kdump_conf() done < $conf_file } +fence_kdump_notify() +{ + local nodes + + if [ -f $FENCE_KDUMP_NODES ]; then + if [ -f $FENCE_KDUMP_CONFIG ]; then + . $FENCE_KDUMP_CONFIG + fi + + read nodes < $FENCE_KDUMP_NODES + $FENCE_KDUMP_SEND $FENCE_KDUMP_OPTS $nodes & + fi +} + +fence_kdump_notify read_kdump_conf if [ -z "$CORE_COLLECTOR" ];then @@ -312,7 +327,7 @@ if [ $? -ne 0 ]; then echo "kdump: kdump_pre script exited with non-zero status!" do_final_action fi - +make_trace_mem "kdump saving vmcore" '1:shortmem' '2+:mem' '3+:slab' do_dump DUMP_RETVAL=$? 
diff --git a/SOURCES/dracut-module-setup.sh b/SOURCES/dracut-module-setup.sh index c013430..bdadf7c 100755 --- a/SOURCES/dracut-module-setup.sh +++ b/SOURCES/dracut-module-setup.sh @@ -20,6 +20,10 @@ depends() { _dep="$_dep drm" fi + if is_fence_kdump; then + _dep="$_dep network" + fi + echo $_dep return 0 } @@ -178,7 +182,7 @@ kdump_setup_znet() { # Setup dracut to bringup a given network interface kdump_setup_netdev() { local _netdev=$1 - local _static _proto + local _static _proto _ip_conf _ip_opts _ifname_opts if [ "$(uname -m)" = "s390x" ]; then kdump_setup_znet $_netdev @@ -192,7 +196,14 @@ kdump_setup_netdev() { _proto=dhcp fi - echo " ip=${_static}$_netdev:${_proto}" > ${initdir}/etc/cmdline.d/40ip.conf + _ip_conf="${initdir}/etc/cmdline.d/40ip.conf" + _ip_opts=" ip=${_static}$_netdev:${_proto}" + + # dracut doesn't allow duplicated configuration for same NIC, even they're exactly the same. + # so we have to avoid adding duplicates + if [ ! -f $_ip_conf ] || ! grep -q $_ip_opts $_ip_conf; then + echo "$_ip_opts" >> $_ip_conf + fi if kdump_is_bridge "$_netdev"; then kdump_setup_bridge "$_netdev" @@ -203,7 +214,8 @@ kdump_setup_netdev() { elif kdump_is_vlan "$_netdev"; then kdump_setup_vlan "$_netdev" else - echo " ifname=$_netdev:$(kdump_get_mac_addr $_netdev)" >> ${initdir}/etc/cmdline.d/40ip.conf + _ifname_opts=" ifname=$_netdev:$(kdump_get_mac_addr $_netdev)" + echo "$_ifname_opts" >> $_ip_conf fi kdump_setup_dns "$_netdev" @@ -234,9 +246,19 @@ kdump_install_net() { fi kdump_setup_netdev "${_netdev}" + #save netdev used for kdump as cmdline - echo "kdumpnic=${_netdev}" > ${initdir}/etc/cmdline.d/60kdumpnic.conf - echo "bootdev=${_netdev}" > ${initdir}/etc/cmdline.d/70bootdev.conf + # Whoever calling kdump_install_net() is setting up the default gateway, + # ie. bootdev/kdumpnic. So don't override the setting if calling + # kdump_install_net() for another time. 
For example, after setting eth0 as + # the default gateway for network dump, eth1 in the fence kdump path will + # call kdump_install_net again and we don't want eth1 to be the default + # gateway. + if [ ! -f ${initdir}/etc/cmdline.d/60kdumpnic.conf ] && + [ ! -f ${initdir}/etc/cmdline.d/70bootdev.conf ]; then + echo "kdumpnic=${_netdev}" > ${initdir}/etc/cmdline.d/60kdumpnic.conf + echo "bootdev=${_netdev}" > ${initdir}/etc/cmdline.d/70bootdev.conf + fi } #install kdump.conf and what user specifies in kdump.conf @@ -263,6 +285,7 @@ kdump_install_conf() { esac done < /etc/kdump.conf + kdump_check_fence_kdump inst "/tmp/$$-kdump.conf" "/etc/kdump.conf" rm -f /tmp/$$-kdump.conf } @@ -393,12 +416,56 @@ kdump_check_iscsi_targets () { } +# setup fence_kdump in cluster +# setup proper network and install needed files +# also preserve '[node list]' for 2nd kernel /etc/fence_kdump_nodes +kdump_check_fence_kdump () { + local nodes + is_fence_kdump || return 1 + + # get cluster nodes from cluster cib, get interface and ip address + nodelist=`pcs cluster cib | xmllint --xpath "/cib/status/node_state/@uname" -` + + # nodelist is formed as 'uname="node1" uname="node2" ... uname="nodeX"' + # we need to convert each to node1, node2 ... nodeX in each iteration + for node in ${nodelist}; do + # convert $node from 'uname="nodeX"' to 'nodeX' + eval $node + nodename=$uname + # Skip its own node name + if [ "$nodename" = "$(hostname)" ]; then + continue + fi + nodes="$nodes $nodename" + + kdump_install_net $nodename + done + echo + + echo "$nodes" > ${initdir}/$FENCE_KDUMP_NODES + dracut_install $FENCE_KDUMP_SEND + dracut_install -o $FENCE_KDUMP_CONFIG +} + +# Install a random seed used to feed /dev/urandom +# By the time kdump service starts, /dev/urandom is already fed by systemd +kdump_install_random_seed() { + local poolsize=`cat /proc/sys/kernel/random/poolsize` + + if [ !
-d ${initdir}/var/lib/ ]; then + mkdir -p ${initdir}/var/lib/ + fi + + dd if=/dev/urandom of=${initdir}/var/lib/random-seed \ + bs=$poolsize count=1 2> /dev/null +} + install() { kdump_install_conf >"$initdir/lib/dracut/no-emergency-shell" if is_ssh_dump_target; then - dracut_install /var/lib/random-seed || exit $? + kdump_install_random_seed fi dracut_install -o /etc/adjtime /etc/localtime inst "$moddir/monitor_dd_progress" "/kdumpscripts/monitor_dd_progress" @@ -418,3 +485,11 @@ install() { # at some point of time. kdump_check_iscsi_targets } + +installkernel() { + wdt=$(lsmod|cut -f1 -d' '|grep "wdt$") + if [ -n "$wdt" ]; then + [ "$wdt" = "iTCO_wdt" ] && instmods lpc_ich + instmods $wdt + fi +} diff --git a/SOURCES/kdump-in-cluster-environment.txt b/SOURCES/kdump-in-cluster-environment.txt new file mode 100644 index 0000000..c27a5d7 --- /dev/null +++ b/SOURCES/kdump-in-cluster-environment.txt @@ -0,0 +1,66 @@ +Kdump-in-cluster-environment HOWTO + +Introduction + +Kdump is a kexec based crash dumping mechanism for Linux. This document +illustrates how to configure kdump in cluster environment to allow the kdump +crash recovery service to complete without being preempted by traditional power +fencing methods. + +Overview + +Kexec/Kdump + +Details about Kexec/Kdump are available in Kexec-Kdump-howto file and will not +be described here. + +fence_kdump + +fence_kdump is an I/O fencing agent to be used with the kdump crash recovery +service. When the fence_kdump agent is invoked, it will listen for a message +from the failed node that acknowledges that the failed node is executing the +kdump crash kernel. Note that fence_kdump is not a replacement for traditional +fencing methods. The fence_kdump agent can only detect that a node has entered +the kdump crash recovery service. This allows the kdump crash recovery service +to complete without being preempted by traditional power fencing methods.
+ +fence_kdump_send + +fence_kdump_send is a utility used to send messages that acknowledge that the +node itself has entered the kdump crash recovery service. The fence_kdump_send +utility is typically run in the kdump kernel after a cluster node has +encountered a kernel panic. Once the cluster node has entered the kdump crash +recovery service, fence_kdump_send will periodically send messages to all +cluster nodes. When the fence_kdump agent receives a valid message from the +failed node, fencing is complete. + +How to configure cluster environment: + +If we want to use kdump in a cluster environment, fence-agents-kdump should be +installed on every node in the cluster. You can achieve this via the following +command: + + # yum install -y fence-agents-kdump + +Next, add fence_kdump to the cluster. Assume that the cluster consists +of three nodes (node1, node2 and node3) and uses Pacemaker to perform +resource management, with pcs as the CLI configuration tool. + +With pcs it is easy to add a stonith resource to the cluster. For example, add +a stonith resource named mykdumpfence with fence type of fence_kdump via the +following commands: + + # pcs stonith create mykdumpfence fence_kdump \ + pcmk_host_check=static-list pcmk_host_list="node1 node2 node3" + # pcs stonith update mykdumpfence pcmk_monitor_action=metadata --force + # pcs stonith update mykdumpfence pcmk_status_action=metadata --force + # pcs stonith update mykdumpfence pcmk_reboot_action=off --force + +Then enable stonith + # pcs property set stonith-enabled=true + +How to configure kdump: + +Actually there is nothing special in configuration between normal kdump and +cluster environment kdump. So please refer to Kexec-Kdump-howto file for more +information.
diff --git a/SOURCES/kdump-lib.sh b/SOURCES/kdump-lib.sh index e73ac09..de32650 100755 --- a/SOURCES/kdump-lib.sh +++ b/SOURCES/kdump-lib.sh @@ -1,8 +1,12 @@ #!/bin/sh # -# Kdump common functions +# Kdump common variables and functions # +FENCE_KDUMP_CONFIG="/etc/sysconfig/fence_kdump" +FENCE_KDUMP_SEND="/usr/libexec/fence_kdump_send" +FENCE_KDUMP_NODES="/etc/fence_kdump_nodes" + is_ssh_dump_target() { grep -q "^ssh[[:blank:]].*@" /etc/kdump.conf @@ -22,3 +26,52 @@ strip_comments() { echo $@ | sed -e 's/\(.*\)#.*/\1/' } + +# Check if fence kdump is configured in cluster +is_fence_kdump() +{ + # no pcs or fence_kdump_send executables installed? + type -P pcs > /dev/null || return 1 + [ -x $FENCE_KDUMP_SEND ] || return 1 + + # fence kdump not configured? + (pcs cluster cib | grep -q 'type="fence_kdump"') &> /dev/null || return 1 +} + +get_user_configured_dump_disk() +{ + local _target + + if is_ssh_dump_target || is_nfs_dump_target; then + return + fi + + _target=$(egrep "^ext[234]|^xfs|^btrfs|^minix|^raw" /etc/kdump.conf 2>/dev/null |awk '{print $2}') + [ -n "$_target" ] && echo $_target + + return +} + +is_user_configured_dump_target() +{ + local _target + + if is_ssh_dump_target || is_nfs_dump_target; then + return 0 + fi + + _target=$(egrep "^ext[234]|^xfs|^btrfs|^minix|^raw" /etc/kdump.conf 2>/dev/null |awk '{print $2}') + [ -n "$_target" ] && return 0 + + return 1 +} + +get_root_fs_device() +{ + local _target + _target=$(findmnt -k -f -n -o SOURCE /) + [ -n "$_target" ] && echo $_target + + return +} + diff --git a/SOURCES/kdump.conf b/SOURCES/kdump.conf index c5cfb4a..a106462 100644 --- a/SOURCES/kdump.conf +++ b/SOURCES/kdump.conf @@ -50,9 +50,9 @@ # automatically be populated with a config file # appropriate for the running kernel. # Default core_collector for raw/ssh dump is: -# "makedumpfile -F -c --message-level 1 -d 31". +# "makedumpfile -F -l --message-level 1 -d 31". 
# Default core_collector for other targets is: -# "makedumpfile -c --message-level 1 -d 31". +# "makedumpfile -l --message-level 1 -d 31". # For core_collector format details please refer to # kexec-kdump-howto.txt or kdump.conf manpage. # @@ -132,7 +132,7 @@ #ssh user@my.server.com #sshkey /root/.ssh/kdump_id_rsa path /var/crash -#core_collector makedumpfile -c --message-level 1 -d 31 +core_collector makedumpfile -l --message-level 1 -d 31 #core_collector scp #kdump_post /var/crash/scripts/kdump-post.sh #kdump_pre /var/crash/scripts/kdump-pre.sh diff --git a/SOURCES/kdump.conf.5 b/SOURCES/kdump.conf.5 index 6f88370..7eaf9dd 100644 --- a/SOURCES/kdump.conf.5 +++ b/SOURCES/kdump.conf.5 @@ -78,9 +78,9 @@ for the running kernel. .PP Note 1: About default core collector: Default core_collector for raw/ssh dump is: -"makedumpfile -F -c --message-level 1 -d 31". +"makedumpfile -F -l --message-level 1 -d 31". Default core_collector for other targets is: -"makedumpfile -c --message-level 1 -d 31". +"makedumpfile -l --message-level 1 -d 31". Even if core_collector option is commented out in kdump.conf, makedumpfile is default core collector and kdump uses it internally. If one does not want makedumpfile as default core_collector, then they @@ -253,11 +253,11 @@ Above will effectively be translated to: cp --sparse=always /proc/vmcore <dest-path>/vmcore .TP ex2. -core_collector "makedumpfile -c --message-level 1 -d 31" +core_collector "makedumpfile -l --message-level 1 -d 31" Above will effectively be translated to: -makedumpfile -c --message-level 1 -d 31 /proc/vmcore <dest-path>/vmcore +makedumpfile -l --message-level 1 -d 31 /proc/vmcore <dest-path>/vmcore .PP For dump targets like raw and ssh, in general, core collector should expect one argument (source file) and should output the processed core on standard @@ -274,11 +274,11 @@ Above will effectively be translated to. cat /proc/vmcore | dd of=<target-device> .TP ex4. 
-core_collector "makedumpfile -F -c --message-level 1 -d 31" +core_collector "makedumpfile -F -l --message-level 1 -d 31" Above will effectively be translated to. -makedumpfile -F -c --message-level 1 -d 31 | dd of=<target-device> +makedumpfile -F -l --message-level 1 -d 31 | dd of=<target-device> .PP ssh dumps examples .TP @@ -290,11 +290,11 @@ Above will effectively be translated to. cat /proc/vmcore | ssh <options> <remote-location> "dd of=path/vmcore" .TP ex6. -core_collector "makedumpfile -F -c --message-level 1 -d 31" +core_collector "makedumpfile -F -l --message-level 1 -d 31" Above will effectively be translated to. -makedumpfile -F -c --message-level 1 -d 31 | ssh <options> <remote-location> "dd of=path/vmcore" +makedumpfile -F -l --message-level 1 -d 31 | ssh <options> <remote-location> "dd of=path/vmcore" There is one exception to standard output rule for ssh dumps. And that is scp. As scp can handle ssh destinations for file transfers, one can diff --git a/SOURCES/kdump.sysconfig.s390x b/SOURCES/kdump.sysconfig.s390x index b55515a..848b043 100644 --- a/SOURCES/kdump.sysconfig.s390x +++ b/SOURCES/kdump.sysconfig.s390x @@ -16,7 +16,7 @@ KDUMP_COMMANDLINE="" # This variable lets us append arguments to the current kdump commandline # As taken from either KDUMP_COMMANDLINE above, or from /proc/cmdline -KDUMP_COMMANDLINE_APPEND="maxcpus=1 cgroup_disable=memory numa=off udev.children-max=2 panic=10 rootflags=nofail" +KDUMP_COMMANDLINE_APPEND="nr_cpus=1 cgroup_disable=memory numa=off udev.children-max=2 panic=10 rootflags=nofail" # Any additional /sbin/mkdumprd arguments required. 
MKDUMPRD_ARGS="" diff --git a/SOURCES/kdump.sysconfig.x86_64 b/SOURCES/kdump.sysconfig.x86_64 index 7e4d611..989c3c7 100644 --- a/SOURCES/kdump.sysconfig.x86_64 +++ b/SOURCES/kdump.sysconfig.x86_64 @@ -16,7 +16,7 @@ KDUMP_COMMANDLINE="" # This variable lets us append arguments to the current kdump commandline # As taken from either KDUMP_COMMANDLINE above, or from /proc/cmdline -KDUMP_COMMANDLINE_APPEND="irqpoll nr_cpus=1 reset_devices cgroup_disable=memory mce=off numa=off udev.children-max=2 panic=10 rootflags=nofail" +KDUMP_COMMANDLINE_APPEND="irqpoll nr_cpus=1 reset_devices cgroup_disable=memory mce=off numa=off udev.children-max=2 panic=10 rootflags=nofail acpi_no_memhotplug" # Any additional kexec arguments required. In most situations, this should # be left empty diff --git a/SOURCES/kdumpctl b/SOURCES/kdumpctl index 358ef05..5721f7f 100755 --- a/SOURCES/kdumpctl +++ b/SOURCES/kdumpctl @@ -41,6 +41,59 @@ function remove_cmdline_param() echo $cmdline } +# +# This function returns the "initial apicid" of the +# boot cpu (cpu 0) if present. +# +function get_bootcpu_initial_apicid() +{ + awk ' \ + BEGIN { CPU = "-1"; } \ + $1=="processor" && $2==":" { CPU = $NF; } \ + CPU=="0" && /initial apicid/ { print $NF; } \ + ' \ + /proc/cpuinfo +} + +# +# This function appends argument "$2=$3" to string ($1) if not already present. +# +function append_cmdline() +{ + local cmdline=$1 + local newstr=${cmdline/$2/""} + + # unchanged str implies argument wasn't there + if [ "$cmdline" == "$newstr" ]; then + cmdline="${cmdline} ${2}=${3}" + fi + + echo $cmdline +} + +# This function performs a series of edits on the command line +function prepare_cmdline() +{ + local cmdline; + if [ -z "$KDUMP_COMMANDLINE" ]; then + cmdline=`cat /proc/cmdline` + else + cmdline=${KDUMP_COMMANDLINE} + fi + cmdline=`remove_cmdline_param "$cmdline" crashkernel hugepages hugepagesz` + + + cmdline="${cmdline} ${KDUMP_COMMANDLINE_APPEND}" + + local id=`get_bootcpu_initial_apicid` + if [ ! 
-z ${id} ] ; then + cmdline=`append_cmdline "${cmdline}" disable_cpu_apicid ${id}` + fi + + echo $cmdline +} + + function save_core() { coredir="/var/crash/`date +"%Y-%m-%d-%H:%M"`" @@ -132,6 +185,25 @@ function check_config() return 0 } +# check_fence_kdump <image timestamp> +# return 0 if fence_kdump is configured and kdump initrd needs to be rebuilt +function check_fence_kdump() +{ + local image_time=$1 + local cib_time + + is_fence_kdump || return 1 + + cib_time=`pcs cluster cib | xmllint --xpath 'string(/cib/@cib-last-written)' - | \ + xargs -0 date +%s --date` + + if [ -z "$cib_time" ] || [ "$cib_time" -le "$image_time" ]; then + return 1 + fi + + return 0 +} + function check_rebuild() { local extra_modules modified_files="" @@ -167,6 +239,9 @@ function check_rebuild() image_time=0 fi + #also rebuild when cluster conf is changed and fence kdump is enabled. + check_fence_kdump $image_time && modified_files="cluster-cib" + EXTRA_BINS=`grep ^kdump_post $KDUMP_CONFIG_FILE | cut -d\ -f2` CHECK_FILES=`grep ^kdump_pre $KDUMP_CONFIG_FILE | cut -d\ -f2` EXTRA_BINS="$EXTRA_BINS $CHECK_FILES" @@ -174,6 +249,10 @@ function check_rebuild() EXTRA_BINS="$EXTRA_BINS $CHECK_FILES" files="$KDUMP_CONFIG_FILE $kdump_kernel $EXTRA_BINS" + if [ -f $FENCE_KDUMP_CONFIG ]; then + files="$files $FENCE_KDUMP_CONFIG" + fi + check_exist "$files" && check_executable "$EXTRA_BINS" [ $? -ne 0 ] && return 1 @@ -244,13 +323,7 @@ function load_kdump() fi fi - if [ -z "$KDUMP_COMMANDLINE" ] - then - KDUMP_COMMANDLINE=`cat /proc/cmdline` - fi - KDUMP_COMMANDLINE=`remove_cmdline_param "$KDUMP_COMMANDLINE" crashkernel hugepages hugepagesz` - - KDUMP_COMMANDLINE="${KDUMP_COMMANDLINE} ${KDUMP_COMMANDLINE_APPEND}" + KDUMP_COMMANDLINE=`prepare_cmdline` $KEXEC $KEXEC_ARGS $standard_kexec_args \ --command-line="$KDUMP_COMMANDLINE" \ @@ -317,13 +390,6 @@ function propagate_ssh_key() exit 1 fi - #Check if selinux is on... must flip to permissive mode - #for the moment to create key, then flip back...
- se_enforce=`/usr/sbin/sestatus | grep -c "^Current mode.*enforcing"` - if [ "$se_enforce" -ge 1 ]; then - /usr/sbin/setenforce 0 2>&1 > /dev/null - fi - local KEYFILE=$SSH_KEY_LOCATION local errmsg="Failed to propagate ssh key" @@ -336,11 +402,6 @@ function propagate_ssh_key() echo "done." fi - #If necessary, flip selinux back to enforcing - if [ "$se_enforce" -ge 1 ]; then - /usr/sbin/setenforce 1 2>&1 > /dev/null - fi - #now find the target ssh user and server to contact. SSH_USER=`echo $DUMP_TARGET | cut -d\ -f2 | cut -d@ -f1` SSH_SERVER=`echo $DUMP_TARGET | sed -e's/\(.*@\)\(.*$\)/\2/'` @@ -358,12 +419,8 @@ function propagate_ssh_key() } -function status() +function check_current_kdump_status() { - if [ ! -e /sys/kernel/kexec_crash_loaded ] - then - return 2 - fi rc=`cat /sys/kernel/kexec_crash_loaded` if [ $rc == 1 ]; then return 0 @@ -474,6 +531,70 @@ selinux_relabel() done } +# Check if secure boot is being enforced. +# +# Per Peter Jones, we need to check efivar SecureBoot-$(the UUID) and +# SetupMode-$(the UUID), they are both 5 bytes binary data. The first four +# bytes are the attributes associated with the variable and can safely be +# ignored, the last bytes are one-byte true-or-false variables. If SecureBoot +# is 1 and SetupMode is 0, then secure boot is being enforced. +# +# SecureBoot-UUID won't always be set when securelevel is 1. For legacy-mode +# and uefi-without-secure-enabled system, we can manually enable secure mode +# by writing "1" to securelevel. So check both efi var and secure mode is a +# more sane way. +# +# Assume efivars is mounted at /sys/firmware/efi/efivars.
+function is_secure_boot_enforced() +{ + local secure_boot_file setup_mode_file + local secure_boot_byte setup_mode_byte + + secure_boot_file=$(find /sys/firmware/efi/efivars -name 'SecureBoot-*' 2>/dev/null) + setup_mode_file=$(find /sys/firmware/efi/efivars -name 'SetupMode-*' 2>/dev/null) + + if [ -f "$secure_boot_file" ] && [ -f "$setup_mode_file" ]; then + secure_boot_byte=$(hexdump -v -e '/1 "%d\ "' "$secure_boot_file"|cut -d' ' -f 5) + setup_mode_byte=$(hexdump -v -e '/1 "%d\ "' "$setup_mode_file"|cut -d' ' -f 5) + + if [ "$secure_boot_byte" = "1" ] && [ "$setup_mode_byte" = "0" ]; then + return 0 + fi + fi + + return 1 +} + +# Check if secure mode is being enforced (securelevel =? 1) +function is_secure_mode_enforced() +{ + local secure_mode_byte + + secure_mode_byte=$(cat /sys/kernel/security/securelevel 2>/dev/null) + + if [ "$secure_mode_byte" = "1" ]; then + return 0 + fi + + return 1 +} + +function check_kdump_feasibility() +{ + if is_secure_boot_enforced; then + echo "Secure Boot is Enabled. Kdump service can't be started. Disable Secure Boot and retry" + return 1; + elif is_secure_mode_enforced; then + echo "securelevel is set to 1 (Secure Mode). Kdump service can't be started." + return 1 + fi + + if [ ! -e /sys/kernel/kexec_crash_loaded ]; then + echo "Kdump is not supported on this kernel" + return 1 + fi +} + function start() { check_config @@ -491,16 +612,16 @@ function start() return 1 fi - status - rc=$? - if [ $rc == 2 ]; then - echo "Kdump is not supported on this kernel: [WARNING]" - return 1; - else - if [ $rc == 0 ]; then - echo "Kdump already running: [WARNING]" - return 0 - fi + check_kdump_feasibility + if [ $? -ne 0 ]; then + echo "Starting kdump: [FAILED]" + return 1 + fi + + check_current_kdump_status + if [ $? == 0 ]; then + echo "Kdump already running: [WARNING]" + return 0 fi if check_ssh_config; then @@ -543,52 +664,55 @@ if [ !
-f "$KDUMP_CONFIG_FILE" ]; then exit 1 fi -# Other kdumpctl instances will block in queue, until this one exits -single_instance_lock - -case "$1" in - start) - if [ -s /proc/vmcore ]; then - save_core - reboot - else - start - fi - ;; - stop) - stop - ;; - status) - EXIT_CODE=0 - status - case "$?" in - 0) - echo "Kdump is operational" +main () +{ + case "$1" in + start) + if [ -s /proc/vmcore ]; then + save_core + reboot + else + start + fi + ;; + stop) + stop + ;; + status) EXIT_CODE=0 + check_current_kdump_status + case "$?" in + 0) + echo "Kdump is operational" + EXIT_CODE=0 + ;; + 1) + echo "Kdump is not operational" + EXIT_CODE=3 + ;; + esac + exit $EXIT_CODE ;; - 1) - echo "Kdump is not operational" - EXIT_CODE=3 + restart) + stop + start + ;; + condrestart) ;; - 2) - echo "Kdump is unsupported on this kernel" - EXIT_CODE=3 + propagate) + propagate_ssh_key ;; + *) + echo $"Usage: $0 {start|stop|status|restart|propagate}" + exit 1 esac - exit $EXIT_CODE - ;; - restart) - stop - start - ;; - condrestart) - ;; - propagate) - propagate_ssh_key - ;; - *) - echo $"Usage: $0 {start|stop|status|restart|propagate}" - exit 1 -esac +} + +# Other kdumpctl instances will block in queue, until this one exits +single_instance_lock + +# To avoid fd 9 leaking, we invoke a subshell, close fd 9 and call main. +# So that fd isn't leaking when main is invoking a subshell. +(exec 9<&-; main $1) exit $? diff --git a/SOURCES/kexec-kdump-howto.txt b/SOURCES/kexec-kdump-howto.txt index d70781b..7ffeab9 100644 --- a/SOURCES/kexec-kdump-howto.txt +++ b/SOURCES/kexec-kdump-howto.txt @@ -438,7 +438,7 @@ is a dump filtering and compression utility provided with kexec-tools. On some architectures, it can drastically reduce the size of your vmcore files, which becomes very useful on systems with large amounts of memory. 
-A typical setup is 'core_collector makedumpfile -F -c --message-level 1 -d 31', +A typical setup is 'core_collector makedumpfile -F -l --message-level 1 -d 31', but check the output of '/sbin/makedumpfile --help' for a list of all available options (-i and -g don't need to be specified, they're automatically taken care of). Note that use of makedumpfile requires that the kernel-debuginfo package @@ -458,11 +458,11 @@ cp --sparse=always /proc/vmcore <dest-path>/vmcore ex2. --- -core_collector "makedumpfile -c --message-level 1 -d 31" +core_collector "makedumpfile -l --message-level 1 -d 31" Above will effectively be translated to: -makedumpfile -c --message-level 1 -d 31 /proc/vmcore <dest-path>/vmcore +makedumpfile -l --message-level 1 -d 31 /proc/vmcore <dest-path>/vmcore For dump targets like raw and ssh, in general, core collector should expect @@ -482,11 +482,11 @@ cat /proc/vmcore | dd of=<target-device> ex4. --- -core_collector "makedumpfile -F -c --message-level 1 -d 31" +core_collector "makedumpfile -F -l --message-level 1 -d 31" Above will effectively be translated to. -makedumpfile -F -c --message-level 1 -d 31 | dd of=<target-device> +makedumpfile -F -l --message-level 1 -d 31 | dd of=<target-device> ssh dumps core_collector examples: --------- @@ -500,11 +500,11 @@ cat /proc/vmcore | ssh <options> <remote-location> "dd of=path/vmcore" ex6. --- -core_collector "makedumpfile -F -c --message-level 1 -d 31" +core_collector "makedumpfile -F -l --message-level 1 -d 31" Above will effectively be translated to. -makedumpfile -F -c --message-level 1 -d 31 | ssh <options> <remote-location> "dd of=path/vmcore" +makedumpfile -F -l --message-level 1 -d 31 | ssh <options> <remote-location> "dd of=path/vmcore" There is one exception to standard output rule for ssh dumps. And that is scp. 
As scp can handle ssh destinations for file transfers, one can @@ -521,9 +521,9 @@ scp /proc/vmcore <user@host>:path/vmcore About default core collector ---------------------------- Default core_collector for ssh/raw dump is: -"makedumpfile -F -c --message-level 1 -d 31". +"makedumpfile -F -l --message-level 1 -d 31". Default core_collector for other targets is: -"makedumpfile -c --message-level 1 -d 31". +"makedumpfile -l --message-level 1 -d 31". Even if core_collector option is commented out in kdump.conf, makedumpfile is default core collector and kdump uses it internally. diff --git a/SOURCES/kexec-tools-2.0.4-makedumpfile-Add-help-and-man-message-for-help.patch b/SOURCES/kexec-tools-2.0.4-makedumpfile-Add-help-and-man-message-for-help.patch new file mode 100644 index 0000000..c834227 --- /dev/null +++ b/SOURCES/kexec-tools-2.0.4-makedumpfile-Add-help-and-man-message-for-help.patch @@ -0,0 +1,43 @@ +From eb708cedde4ed48dde5a918a23b2c3b8235df6c5 Mon Sep 17 00:00:00 2001 +From: Baoquan He <bhe@redhat.com> +Date: Tue, 2 Jul 2013 11:11:07 +0900 +Subject: [PATCH 2/2] [PATCH 2/2] Add help and man message for '--help'. + +Conventionally '-h' and '--help' are all provided. Currently makedumpfile +lacks help and man message for '--help'. Here add it. + +Signed-off-by: Baoquan He <bhe@redhat.com> +--- + makedumpfile.8 | 2 +- + print_info.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/makedumpfile-1.5.4/makedumpfile.8 b/makedumpfile-1.5.4/makedumpfile.8 +index adeb811..f50a011 100644 +--- a/makedumpfile-1.5.4/makedumpfile.8 ++++ b/makedumpfile-1.5.4/makedumpfile.8 +@@ -539,7 +539,7 @@ order from left to right. \fIVMCORE\fRs are assembled into a single + Print debugging message. + + .TP +-\fB\-h\fR ++\fB\-h (\-\-help)\fR + Show help message and LZO/snappy support status (enabled/disabled). 
+ + .TP +diff --git a/makedumpfile-1.5.4/print_info.c b/makedumpfile-1.5.4/print_info.c +index 06939e0..3527970 100644 +--- a/makedumpfile-1.5.4/print_info.c ++++ b/makedumpfile-1.5.4/print_info.c +@@ -255,7 +255,7 @@ print_usage(void) + MSG(" [-f]:\n"); + MSG(" Overwrite DUMPFILE even if it already exists.\n"); + MSG("\n"); +- MSG(" [-h]:\n"); ++ MSG(" [-h, --help]:\n"); + MSG(" Show help message and LZO/snappy support status (enabled/disabled).\n"); + MSG("\n"); + MSG(" [-v]:\n"); +-- +1.8.4.2 + diff --git a/SOURCES/kexec-tools-2.0.4-makedumpfile-Add-non-mmap-option-to-disable-mmap-manually.patch b/SOURCES/kexec-tools-2.0.4-makedumpfile-Add-non-mmap-option-to-disable-mmap-manually.patch new file mode 100644 index 0000000..8a9ee47 --- /dev/null +++ b/SOURCES/kexec-tools-2.0.4-makedumpfile-Add-non-mmap-option-to-disable-mmap-manually.patch @@ -0,0 +1,161 @@ +From a895dc8f2a17f7dac9d3d63de1cea4720557625d Mon Sep 17 00:00:00 2001 +From: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp> +Date: Thu, 12 Dec 2013 16:40:12 +0900 +Subject: [PATCH 1/2] [PATCH] Add --non-mmap option to disable mmap() manually. + +When --non-mmap option is specified, makedumpfile doesn't use +mmap() even if /proc/vmcore supports mmap(). + +Signed-off-by: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp> +--- + makedumpfile.8 | 11 +++++++++++ + makedumpfile.c | 29 +++++++++++++++++++---------- + makedumpfile.h | 9 +++++++++ + print_info.c | 6 ++++++ + 4 files changed, 45 insertions(+), 10 deletions(-) + +diff --git a/makedumpfile-1.5.4/makedumpfile.8 b/makedumpfile-1.5.4/makedumpfile.8 +index f50a011..227b6f7 100644 +--- a/makedumpfile-1.5.4/makedumpfile.8 ++++ b/makedumpfile-1.5.4/makedumpfile.8 +@@ -395,6 +395,17 @@ If you feel the cyclic mode is too slow, please try this mode. + # makedumpfile \-\-non\-cyclic \-d 31 \-x vmlinux /proc/vmcore dumpfile + + .TP ++\fB\-\-non\-mmap\fR ++Never use \fBmmap(2)\fR to read \fIVMCORE\fR even if it supports \fBmmap(2)\fR. 
++Generally, reading \fIVMCORE\fR with \fBmmap(2)\fR is faster than without it, ++so ordinary users don't need to specify this option. ++This option is mainly for debugging. ++.br ++.B Example: ++.br ++# makedumpfile \-\-non\-mmap \-d 31 \-x vmlinux /proc/vmcore dumpfile ++ ++.TP + \fB\-\-xen-syms\fR \fIXEN-SYMS\fR + Specify the \fIXEN-SYMS\fR with debug information to analyze the xen's memory usage. + This option extracts the part of xen and domain-0. +diff --git a/makedumpfile-1.5.4/makedumpfile.c b/makedumpfile-1.5.4/makedumpfile.c +index 600fb5d..b3af28b 100644 +--- a/makedumpfile-1.5.4/makedumpfile.c ++++ b/makedumpfile-1.5.4/makedumpfile.c +@@ -272,7 +272,7 @@ update_mmap_range(off_t offset, int initial) { + static int + is_mapped_with_mmap(off_t offset) { + +- if (info->flag_usemmap ++ if (info->flag_usemmap == MMAP_ENABLE + && offset >= info->mmap_start_offset + && offset < info->mmap_end_offset) + return TRUE; +@@ -320,7 +320,7 @@ read_from_vmcore(off_t offset, void *bufptr, unsigned long size) + { + const off_t failed = (off_t)-1; + +- if (info->flag_usemmap) { ++ if (info->flag_usemmap == MMAP_ENABLE) { + if (!read_with_mmap(offset, bufptr, size)) { + ERRMSG("Can't read the dump memory(%s) with mmap().\n", + info->name_memory); +@@ -3175,14 +3175,14 @@ out: + if (info->dump_level & DL_EXCLUDE_FREE) + setup_page_is_buddy(); + +- if (!initialize_mmap()) { +- /* this kernel does not support mmap of vmcore */ +- DEBUG_MSG("Kernel can't mmap vmcore, using reads.\n"); +- info->flag_usemmap = FALSE; ++ if (info->flag_usemmap == MMAP_TRY && initialize_mmap()) { ++ DEBUG_MSG("mmap() is available on the kernel.\n"); ++ info->flag_usemmap = MMAP_ENABLE; + } else { +- DEBUG_MSG("read %s with mmap()\n", info->name_memory); +- info->flag_usemmap = TRUE; +- } ++ DEBUG_MSG("The kernel doesn't support mmap(),"); ++ DEBUG_MSG("read() will be used instead.\n"); ++ info->flag_usemmap = MMAP_DISABLE; ++ } + + return TRUE; + } +@@ -8947,6 +8947,7 @@ static struct option 
longopts[] = { + {"non-cyclic", no_argument, NULL, OPT_NON_CYCLIC}, + {"cyclic-buffer", required_argument, NULL, OPT_CYCLIC_BUFFER}, + {"eppic", required_argument, NULL, OPT_EPPIC}, ++ {"non-mmap", no_argument, NULL, OPT_NON_MMAP}, + {0, 0, 0, 0} + }; + +@@ -8972,7 +8973,12 @@ main(int argc, char *argv[]) + * By default, makedumpfile works in constant memory space. + */ + info->flag_cyclic = TRUE; +- ++ ++ /* ++ * By default, makedumpfile try to use mmap(2) to read /proc/vmcore. ++ */ ++ info->flag_usemmap = MMAP_TRY; ++ + info->block_order = DEFAULT_ORDER; + message_level = DEFAULT_MSG_LEVEL; + while ((opt = getopt_long(argc, argv, "b:cDd:EFfg:hi:lpRvXx:", longopts, +@@ -9069,6 +9075,9 @@ main(int argc, char *argv[]) + case OPT_NON_CYCLIC: + info->flag_cyclic = FALSE; + break; ++ case OPT_NON_MMAP: ++ info->flag_usemmap = MMAP_DISABLE; ++ break; + case OPT_XEN_VMCOREINFO: + info->flag_read_vmcoreinfo = 1; + info->name_vmcoreinfo = optarg; +diff --git a/makedumpfile-1.5.4/makedumpfile.h b/makedumpfile-1.5.4/makedumpfile.h +index 517e16e..fe88eff 100644 +--- a/makedumpfile-1.5.4/makedumpfile.h ++++ b/makedumpfile-1.5.4/makedumpfile.h +@@ -128,6 +128,14 @@ enum { + MADDR_XEN + }; + ++/* ++ * State of mmap(2) ++ */ ++enum { ++ MMAP_DISABLE, ++ MMAP_TRY, ++ MMAP_ENABLE, ++}; + + static inline int + test_bit(int nr, unsigned long addr) +@@ -1741,6 +1749,7 @@ struct elf_prstatus { + #define OPT_NON_CYCLIC OPT_START+10 + #define OPT_CYCLIC_BUFFER OPT_START+11 + #define OPT_EPPIC OPT_START+12 ++#define OPT_NON_MMAP OPT_START+13 + + /* + * Function Prototype. 
+diff --git a/makedumpfile-1.5.4/print_info.c b/makedumpfile-1.5.4/print_info.c +index d7a8600..90b6cee 100644 +--- a/makedumpfile-1.5.4/print_info.c ++++ b/makedumpfile-1.5.4/print_info.c +@@ -196,6 +196,12 @@ print_usage(void) + MSG(" same as v1.4.4 or before.\n"); + MSG(" If you feel the cyclic mode is too slow, please try this mode.\n"); + MSG("\n"); ++ MSG(" [--non-mmap]:\n"); ++ MSG(" Never use mmap(2) to read VMCORE even if it supports mmap(2).\n"); ++ MSG(" Generally, reading VMCORE with mmap(2) is faster than without it,\n"); ++ MSG(" so ordinary users don't need to specify this option.\n"); ++ MSG(" This option is mainly for debugging.\n"); ++ MSG("\n"); + MSG(" [--xen-syms XEN-SYMS]:\n"); + MSG(" Specify the XEN-SYMS to analyze Xen's memory usage.\n"); + MSG("\n"); +-- +1.8.4.2 + diff --git a/SOURCES/kexec-tools-2.0.4-makedumpfile-Assign-non-printable-value-as-short-option.patch b/SOURCES/kexec-tools-2.0.4-makedumpfile-Assign-non-printable-value-as-short-option.patch new file mode 100644 index 0000000..115e3d8 --- /dev/null +++ b/SOURCES/kexec-tools-2.0.4-makedumpfile-Assign-non-printable-value-as-short-option.patch @@ -0,0 +1,244 @@ +From bd67c1d5e2633f302b4c0ad50cc830ff7da20b2a Mon Sep 17 00:00:00 2001 +From: Baoquan He <bhe@redhat.com> +Date: Tue, 2 Jul 2013 11:09:20 +0900 +Subject: [PATCH 1/2] [PATCH 1/2] Assign non-printable value as short options. + +Characters for short options is limited, and now makedumpfile has +considerably many options. As times go on, no enough reasonable +letters can be assigned to each functionality with short options. + +E.g non-cyclic vs Y, cyclic-buffer vs Z, eppic vs S. + +Now assign non-printable value to these kind of short optins, meanwhile +define them as indicative MACRO which can make code more readable. 
+ +Signed-off-by: Baoquan He <bhe@redhat.com> +--- + makedumpfile.c | 88 +++++++++++++++++++++++++++++----------------------------- + makedumpfile.h | 35 +++++++++++++++++++++++ + 2 files changed, 79 insertions(+), 44 deletions(-) + +diff --git a/makedumpfile-1.5.4/makedumpfile.c b/makedumpfile-1.5.4/makedumpfile.c +index b42565c..bb72c66 100644 +--- a/makedumpfile-1.5.4/makedumpfile.c ++++ b/makedumpfile-1.5.4/makedumpfile.c +@@ -8555,20 +8555,20 @@ calculate_cyclic_buffer_size(void) { + } + + static struct option longopts[] = { +- {"split", no_argument, NULL, 's'}, +- {"reassemble", no_argument, NULL, 'r'}, +- {"xen-syms", required_argument, NULL, 'y'}, +- {"xen-vmcoreinfo", required_argument, NULL, 'z'}, +- {"xen_phys_start", required_argument, NULL, 'P'}, +- {"message-level", required_argument, NULL, 'm'}, +- {"vtop", required_argument, NULL, 'V'}, +- {"dump-dmesg", no_argument, NULL, 'M'}, +- {"config", required_argument, NULL, 'C'}, +- {"help", no_argument, NULL, 'h'}, +- {"diskset", required_argument, NULL, 'k'}, +- {"non-cyclic", no_argument, NULL, 'Y'}, +- {"cyclic-buffer", required_argument, NULL, 'Z'}, +- {"eppic", required_argument, NULL, 'S'}, ++ {"split", no_argument, NULL, OPT_SPLIT}, ++ {"reassemble", no_argument, NULL, OPT_REASSEMBLE}, ++ {"xen-syms", required_argument, NULL, OPT_XEN_SYMS}, ++ {"xen-vmcoreinfo", required_argument, NULL, OPT_XEN_VMCOREINFO}, ++ {"xen_phys_start", required_argument, NULL, OPT_XEN_PHYS_START}, ++ {"message-level", required_argument, NULL, OPT_MESSAGE_LEVEL}, ++ {"vtop", required_argument, NULL, OPT_VTOP}, ++ {"dump-dmesg", no_argument, NULL, OPT_DUMP_DMESG}, ++ {"config", required_argument, NULL, OPT_CONFIG}, ++ {"help", no_argument, NULL, OPT_HELP}, ++ {"diskset", required_argument, NULL, OPT_DISKSET}, ++ {"non-cyclic", no_argument, NULL, OPT_NON_CYCLIC}, ++ {"cyclic-buffer", required_argument, NULL, OPT_CYCLIC_BUFFER}, ++ {"eppic", required_argument, NULL, OPT_EPPIC}, + {0, 0, 0, 0} + }; + +@@ -8597,29 +8597,29 @@ 
main(int argc, char *argv[]) + + info->block_order = DEFAULT_ORDER; + message_level = DEFAULT_MSG_LEVEL; +- while ((opt = getopt_long(argc, argv, "b:cDd:EFfg:hi:lMpRrsvXx:", longopts, ++ while ((opt = getopt_long(argc, argv, "b:cDd:EFfg:hi:lpRvXx:", longopts, + NULL)) != -1) { + switch (opt) { +- case 'b': ++ case OPT_BLOCK_ORDER: + info->block_order = atoi(optarg); + break; +- case 'C': ++ case OPT_CONFIG: + info->name_filterconfig = optarg; + break; +- case 'c': ++ case OPT_COMPRESS_ZLIB: + info->flag_compress = DUMP_DH_COMPRESSED_ZLIB; + break; +- case 'D': ++ case OPT_DEBUG: + flag_debug = TRUE; + break; +- case 'd': ++ case OPT_DUMP_LEVEL: + if (!parse_dump_level(optarg)) + goto out; + break; +- case 'E': ++ case OPT_ELF_DUMPFILE: + info->flag_elf_dumpfile = 1; + break; +- case 'F': ++ case OPT_FLATTEN: + info->flag_flatten = 1; + /* + * All messages are output to STDERR because STDOUT is +@@ -8627,75 +8627,75 @@ main(int argc, char *argv[]) + */ + flag_strerr_message = TRUE; + break; +- case 'f': ++ case OPT_FORCE: + info->flag_force = 1; + break; +- case 'g': ++ case OPT_GENERATE_VMCOREINFO: + info->flag_generate_vmcoreinfo = 1; + info->name_vmcoreinfo = optarg; + break; +- case 'h': ++ case OPT_HELP: + info->flag_show_usage = 1; + break; +- case 'i': ++ case OPT_READ_VMCOREINFO: + info->flag_read_vmcoreinfo = 1; + info->name_vmcoreinfo = optarg; + break; +- case 'k': ++ case OPT_DISKSET: + if (!sadump_add_diskset_info(optarg)) + goto out; + info->flag_sadump_diskset = 1; + break; +- case 'l': ++ case OPT_COMPRESS_LZO: + info->flag_compress = DUMP_DH_COMPRESSED_LZO; + break; +- case 'm': ++ case OPT_MESSAGE_LEVEL: + message_level = atoi(optarg); + break; +- case 'M': ++ case OPT_DUMP_DMESG: + info->flag_dmesg = 1; + break; +- case 'p': ++ case OPT_COMPRESS_SNAPPY: + info->flag_compress = DUMP_DH_COMPRESSED_SNAPPY; + break; +- case 'P': ++ case OPT_XEN_PHYS_START: + info->xen_phys_start = strtoul(optarg, NULL, 0); + break; +- case 'R': ++ case OPT_REARRANGE: 
+ info->flag_rearrange = 1; + break; +- case 's': ++ case OPT_SPLIT: + info->flag_split = 1; + break; +- case 'S': ++ case OPT_EPPIC: + info->name_eppic_config = optarg; + break; +- case 'r': ++ case OPT_REASSEMBLE: + info->flag_reassemble = 1; + break; +- case 'V': ++ case OPT_VTOP: + info->vaddr_for_vtop = strtoul(optarg, NULL, 0); + break; +- case 'v': ++ case OPT_VERSION: + info->flag_show_version = 1; + break; +- case 'X': ++ case OPT_EXCLUDE_XEN_DOM: + info->flag_exclude_xen_dom = 1; + break; +- case 'x': ++ case OPT_VMLINUX: + info->name_vmlinux = optarg; + break; +- case 'y': ++ case OPT_XEN_SYMS: + info->name_xen_syms = optarg; + break; +- case 'Y': ++ case OPT_NON_CYCLIC: + info->flag_cyclic = FALSE; + break; +- case 'z': ++ case OPT_XEN_VMCOREINFO: + info->flag_read_vmcoreinfo = 1; + info->name_vmcoreinfo = optarg; + break; +- case 'Z': ++ case OPT_CYCLIC_BUFFER: + info->bufsize_cyclic = atoi(optarg); + break; + case '?': +diff --git a/makedumpfile-1.5.4/makedumpfile.h b/makedumpfile-1.5.4/makedumpfile.h +index a5826e0..79d4702 100644 +--- a/makedumpfile-1.5.4/makedumpfile.h ++++ b/makedumpfile-1.5.4/makedumpfile.h +@@ -1671,6 +1671,41 @@ struct elf_prstatus { + #endif + + /* ++ * Below are options which getopt_long can recognize. From OPT_START options are ++ * non-printable, just used for implementation. 
++ */ ++#define OPT_BLOCK_ORDER 'b' ++#define OPT_COMPRESS_ZLIB 'c' ++#define OPT_DEBUG 'D' ++#define OPT_DUMP_LEVEL 'd' ++#define OPT_ELF_DUMPFILE 'E' ++#define OPT_FLATTEN 'F' ++#define OPT_FORCE 'f' ++#define OPT_GENERATE_VMCOREINFO 'g' ++#define OPT_HELP 'h' ++#define OPT_READ_VMCOREINFO 'i' ++#define OPT_COMPRESS_LZO 'l' ++#define OPT_COMPRESS_SNAPPY 'p' ++#define OPT_REARRANGE 'R' ++#define OPT_VERSION 'v' ++#define OPT_EXCLUDE_XEN_DOM 'X' ++#define OPT_VMLINUX 'x' ++#define OPT_START 256 ++#define OPT_SPLIT OPT_START+0 ++#define OPT_REASSEMBLE OPT_START+1 ++#define OPT_XEN_SYMS OPT_START+2 ++#define OPT_XEN_VMCOREINFO OPT_START+3 ++#define OPT_XEN_PHYS_START OPT_START+4 ++#define OPT_MESSAGE_LEVEL OPT_START+5 ++#define OPT_VTOP OPT_START+6 ++#define OPT_DUMP_DMESG OPT_START+7 ++#define OPT_CONFIG OPT_START+8 ++#define OPT_DISKSET OPT_START+9 ++#define OPT_NON_CYCLIC OPT_START+10 ++#define OPT_CYCLIC_BUFFER OPT_START+11 ++#define OPT_EPPIC OPT_START+12 ++ ++/* + * Function Prototype. + */ + unsigned long long get_num_dumpable_cyclic(void); +-- +1.8.4.2 + diff --git a/SOURCES/kexec-tools-2.0.4-makedumpfile-Fall-back-to-read-when-mmap-fails.patch b/SOURCES/kexec-tools-2.0.4-makedumpfile-Fall-back-to-read-when-mmap-fails.patch new file mode 100644 index 0000000..a4edb75 --- /dev/null +++ b/SOURCES/kexec-tools-2.0.4-makedumpfile-Fall-back-to-read-when-mmap-fails.patch @@ -0,0 +1,38 @@ +From 7c770ed052d8452f5d7ce027b23d1b77cf6fbce7 Mon Sep 17 00:00:00 2001 +From: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp> +Date: Thu, 12 Dec 2013 16:40:31 +0900 +Subject: [PATCH 2/2] [PATCH] Fall back to read() when mmap() fails. + +This is a fall back path for mmap(). +This patch disables mmap() when facing the issues related to mmap(), +and read() will be used to read vmcore instead. 
+ +Signed-off-by: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp> +--- + makedumpfile.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/makedumpfile-1.5.4/makedumpfile.c b/makedumpfile-1.5.4/makedumpfile.c +index b3af28b..20f107e 100644 +--- a/makedumpfile-1.5.4/makedumpfile.c ++++ b/makedumpfile-1.5.4/makedumpfile.c +@@ -324,7 +324,15 @@ read_from_vmcore(off_t offset, void *bufptr, unsigned long size) + if (!read_with_mmap(offset, bufptr, size)) { + ERRMSG("Can't read the dump memory(%s) with mmap().\n", + info->name_memory); +- return FALSE; ++ ++ ERRMSG("This kernel might have some problems about mmap().\n"); ++ ERRMSG("read() will be used instead of mmap() from now.\n"); ++ ++ /* ++ * Fall back to read(). ++ */ ++ info->flag_usemmap = MMAP_DISABLE; ++ read_from_vmcore(offset, bufptr, size); + } + } else { + if (lseek(info->fd_memory, offset, SEEK_SET) == failed) { +-- +1.8.4.2 + diff --git a/SOURCES/kexec-tools-2.0.4-makedumpfile-Improve-progress-information-for-huge-memor.patch b/SOURCES/kexec-tools-2.0.4-makedumpfile-Improve-progress-information-for-huge-memor.patch new file mode 100644 index 0000000..9b15185 --- /dev/null +++ b/SOURCES/kexec-tools-2.0.4-makedumpfile-Improve-progress-information-for-huge-memor.patch @@ -0,0 +1,200 @@ +From 20ecc0827e7837c52f3903638a59959f8bf17f9e Mon Sep 17 00:00:00 2001 +From: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com> +Date: Tue, 5 Nov 2013 00:29:35 +0900 +Subject: [PATCH] [PATCH v2] Improve progress information for huge memory + system. + +On system with huge memory, percentage in progress information is +updated at very slow interval, because 1 percent on 1 TiB memory is +about 10 GiB, which looks like as if system has freezed. Then, +confused users might get tempted to push a reset button to recover the +system. We want to avoid such situation as much as possible. 
+ +To address the issue, this patch: + +- increases the number of calling print_progress() from once in + (written pages / 100)-pages to once in (written pages / + 10000)-pages, + +- extends precision in progress information by adding 1 digit to its + fractional part, and + +- adds spinner that rotates in the order of /, |, \ and - in next to + the progress indicator in percentage, + +which help users to get aware that system is still active and crash +dump process is still in progress now. + +The spinner code is borrowed from diskdump code. + +The ouput is changed from: + +Copying data : [ 0 %] / +Copying data : [ 8 %] | +Copying data : [ 11 %] \ +Copying data : [ 14 %] - +Copying data : [ 16 %] / +... +Copying data : [ 99 %] / +Copying data : [100 %] | + +to: + +Copying data : [ 0.1 %] / +Copying data : [ 8.9 %] | +Copying data : [ 11.6 %] \ +Copying data : [ 14.3 %] - +Copying data : [ 16.4 %] / +... +Copying data : [ 99.2 %] / +Copying data : [100.0 %] | + +This patch doesn't adopt purely time-based approach that records the +time when print_progress() is called at each invocation and print the +recorded time if it is strictly larger than the previous time value. + +The problem is that calling time() system call amounts to +considertably long time in total on huge memory system. For example, +here is a simple bench that measures total execution time of time() +system call for (1TiB / 4KiB)-times: + +$ ./bench +total: 18.360503 +total: 34.910297 + +the result of which reveals that it amounts to about 20 seconds with +vDSO optimization and about 35 seconds without. + +BTW, on our 12 TiB memory system, we collect about 300 GiB crash dump +in about 40 minutes with dump level 31. On 12TiB, the benchmark result +corresponds to about 4 minutes and 7 minutes respectively, both of which +affects a whole performance. 
+ +==bench.c +static inline double getdtime(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (double)tv.tv_sec + (double)tv.tv_usec * 1.0e-6; +} + +int main(int argc, char **argv) +{ + unsigned long i; + time_t t; + double t1, t2, total; + const int NR_time = 201; + const unsigned long nr_repeat = (1UL << 40) / 4096; + + total = 0; + for (i = 0; i < nr_repeat; ++i) { + t1 = getdtime(); + time(&t); + t2 = getdtime(); + total += t2 - t1; + } + printf("total: %lf\n", total); + + total = 0; + for (i = 0; i < nr_repeat; ++i) { + t1 = getdtime(); + syscall(NR_time, &t); + t2 = getdtime(); + total += t2 - t1; + } + printf("total: %lf\n", total); + + return 0; +} +== + +Signed-off-by: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com> +--- + makedumpfile.c | 8 ++++---- + print_info.c | 15 +++++++++------ + 2 files changed, 13 insertions(+), 10 deletions(-) + +diff --git a/makedumpfile-1.5.4/makedumpfile.c b/makedumpfile-1.5.4/makedumpfile.c +index dafe83b..3746cf6 100644 +--- a/makedumpfile-1.5.4/makedumpfile.c ++++ b/makedumpfile-1.5.4/makedumpfile.c +@@ -5573,7 +5573,7 @@ write_elf_pages(struct cache_data *cd_header, struct cache_data *cd_page) + initialize_2nd_bitmap(&bitmap2); + + num_dumpable = get_num_dumpable(); +- per = num_dumpable / 100; ++ per = num_dumpable / 10000; + + off_seg_load = info->offset_load_dumpfile; + cd_page->offset = info->offset_load_dumpfile; +@@ -5858,7 +5858,7 @@ write_elf_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_page) + return FALSE; + + num_dumpable = info->num_dumpable; +- per = num_dumpable / 100; ++ per = num_dumpable / 10000; + + off_seg_load = info->offset_load_dumpfile; + cd_page->offset = info->offset_load_dumpfile; +@@ -6116,7 +6116,7 @@ write_kdump_pages(struct cache_data *cd_header, struct cache_data *cd_page) + } + + num_dumpable = get_num_dumpable(); +- per = num_dumpable / 100; ++ per = num_dumpable / 10000; + + /* + * Calculate the offset of the page data. 
+@@ -6317,7 +6317,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag + goto out; + } + +- per = info->num_dumpable / 100; ++ per = info->num_dumpable / 10000; + + /* + * Set a fileoffset of Physical Address 0x0. +diff --git a/makedumpfile-1.5.4/print_info.c b/makedumpfile-1.5.4/print_info.c +index 3527970..d7a8600 100644 +--- a/makedumpfile-1.5.4/print_info.c ++++ b/makedumpfile-1.5.4/print_info.c +@@ -283,27 +283,30 @@ print_usage(void) + void + print_progress(const char *msg, unsigned long current, unsigned long end) + { +- int progress; ++ float progress; + time_t tm; + static time_t last_time = 0; ++ static unsigned int lapse = 0; ++ static const char *spinner = "/|\\-"; + + if (current < end) { + tm = time(NULL); + if (tm - last_time < 1) + return; + last_time = tm; +- progress = current * 100 / end; ++ progress = (float)current * 100 / end; + } else + progress = 100; + + if (flag_ignore_r_char) { +- PROGRESS_MSG("%-" PROGRESS_MAXLEN "s: [%3d %%]\n", +- msg, progress); ++ PROGRESS_MSG("%-" PROGRESS_MAXLEN "s: [%5.1f %%] %c\n", ++ msg, progress, spinner[lapse % 4]); + } else { + PROGRESS_MSG("\r"); +- PROGRESS_MSG("%-" PROGRESS_MAXLEN "s: [%3d %%] ", +- msg, progress); ++ PROGRESS_MSG("%-" PROGRESS_MAXLEN "s: [%5.1f %%] %c", ++ msg, progress, spinner[lapse % 4]); + } ++ lapse++; + } + + void +-- +1.8.3.1 + diff --git a/SOURCES/kexec-tools-2.0.4-makedumpfile-Support-to-filter-dump-for-kernels-that-use.patch b/SOURCES/kexec-tools-2.0.4-makedumpfile-Support-to-filter-dump-for-kernels-that-use.patch new file mode 100644 index 0000000..299f1a8 --- /dev/null +++ b/SOURCES/kexec-tools-2.0.4-makedumpfile-Support-to-filter-dump-for-kernels-that-use.patch @@ -0,0 +1,429 @@ +From bcdba922182def3dac288ca201e77e7738a1e4ab Mon Sep 17 00:00:00 2001 +From: Hari Bathini <hbathini@linux.vnet.ibm.com> +Date: Mon, 25 Nov 2013 17:20:55 +0900 +Subject: [PATCH] [PATCH v5] Support to filter dump for kernels that use + CONFIG_SPARSEMEM_VMEMMAP. 
+ +Makedumpfile tool fails to filter dump for kernels that are build with +CONFIG_SPARSEMEM_VMEMMAP set, as it fails to do address translations +for vmemmap regions that are mapped out of zone normal. This patch +provides support in makedumpfile to do vmemmap to physical address +translations when they are mapped outside zone normal. Some kernel +symbols are needed in vmcoreinfo for this changes to be effective. +The kernel patch that adds the necessary symbols to vmcoreinfo has +been posted to linuxppc devel mailing list. This patch is influenced +by vmemmap to physical address translation support code in crash tool. +This patch has been tested successfully at all dump filtering levels +on kernels with CONFIG_SPARSEMEM_VMEMMAP set/unset. Also, tested dump +filtering on already filtered vmcores (re-filtering). + +Changes from v4 to v5: +Trimmed patch description to be compact and readable. + +Changes from v3 to v4: +Rebased to devel branch. + +Signed-off-by: Onkar N Mahajan <onmahaja@in.ibm.com> +Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com> +--- + arch/ppc64.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- + makedumpfile.c | 39 +++++++++++++ + makedumpfile.h | 37 ++++++++++++ + 3 files changed, 247 insertions(+), 4 deletions(-) + +diff --git a/makedumpfile-1.5.4/arch/ppc64.c b/makedumpfile-1.5.4/arch/ppc64.c +index 85144f6..09c0eb3 100644 +--- a/makedumpfile-1.5.4/arch/ppc64.c ++++ b/makedumpfile-1.5.4/arch/ppc64.c +@@ -24,6 +24,154 @@ + #include "../elf_info.h" + #include "../makedumpfile.h" + ++/* ++ * This function traverses vmemmap list to get the count of vmemmap regions ++ * and populates the regions' info in info->vmemmap_list[] ++ */ ++static int ++get_vmemmap_list_info(ulong head) ++{ ++ int i, cnt; ++ long backing_size, virt_addr_offset, phys_offset, list_offset; ++ ulong curr, next; ++ char *vmemmap_buf = NULL; ++ ++ backing_size = SIZE(vmemmap_backing); ++ virt_addr_offset = OFFSET(vmemmap_backing.virt_addr); ++ phys_offset 
= OFFSET(vmemmap_backing.phys); ++ list_offset = OFFSET(vmemmap_backing.list); ++ info->vmemmap_list = NULL; ++ ++ /* ++ * Get list count by traversing the vmemmap list ++ */ ++ cnt = 0; ++ curr = head; ++ next = 0; ++ do { ++ if (!readmem(VADDR, (curr + list_offset), &next, ++ sizeof(next))) { ++ ERRMSG("Can't get vmemmap region addresses\n"); ++ goto err; ++ } ++ curr = next; ++ cnt++; ++ } while ((next != 0) && (next != head)); ++ ++ /* ++ * Using temporary buffer to save vmemmap region information ++ */ ++ vmemmap_buf = calloc(1, backing_size); ++ if (vmemmap_buf == NULL) { ++ ERRMSG("Can't allocate memory for vmemmap_buf. %s\n", ++ strerror(errno)); ++ goto err; ++ } ++ ++ info->vmemmap_list = calloc(1, cnt * sizeof(struct ppc64_vmemmap)); ++ if (info->vmemmap_list == NULL) { ++ ERRMSG("Can't allocate memory for vmemmap_list. %s\n", ++ strerror(errno)); ++ goto err; ++ } ++ ++ curr = head; ++ for (i = 0; i < cnt; i++) { ++ if (!readmem(VADDR, curr, vmemmap_buf, backing_size)) { ++ ERRMSG("Can't get vmemmap region info\n"); ++ goto err; ++ } ++ ++ info->vmemmap_list[i].phys = ULONG(vmemmap_buf + phys_offset); ++ info->vmemmap_list[i].virt = ULONG(vmemmap_buf + ++ virt_addr_offset); ++ curr = ULONG(vmemmap_buf + list_offset); ++ ++ if (info->vmemmap_list[i].virt < info->vmemmap_start) ++ info->vmemmap_start = info->vmemmap_list[i].virt; ++ ++ if ((info->vmemmap_list[i].virt + info->vmemmap_psize) > ++ info->vmemmap_end) ++ info->vmemmap_end = (info->vmemmap_list[i].virt + ++ info->vmemmap_psize); ++ } ++ ++ free(vmemmap_buf); ++ return cnt; ++err: ++ free(vmemmap_buf); ++ free(info->vmemmap_list); ++ return 0; ++} ++ ++/* ++ * Verify that the kernel has made the vmemmap list available, ++ * and if so, stash the relevant data required to make vtop ++ * translations. 
++ */ ++static int ++ppc64_vmemmap_init(void) ++{ ++ int psize, shift; ++ ulong head; ++ ++ if ((SYMBOL(vmemmap_list) == NOT_FOUND_SYMBOL) ++ || (SYMBOL(mmu_psize_defs) == NOT_FOUND_SYMBOL) ++ || (SYMBOL(mmu_vmemmap_psize) == NOT_FOUND_SYMBOL) ++ || (SIZE(vmemmap_backing) == NOT_FOUND_STRUCTURE) ++ || (SIZE(mmu_psize_def) == NOT_FOUND_STRUCTURE) ++ || (OFFSET(mmu_psize_def.shift) == NOT_FOUND_STRUCTURE) ++ || (OFFSET(vmemmap_backing.phys) == NOT_FOUND_STRUCTURE) ++ || (OFFSET(vmemmap_backing.virt_addr) == NOT_FOUND_STRUCTURE) ++ || (OFFSET(vmemmap_backing.list) == NOT_FOUND_STRUCTURE)) ++ return FALSE; ++ ++ if (!readmem(VADDR, SYMBOL(mmu_vmemmap_psize), &psize, sizeof(int))) ++ return FALSE; ++ ++ if (!readmem(VADDR, SYMBOL(mmu_psize_defs) + ++ (SIZE(mmu_psize_def) * psize) + ++ OFFSET(mmu_psize_def.shift), &shift, sizeof(int))) ++ return FALSE; ++ info->vmemmap_psize = 1 << shift; ++ ++ if (!readmem(VADDR, SYMBOL(vmemmap_list), &head, sizeof(unsigned long))) ++ return FALSE; ++ ++ /* ++ * Get vmemmap list count and populate vmemmap regions info ++ */ ++ info->vmemmap_cnt = get_vmemmap_list_info(head); ++ if (info->vmemmap_cnt == 0) ++ return FALSE; ++ ++ info->flag_vmemmap = TRUE; ++ return TRUE; ++} ++ ++/* ++ * If the vmemmap address translation information is stored in the kernel, ++ * make the translation. 
++ */ ++static unsigned long long ++ppc64_vmemmap_to_phys(unsigned long vaddr) ++{ ++ int i; ++ ulong offset; ++ unsigned long long paddr = NOT_PADDR; ++ ++ for (i = 0; i < info->vmemmap_cnt; i++) { ++ if ((vaddr >= info->vmemmap_list[i].virt) && (vaddr < ++ (info->vmemmap_list[i].virt + info->vmemmap_psize))) { ++ offset = vaddr - info->vmemmap_list[i].virt; ++ paddr = info->vmemmap_list[i].phys + offset; ++ break; ++ } ++ } ++ ++ return paddr; ++} ++ + int + set_ppc64_max_physmem_bits(void) + { +@@ -103,6 +251,16 @@ get_machdep_info_ppc64(void) + info->vmalloc_start = vmalloc_start; + DEBUG_MSG("vmalloc_start: %lx\n", vmalloc_start); + ++ if (SYMBOL(vmemmap_list) != NOT_FOUND_SYMBOL) { ++ info->vmemmap_start = VMEMMAP_REGION_ID << REGION_SHIFT; ++ info->vmemmap_end = info->vmemmap_start; ++ if (ppc64_vmemmap_init() == FALSE) { ++ ERRMSG("Can't get vmemmap list info.\n"); ++ return FALSE; ++ } ++ DEBUG_MSG("vmemmap_start: %lx\n", info->vmemmap_start); ++ } ++ + return TRUE; + } + +@@ -121,14 +279,23 @@ vaddr_to_paddr_ppc64(unsigned long vaddr) + if (paddr != NOT_PADDR) + return paddr; + +- if ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL) +- || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE)) { +- ERRMSG("Can't get necessary information for vmalloc translation.\n"); +- return NOT_PADDR; ++ if ((SYMBOL(vmap_area_list) == NOT_FOUND_SYMBOL) ++ || (OFFSET(vmap_area.va_start) == NOT_FOUND_STRUCTURE) ++ || (OFFSET(vmap_area.list) == NOT_FOUND_STRUCTURE)) { ++ if ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL) ++ || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE)) { ++ ERRMSG("Can't get info for vmalloc translation.\n"); ++ return NOT_PADDR; ++ } + } + if (!is_vmalloc_addr_ppc64(vaddr)) + return (vaddr - info->kernel_start); + ++ if ((info->flag_vmemmap) ++ && (vaddr >= info->vmemmap_start)) { ++ return ppc64_vmemmap_to_phys(vaddr); ++ } ++ + /* + * TODO: Support vmalloc translation. 
+ */ +diff --git a/makedumpfile-1.5.4/makedumpfile.c b/makedumpfile-1.5.4/makedumpfile.c +index 3746cf6..0c68f32 100644 +--- a/makedumpfile-1.5.4/makedumpfile.c ++++ b/makedumpfile-1.5.4/makedumpfile.c +@@ -1107,6 +1107,10 @@ get_symbol_info(void) + SYMBOL_ARRAY_LENGTH_INIT(node_remap_start_pfn, + "node_remap_start_pfn"); + ++ SYMBOL_INIT(vmemmap_list, "vmemmap_list"); ++ SYMBOL_INIT(mmu_psize_defs, "mmu_psize_defs"); ++ SYMBOL_INIT(mmu_vmemmap_psize, "mmu_vmemmap_psize"); ++ + return TRUE; + } + +@@ -1417,6 +1421,20 @@ get_structure_info(void) + OFFSET_INIT(printk_log.text_len, "log", "text_len"); + } + ++ /* ++ * Get offsets of the vmemmap_backing's members. ++ */ ++ SIZE_INIT(vmemmap_backing, "vmemmap_backing"); ++ OFFSET_INIT(vmemmap_backing.phys, "vmemmap_backing", "phys"); ++ OFFSET_INIT(vmemmap_backing.virt_addr, "vmemmap_backing", "virt_addr"); ++ OFFSET_INIT(vmemmap_backing.list, "vmemmap_backing", "list"); ++ ++ /* ++ * Get offsets of the mmu_psize_def's members. ++ */ ++ SIZE_INIT(mmu_psize_def, "mmu_psize_def"); ++ OFFSET_INIT(mmu_psize_def.shift, "mmu_psize_def", "shift"); ++ + return TRUE; + } + +@@ -1603,6 +1621,9 @@ write_vmcoreinfo_data(void) + WRITE_SYMBOL("node_remap_start_vaddr", node_remap_start_vaddr); + WRITE_SYMBOL("node_remap_end_vaddr", node_remap_end_vaddr); + WRITE_SYMBOL("node_remap_start_pfn", node_remap_start_pfn); ++ WRITE_SYMBOL("vmemmap_list", vmemmap_list); ++ WRITE_SYMBOL("mmu_psize_defs", mmu_psize_defs); ++ WRITE_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize); + + /* + * write the structure size of 1st kernel +@@ -1620,6 +1641,8 @@ write_vmcoreinfo_data(void) + WRITE_STRUCTURE_SIZE("printk_log", printk_log); + else + WRITE_STRUCTURE_SIZE("log", printk_log); ++ WRITE_STRUCTURE_SIZE("vmemmap_backing", vmemmap_backing); ++ WRITE_STRUCTURE_SIZE("mmu_psize_def", mmu_psize_def); + + /* + * write the member offset of 1st kernel +@@ -1664,6 +1687,11 @@ write_vmcoreinfo_data(void) + WRITE_MEMBER_OFFSET("log.len", printk_log.len); + 
WRITE_MEMBER_OFFSET("log.text_len", printk_log.text_len); + } ++ WRITE_MEMBER_OFFSET("vmemmap_backing.phys", vmemmap_backing.phys); ++ WRITE_MEMBER_OFFSET("vmemmap_backing.virt_addr", ++ vmemmap_backing.virt_addr); ++ WRITE_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list); ++ WRITE_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift); + + if (SYMBOL(node_data) != NOT_FOUND_SYMBOL) + WRITE_ARRAY_LENGTH("node_data", node_data); +@@ -1932,6 +1960,9 @@ read_vmcoreinfo(void) + READ_SYMBOL("node_remap_start_vaddr", node_remap_start_vaddr); + READ_SYMBOL("node_remap_end_vaddr", node_remap_end_vaddr); + READ_SYMBOL("node_remap_start_pfn", node_remap_start_pfn); ++ READ_SYMBOL("vmemmap_list", vmemmap_list); ++ READ_SYMBOL("mmu_psize_defs", mmu_psize_defs); ++ READ_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize); + + READ_STRUCTURE_SIZE("page", page); + READ_STRUCTURE_SIZE("mem_section", mem_section); +@@ -1942,6 +1973,9 @@ read_vmcoreinfo(void) + READ_STRUCTURE_SIZE("node_memblk_s", node_memblk_s); + READ_STRUCTURE_SIZE("nodemask_t", nodemask_t); + READ_STRUCTURE_SIZE("pageflags", pageflags); ++ READ_STRUCTURE_SIZE("vmemmap_backing", vmemmap_backing); ++ READ_STRUCTURE_SIZE("mmu_psize_def", mmu_psize_def); ++ + + READ_MEMBER_OFFSET("page.flags", page.flags); + READ_MEMBER_OFFSET("page._count", page._count); +@@ -1972,6 +2006,11 @@ read_vmcoreinfo(void) + READ_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr); + READ_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start); + READ_MEMBER_OFFSET("vmap_area.list", vmap_area.list); ++ READ_MEMBER_OFFSET("vmemmap_backing.phys", vmemmap_backing.phys); ++ READ_MEMBER_OFFSET("vmemmap_backing.virt_addr", ++ vmemmap_backing.virt_addr); ++ READ_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list); ++ READ_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift); + + READ_STRUCTURE_SIZE("printk_log", printk_log); + if (SIZE(printk_log) != NOT_FOUND_STRUCTURE) { +diff --git a/makedumpfile-1.5.4/makedumpfile.h 
b/makedumpfile-1.5.4/makedumpfile.h +index 3a7e61a..517e16e 100644 +--- a/makedumpfile-1.5.4/makedumpfile.h ++++ b/makedumpfile-1.5.4/makedumpfile.h +@@ -576,6 +576,8 @@ do { \ + #define _SECTION_SIZE_BITS (24) + #define _MAX_PHYSMEM_BITS_ORIG (44) + #define _MAX_PHYSMEM_BITS_3_7 (46) ++#define REGION_SHIFT (60UL) ++#define VMEMMAP_REGION_ID (0xfUL) + #endif + + #ifdef __powerpc32__ +@@ -862,6 +864,11 @@ struct splitting_info { + unsigned long size_eraseinfo; + } splitting_info_t; + ++struct ppc64_vmemmap { ++ unsigned long phys; ++ unsigned long virt; ++}; ++ + struct DumpInfo { + int32_t kernel_version; /* version of first kernel*/ + struct timeval timestamp; +@@ -895,6 +902,7 @@ struct DumpInfo { + int flag_dmesg; /* dump the dmesg log out of the vmcore file */ + int flag_use_printk_log; /* did we read printk_log symbol name? */ + int flag_nospace; /* the flag of "No space on device" error */ ++ int flag_vmemmap; /* kernel supports vmemmap address space */ + unsigned long vaddr_for_vtop; /* virtual address for debugging */ + long page_size; /* size of page */ + long page_shift; +@@ -909,6 +917,9 @@ struct DumpInfo { + unsigned long vmalloc_end; + unsigned long vmemmap_start; + unsigned long vmemmap_end; ++ int vmemmap_psize; ++ int vmemmap_cnt; ++ struct ppc64_vmemmap *vmemmap_list; + + /* + * Filter config file containing filter commands to filter out kernel +@@ -1166,6 +1177,13 @@ struct symbol_table { + unsigned long long __per_cpu_load; + unsigned long long cpu_online_mask; + unsigned long long kexec_crash_image; ++ ++ /* ++ * vmemmap symbols on ppc64 arch ++ */ ++ unsigned long long vmemmap_list; ++ unsigned long long mmu_vmemmap_psize; ++ unsigned long long mmu_psize_defs; + }; + + struct size_table { +@@ -1201,6 +1219,12 @@ struct size_table { + long kexec_segment; + long elf64_hdr; + ++ /* ++ * vmemmap symbols on ppc64 arch ++ */ ++ long vmemmap_backing; ++ long mmu_psize_def; ++ + long pageflags; + }; + +@@ -1344,6 +1368,19 @@ struct offset_table { + 
long text_len; + } printk_log; + ++ /* ++ * vmemmap symbols on ppc64 arch ++ */ ++ struct mmu_psize_def { ++ long shift; ++ } mmu_psize_def; ++ ++ struct vmemmap_backing { ++ long phys; ++ long virt_addr; ++ long list; ++ } vmemmap_backing; ++ + }; + + /* +-- +1.8.3.1 + diff --git a/SOURCES/kexec-tools-2.0.4-makedumpfile-Understand-v3.11-rc4-dmesg.patch b/SOURCES/kexec-tools-2.0.4-makedumpfile-Understand-v3.11-rc4-dmesg.patch new file mode 100644 index 0000000..084e7d5 --- /dev/null +++ b/SOURCES/kexec-tools-2.0.4-makedumpfile-Understand-v3.11-rc4-dmesg.patch @@ -0,0 +1,197 @@ +From a01b663749c4b221ecd03285fa24a4b31e742004 Mon Sep 17 00:00:00 2001 +From: Lubomir Rintel <lkundrak@v3.sk> +Date: Fri, 20 Sep 2013 15:56:49 +0900 +Subject: [PATCH] [PATCH v2] dump-dmesg: Understand >= v3.11-rc4 dmesg. + +Symbol name changed with the following commit: +62e32ac printk: rename struct log to struct printk_log + +Changes for v2: + * Only back values for symbol names we did actually read; + either "log" or "printk_log" + +Signed-off-by: Lubomir Rintel <lkundrak@v3.sk> +--- + makedumpfile.c | 69 +++++++++++++++++++++++++++++++++++++++++++--------------- + makedumpfile.h | 7 +++--- + 2 files changed, 55 insertions(+), 21 deletions(-) + +diff --git a/makedumpfile-1.5.4/makedumpfile.c b/makedumpfile-1.5.4/makedumpfile.c +index e01ff50..7bbdcc2 100644 +--- a/makedumpfile-1.5.4/makedumpfile.c ++++ b/makedumpfile-1.5.4/makedumpfile.c +@@ -1389,10 +1389,23 @@ get_structure_info(void) + OFFSET_INIT(elf64_phdr.p_paddr, "elf64_phdr", "p_paddr"); + OFFSET_INIT(elf64_phdr.p_memsz, "elf64_phdr", "p_memsz"); + +- SIZE_INIT(log, "log"); +- OFFSET_INIT(log.ts_nsec, "log", "ts_nsec"); +- OFFSET_INIT(log.len, "log", "len"); +- OFFSET_INIT(log.text_len, "log", "text_len"); ++ SIZE_INIT(printk_log, "printk_log"); ++ if (SIZE(printk_log) != NOT_FOUND_STRUCTURE) { ++ /* ++ * In kernel 3.11-rc4 the log structure name was renamed ++ * to "printk_log". 
++ */ ++ info->flag_use_printk_log = TRUE; ++ OFFSET_INIT(printk_log.ts_nsec, "printk_log", "ts_nsec"); ++ OFFSET_INIT(printk_log.len, "printk_log", "len"); ++ OFFSET_INIT(printk_log.text_len, "printk_log", "text_len"); ++ } else { ++ info->flag_use_printk_log = FALSE; ++ SIZE_INIT(printk_log, "log"); ++ OFFSET_INIT(printk_log.ts_nsec, "log", "ts_nsec"); ++ OFFSET_INIT(printk_log.len, "log", "len"); ++ OFFSET_INIT(printk_log.text_len, "log", "text_len"); ++ } + + return TRUE; + } +@@ -1593,7 +1606,10 @@ write_vmcoreinfo_data(void) + WRITE_STRUCTURE_SIZE("node_memblk_s", node_memblk_s); + WRITE_STRUCTURE_SIZE("nodemask_t", nodemask_t); + WRITE_STRUCTURE_SIZE("pageflags", pageflags); +- WRITE_STRUCTURE_SIZE("log", log); ++ if (info->flag_use_printk_log) ++ WRITE_STRUCTURE_SIZE("printk_log", printk_log); ++ else ++ WRITE_STRUCTURE_SIZE("log", printk_log); + + /* + * write the member offset of 1st kernel +@@ -1628,9 +1644,16 @@ write_vmcoreinfo_data(void) + WRITE_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr); + WRITE_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start); + WRITE_MEMBER_OFFSET("vmap_area.list", vmap_area.list); +- WRITE_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec); +- WRITE_MEMBER_OFFSET("log.len", log.len); +- WRITE_MEMBER_OFFSET("log.text_len", log.text_len); ++ if (info->flag_use_printk_log) { ++ WRITE_MEMBER_OFFSET("printk_log.ts_nsec", printk_log.ts_nsec); ++ WRITE_MEMBER_OFFSET("printk_log.len", printk_log.len); ++ WRITE_MEMBER_OFFSET("printk_log.text_len", printk_log.text_len); ++ } else { ++ /* Compatibility with pre-3.11-rc4 */ ++ WRITE_MEMBER_OFFSET("log.ts_nsec", printk_log.ts_nsec); ++ WRITE_MEMBER_OFFSET("log.len", printk_log.len); ++ WRITE_MEMBER_OFFSET("log.text_len", printk_log.text_len); ++ } + + if (SYMBOL(node_data) != NOT_FOUND_SYMBOL) + WRITE_ARRAY_LENGTH("node_data", node_data); +@@ -1909,7 +1932,6 @@ read_vmcoreinfo(void) + READ_STRUCTURE_SIZE("node_memblk_s", node_memblk_s); + READ_STRUCTURE_SIZE("nodemask_t", nodemask_t); + 
READ_STRUCTURE_SIZE("pageflags", pageflags); +- READ_STRUCTURE_SIZE("log", log); + + READ_MEMBER_OFFSET("page.flags", page.flags); + READ_MEMBER_OFFSET("page._count", page._count); +@@ -1940,9 +1962,20 @@ read_vmcoreinfo(void) + READ_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr); + READ_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start); + READ_MEMBER_OFFSET("vmap_area.list", vmap_area.list); +- READ_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec); +- READ_MEMBER_OFFSET("log.len", log.len); +- READ_MEMBER_OFFSET("log.text_len", log.text_len); ++ ++ READ_STRUCTURE_SIZE("printk_log", printk_log); ++ if (SIZE(printk_log) != NOT_FOUND_STRUCTURE) { ++ info->flag_use_printk_log = TRUE; ++ READ_MEMBER_OFFSET("printk_log.ts_nsec", printk_log.ts_nsec); ++ READ_MEMBER_OFFSET("printk_log.len", printk_log.len); ++ READ_MEMBER_OFFSET("printk_log.text_len", printk_log.text_len); ++ } else { ++ info->flag_use_printk_log = FALSE; ++ READ_STRUCTURE_SIZE("log", printk_log); ++ READ_MEMBER_OFFSET("log.ts_nsec", printk_log.ts_nsec); ++ READ_MEMBER_OFFSET("log.len", printk_log.len); ++ READ_MEMBER_OFFSET("log.text_len", printk_log.text_len); ++ } + + READ_ARRAY_LENGTH("node_data", node_data); + READ_ARRAY_LENGTH("pgdat_list", pgdat_list); +@@ -3710,13 +3743,13 @@ dump_log_entry(char *logptr, int fp) + ulonglong nanos; + ulong rem; + +- text_len = USHORT(logptr + OFFSET(log.text_len)); +- ts_nsec = ULONGLONG(logptr + OFFSET(log.ts_nsec)); ++ text_len = USHORT(logptr + OFFSET(printk_log.text_len)); ++ ts_nsec = ULONGLONG(logptr + OFFSET(printk_log.ts_nsec)); + + nanos = (ulonglong)ts_nsec / (ulonglong)1000000000; + rem = (ulonglong)ts_nsec % (ulonglong)1000000000; + +- msg = logptr + SIZE(log); ++ msg = logptr + SIZE(printk_log); + + sprintf(buf, "[%5lld.%06ld] ", nanos, rem/1000); + +@@ -3754,7 +3787,7 @@ log_from_idx(unsigned int idx, char *logbuf) + * the buffer. 
+ */ + +- msglen = USHORT(logptr + OFFSET(log.len)); ++ msglen = USHORT(logptr + OFFSET(printk_log.len)); + if (!msglen) + logptr = logbuf; + +@@ -3775,9 +3808,9 @@ log_next(unsigned int idx, char *logbuf) + * return the one after that. + */ + +- msglen = USHORT(logptr + OFFSET(log.len)); ++ msglen = USHORT(logptr + OFFSET(printk_log.len)); + if (!msglen) { +- msglen = USHORT(logbuf + OFFSET(log.len)); ++ msglen = USHORT(logbuf + OFFSET(printk_log.len)); + return msglen; + } + +diff --git a/makedumpfile-1.5.4/makedumpfile.h b/makedumpfile-1.5.4/makedumpfile.h +index c504bfb..3a7e61a 100644 +--- a/makedumpfile-1.5.4/makedumpfile.h ++++ b/makedumpfile-1.5.4/makedumpfile.h +@@ -893,6 +893,7 @@ struct DumpInfo { + int flag_force; /* overwrite existing stuff */ + int flag_exclude_xen_dom;/* exclude Domain-U from xen-kdump */ + int flag_dmesg; /* dump the dmesg log out of the vmcore file */ ++ int flag_use_printk_log; /* did we read printk_log symbol name? */ + int flag_nospace; /* the flag of "No space on device" error */ + unsigned long vaddr_for_vtop; /* virtual address for debugging */ + long page_size; /* size of page */ +@@ -1176,6 +1177,7 @@ struct size_table { + long list_head; + long node_memblk_s; + long nodemask_t; ++ long printk_log; + + /* + * for Xen extraction +@@ -1198,7 +1200,6 @@ struct size_table { + long cpumask_t; + long kexec_segment; + long elf64_hdr; +- long log; + + long pageflags; + }; +@@ -1337,11 +1338,11 @@ struct offset_table { + long p_memsz; + } elf64_phdr; + +- struct log_s { ++ struct printk_log_s { + long ts_nsec; + long len; + long text_len; +- } log; ++ } printk_log; + + }; + +-- +1.8.3.1 + diff --git a/SOURCES/kexec-tools-2.0.4-makedumpfile-disable-mmap.patch b/SOURCES/kexec-tools-2.0.4-makedumpfile-disable-mmap.patch deleted file mode 100644 index 125b62f..0000000 --- a/SOURCES/kexec-tools-2.0.4-makedumpfile-disable-mmap.patch +++ /dev/null @@ -1,36 +0,0 @@ -makedumpfile: disable mmap read - -There's a kernel bug for mapping mem 
ranges which end with -an address not aligned to page boundry. It's still not resolved -in upstream, so let's disable mmap read for now as a workaround. - -Once upstream got a right fix we can revert this patch. - -Signed-off-by: Dave Young <dyoung@redhat.com> ---- - makedumpfile.c | 7 +++++++ - 1 file changed, 7 insertions(+) - ---- kexec-tools/makedumpfile-1.5.4/makedumpfile.c.orig -+++ kexec-tools/makedumpfile-1.5.4/makedumpfile.c -@@ -3144,6 +3144,12 @@ out: - if (info->dump_level & DL_EXCLUDE_FREE) - setup_page_is_buddy(); - -+ /* There's a kernel bug for mapping mem ranges which end with -+ * an address not aligned to page boundry. It's still not resolved -+ * in upstream, so let's disable mmap read for now. -+ */ -+ info->flag_usemmap = FALSE; -+#if 0 - if (!initialize_mmap()) { - /* this kernel does not support mmap of vmcore */ - DEBUG_MSG("Kernel can't mmap vmcore, using reads.\n"); -@@ -3152,6 +3158,7 @@ out: - DEBUG_MSG("read %s with mmap()\n", info->name_memory); - info->flag_usemmap = TRUE; - } -+#endif - - return TRUE; - } diff --git a/SOURCES/kexec-tools-2.0.4-vmcore-dmesg-stack-smashing-happend-in-extreme-case.patch b/SOURCES/kexec-tools-2.0.4-vmcore-dmesg-stack-smashing-happend-in-extreme-case.patch new file mode 100644 index 0000000..044cb61 --- /dev/null +++ b/SOURCES/kexec-tools-2.0.4-vmcore-dmesg-stack-smashing-happend-in-extreme-case.patch @@ -0,0 +1,43 @@ +From 401e037e5e9527134c594b8923342a69ff38b7cb Mon Sep 17 00:00:00 2001 +From: Arthur Zou <zzou@redhat.com> +Date: Wed, 12 Mar 2014 13:05:18 +0800 +Subject: [PATCH] vmcore-dmesg stack smashing happend in extreme case + +Description +in dump_dmesg_structured() the out_buf size is 4096, and if the +length is less than 4080( 4096-16 ) it won't really write out. +Normally, after writing one or four chars to the out_buf, it will +check the length of out_buf. But in extreme cases, 19 chars was +written to the out_buf before checking the length. This may cause +the stack corruption. 
If the length was 4079 (won't realy write out), +and then write 19 chars to it. the out_buf will overflow. + +Solution +Change 16 to 64 thus can make sure that always have 64bytes before +moving to next records. why using 64 is that a long long int can take +20 bytes. so the length of timestamp can be 44 ('[','.',']',' ') in +extreme case. + +Signed-off-by: Arthur Zou <zzou@redhat.com> +Acked-by: Vivek Goyal <vgoyal@redhat.com> +Signed-off-by: Simon Horman <horms@verge.net.au> +--- + vmcore-dmesg/vmcore-dmesg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/vmcore-dmesg/vmcore-dmesg.c b/vmcore-dmesg/vmcore-dmesg.c +index 0345660..e15cd91 100644 +--- a/vmcore-dmesg/vmcore-dmesg.c ++++ b/vmcore-dmesg/vmcore-dmesg.c +@@ -674,7 +674,7 @@ static void dump_dmesg_structured(int fd) + else + out_buf[len++] = c; + +- if (len >= OUT_BUF_SIZE - 16) { ++ if (len >= OUT_BUF_SIZE - 64) { + write_to_stdout(out_buf, len); + len = 0; + } +-- +1.8.4.2 + diff --git a/SOURCES/kexec-tools-2.0.4-vmcore-dmesg-struct_val_u64-not-casting-u64-to-u32.patch b/SOURCES/kexec-tools-2.0.4-vmcore-dmesg-struct_val_u64-not-casting-u64-to-u32.patch new file mode 100644 index 0000000..e2e43ff --- /dev/null +++ b/SOURCES/kexec-tools-2.0.4-vmcore-dmesg-struct_val_u64-not-casting-u64-to-u32.patch @@ -0,0 +1,31 @@ +From 158d7638f1669a8d34def55ab81f10993e68e76a Mon Sep 17 00:00:00 2001 +From: WANG Chao <chaowang@redhat.com> +Date: Tue, 7 Jan 2014 01:37:34 +0800 +Subject: [PATCH] vmcore-dmesg: struct_val_u64() not casting u64 to u32 + +It seems gcc doesn't check return type from inline function. +struct_val_u64() should return u64 otherwise upper 32bit is lost. 
+ +Signed-off-by: WANG Chao <chaowang@redhat.com> +Acked-by: Vivek Goyal <vgoyal@redhat.com> +Signed-off-by: Simon Horman <horms@verge.net.au> +--- + vmcore-dmesg/vmcore-dmesg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/vmcore-dmesg/vmcore-dmesg.c b/vmcore-dmesg/vmcore-dmesg.c +index 0f477c0..df14c89 100644 +--- a/vmcore-dmesg/vmcore-dmesg.c ++++ b/vmcore-dmesg/vmcore-dmesg.c +@@ -529,7 +529,7 @@ static inline uint32_t struct_val_u32(char *ptr, unsigned int offset) + return(file32_to_cpu(*(uint32_t *)(ptr + offset))); + } + +-static inline uint32_t struct_val_u64(char *ptr, unsigned int offset) ++static inline uint64_t struct_val_u64(char *ptr, unsigned int offset) + { + return(file64_to_cpu(*(uint64_t *)(ptr + offset))); + } +-- +1.8.4.2 + diff --git a/SOURCES/mkdumprd b/SOURCES/mkdumprd index 6de1755..1a6d21d 100644 --- a/SOURCES/mkdumprd +++ b/SOURCES/mkdumprd @@ -14,7 +14,7 @@ SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa" SAVE_PATH=$(grep ^path $conf_file| cut -d' ' -f2) [ -z "$SAVE_PATH" ] && SAVE_PATH="/var/crash" extra_modules="" -dracut_args=("--hostonly" "-o" "plymouth dash") +dracut_args=("--hostonly" "--hostonly-cmdline" "-o" "plymouth dash") OVERRIDE_RESETTABLE=0 perror_exit() { @@ -134,6 +134,7 @@ is_readonly_mount() { #Function: get_ssh_size #$1=dump target +#called from while loop and shouldn't read from stdin, so we're using "ssh -n" get_ssh_size() { local _opt _out _size _opt="-i $SSH_KEY_LOCATION -o BatchMode=yes -o StrictHostKeyChecking=yes" @@ -141,19 +142,21 @@ get_ssh_size() { [ $? -ne 0 ] && { perror_exit "checking remote ssh server available size failed." 
} - #ssh output removed the line break, so print $11 instead of $4 - _size=$(echo -n $_out|tail -1 | awk '{print $11}') + + #ssh output removed the line break, so print field NF-2 + _size=$(echo -n $_out| awk '{avail=NF-2; print $avail}') echo -n $_size } #mkdir if save path does not exist on ssh dump target #$1=ssh dump target #caller should ensure write permission on $DUMP_TARGET:$SAVE_PATH +#called from while loop and shouldn't read from stdin, so we're using "ssh -n" mkdir_save_path_ssh() { local _opt _dir _opt="-i $SSH_KEY_LOCATION -o BatchMode=yes -o StrictHostKeyChecking=yes" - ssh -q $_opt $1 mkdir -p $SAVE_PATH 2>&1 > /dev/null + ssh -qn $_opt $1 mkdir -p $SAVE_PATH 2>&1 > /dev/null _ret=$? if [ $_ret -ne 0 ]; then perror_exit "mkdir failed on $DUMP_TARGET:$SAVE_PATH" @@ -165,7 +168,7 @@ mkdir_save_path_ssh() if [ $_ret -ne 0 ]; then perror_exit "Could not create temporary directory on $DUMP_TARGET:$SAVE_PATH. Make sure user has write permission on destination" fi - ssh -q $_opt $1 rmdir $_dir + ssh -qn $_opt $1 rmdir $_dir return 0 } @@ -344,18 +347,36 @@ get_block_dump_target() { local _target - if is_ssh_dump_target || is_nfs_dump_target; then - return - fi - _target=$(egrep "^ext[234]|^xfs|^btrfs|^minix|^raw" /etc/kdump.conf 2>/dev/null |awk '{print $2}') + _target=$(get_user_configured_dump_disk) [ -n "$_target" ] && echo $(to_dev_name $_target) && return #get rootfs device name - _target=$(findmnt -k -f -n -o SOURCE /) + _target=$(get_root_fs_device) [ -b "$_target" ] && echo $(to_dev_name $_target) } +# If no dump disk is specified make sure /var/crash is not mounted on a +# separate disk. +check_block_dump_target() +{ + local _target + local _mntpoint + + is_user_configured_dump_target && return + + _target=$(get_root_fs_device) + if [ -b "$_target" ]; then + mkdir -p $SAVE_PATH + _mntpoint=`df $SAVE_PATH | tail -1 | awk '{print $NF}'` + if [ "$_mntpoint" != "/" ]; then + perror "No dump target specified. 
Default dump target is rootfs block device." + perror "But dump path $SAVE_PATH is not backed by rootfs block device. " + perror_exit "Either explicitly specify a dump target or specify a dump path backed by rootfs block device" + fi + fi +} + get_default_action_target() { local _target @@ -451,10 +472,6 @@ check_resettable() return 1 } -if ! check_resettable; then - exit 1 -fi - # $1: maj:min is_crypt() { @@ -465,7 +482,7 @@ is_crypt() eval "$line" [[ "$ID_FS_TYPE" = "crypto_LUKS" ]] && { dev=$(udevadm info --query=all --path=/sys/dev/block/$majmin | awk -F= '/DEVNAME/{print $2}') - perror "Device $dev is encrypted, can not be used in kdump." + echo "Device $dev is encrypted." return 0 } return 1 @@ -480,20 +497,19 @@ check_crypt() [ $_ret -eq 0 ] && return - if [ $_ret -eq 1 ]; then - _target=$(get_block_dump_target) - perror "Can not save vmcore to target device $_target." - elif [ $_ret -eq 2 ]; then - perror "Default action is dump_to_rootfs but can not save vmcore to root device." - fi - return 1 } -if ! check_crypt; then +check_block_dump_target + +if ! check_resettable; then exit 1 fi +if ! check_crypt; then + echo "Warning: Encrypted device is in dump path. User will prompted for password during second kernel boot." +fi + # firstly get right SSH_KEY_LOCATION keyfile=$(awk '/^sshkey/ {print $2}' $conf_file) if [ -f "$keyfile" ]; then diff --git a/SOURCES/supported-kdump-targets.txt b/SOURCES/supported-kdump-targets.txt new file mode 100644 index 0000000..65c95fc --- /dev/null +++ b/SOURCES/supported-kdump-targets.txt @@ -0,0 +1,105 @@ +Supported Kdump Targets + +This document try to list all supported kdump targets, and those supported +or unknown/tech-preview targets, this can help users to decide whether a dump +solution is available. + +Dump Target support status +========================== +This section tries to come up with some kind of guidelines in terms of +what dump targets are supported/not supported. 
Whatever is listed here +is not binding in any manner. It is just a sharing of current understanding +and if something is not right, this section needs to be edited. + +Following are 3 lists. The first one contains supported targets. These are +generic configurations which should work, and some configurations have most +likely worked in testing. The second list is known unsupported targets. +These are targets we know either don't work or we don't support. And the third +list is unknown/tech-preview. We either don't yet know the status of kdump +on these targets or these are under tech-preview. + +Note, these lists are not set in stone and can be changed at any point in +time. Also these lists might not be complete. We will add/remove items to +them as we get more testing information. Also, there are many corner cases +which can't possibly be listed. For example, in general we might be +supporting software iSCSI but there might be some configurations of it +which don't work. + +So if any target is listed in the supported section, it does not mean it works +in all possible configurations. It just means that in common configurations +it should work but there can be issues with particular configurations which +are not supported. As we come to know of particular issues, we will keep on +updating the lists accordingly.
+ + +Supported Dump targets +---------------------- +storage: + LVM volume (no thinp) + FC disks (qla2xxx, lpfc, bnx2fc, bfa) + software initiator based iSCSI + software RAID (mdraid) + hardware RAID (cciss, hpsa, megaraid_sas, mpt2sas, aacraid) + SCSI/SATA disks + +network: + Hardware using kernel modules: (tg3, igb, ixgbe, sfc, e1000e, bna, + cnic, netxen_nic, qlge, bnx2x, bnx, qlcnic, be2net, enic, + virtio-net, ixgbevf, igbvf) + protocol: ipv4 + bonding + vlan + bridge + team + vlan tagged bonding + bridge over bond/team/vlan + +hypervisor: + kvm + xen (Supported in select configurations only) + +filesystem: + ext[234] + btrfs + xfs + +firmware: + BIOS + UEFI + +hypervisor: + VMWare ESXi 4.1 and 5.1 + Hyper-V 2012 R2 (RHEL Gen1 UP Guest only) + +Unsupported Dump targets +------------------------ +storage: + BIOS RAID + Thin provisioning volume + hardware iSCSI + FCoE + legacy IDE + glusterfs + gfs2/clvm/halvm + +network: + hardware using kernel modules: (sfc SRIOV, cxgb4vf, pch_gbe) + protocol: ipv6 + wireless + Infiniband (IB) + vlan over bridge/team + +Unknown/tech-preview +-------------------- +storage: + PCI Express based SSDs + +hypervisor: + Hyper-V 2008 + Hyper-V 2012 + + +Useful Links +============ +[1] RHEL6: Enabling kdump for full-virt (HVM) Xen DomU + (https://access.redhat.com/knowledge/solutions/92943) diff --git a/SPECS/kexec-tools.spec b/SPECS/kexec-tools.spec index 47d5af4..498ffd3 100644 --- a/SPECS/kexec-tools.spec +++ b/SPECS/kexec-tools.spec @@ -1,6 +1,6 @@ Name: kexec-tools Version: 2.0.4 -Release: 13%{?dist} +Release: 30%{?dist} License: GPLv2 Group: Applications/System Summary: The kexec/kdump userspace component. @@ -17,7 +17,7 @@ Source9: http://downloads.sourceforge.net/project/makedumpfile/makedumpfile/1.5. 
Source10: kexec-kdump-howto.txt Source11: firstboot_kdump.py Source12: mkdumprd.8 -Source13: kexec-tools-po.tar.gz +Source13: kexec-tools-po-20131224.tgz Source14: 98-kexec.rules Source15: kdump.conf.5 Source16: kdump.service @@ -25,6 +25,8 @@ Source17: rhcrashkernel-param Source18: kdump.sysconfig.s390x Source19: eppic_030413.tar.gz Source20: kdump-lib.sh +Source21: kdump-in-cluster-environment.txt +Source22: supported-kdump-targets.txt ####################################### # These are sources for mkdumpramfs @@ -38,7 +40,8 @@ Requires(post): systemd-units Requires(preun): systemd-units Requires(postun): systemd-units Requires(pre): coreutils sed zlib -Requires: dracut, dracut-network, ethtool +Requires: dracut >= 033-145 +Requires: dracut-network, ethtool BuildRequires: zlib-devel zlib zlib-static elfutils-devel-static glib2-devel bzip2-devel ncurses-devel bison flex lzo-devel snappy-devel BuildRequires: pkgconfig intltool gettext BuildRequires: systemd-units @@ -69,6 +72,7 @@ Patch101: kexec-tools-2.0.4-kdump-x86-Process-multiple-Crash-kernel-in-proc-iome # Patches 301 through 400 are meant for ppc64 kexec-tools enablement # Patch301: kexec-tools-2.0.4-makedumpfile-Add-vmap_area_list-definition-for-ppc-ppc64.patch +Patch302: kexec-tools-2.0.4-makedumpfile-Support-to-filter-dump-for-kernels-that-use.patch # # Patches 401 through 500 are meant for s390 kexec-tools enablement @@ -88,7 +92,14 @@ Patch607: kexec-tools-2.0.4-makedumpfile-Update-pfn_cyclic-when-the-cyclic-buffe Patch608: kexec-tools-2.0.4-makedumpfile-Use-divideup-to-calculate-maximum-required-bit.patch Patch609: kexec-tools-2.0.4-makedumpfile-cache-Allocate-buffers-at-initialization-t.patch Patch610: kexec-tools-2.0.4-makedumpfile-cache-Reuse-entry-in-pending-list.patch -Patch611: kexec-tools-2.0.4-makedumpfile-disable-mmap.patch +Patch612: kexec-tools-2.0.4-makedumpfile-Understand-v3.11-rc4-dmesg.patch +Patch613: kexec-tools-2.0.4-makedumpfile-Assign-non-printable-value-as-short-option.patch 
+Patch614: kexec-tools-2.0.4-makedumpfile-Add-help-and-man-message-for-help.patch +Patch615: kexec-tools-2.0.4-makedumpfile-Add-non-mmap-option-to-disable-mmap-manually.patch +Patch616: kexec-tools-2.0.4-makedumpfile-Fall-back-to-read-when-mmap-fails.patch +Patch617: kexec-tools-2.0.4-vmcore-dmesg-struct_val_u64-not-casting-u64-to-u32.patch +Patch618: kexec-tools-2.0.4-makedumpfile-Improve-progress-information-for-huge-memor.patch +Patch619: kexec-tools-2.0.4-vmcore-dmesg-stack-smashing-happend-in-extreme-case.patch %description kexec-tools provides /sbin/kexec binary that facilitates a new @@ -127,11 +138,18 @@ tar -z -x -v -f %{SOURCE19} %patch608 -p1 %patch609 -p1 %patch610 -p1 -%patch611 -p1 %patch001 -p1 %patch002 -p1 %patch003 -p1 - +%patch612 -p1 +%patch302 -p1 +%patch613 -p1 +%patch614 -p1 +%patch615 -p1 +%patch616 -p1 +%patch617 -p1 +%patch618 -p1 +%patch619 -p1 tar -z -x -v -f %{SOURCE13} @@ -157,7 +175,9 @@ export CFLAGS="-O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2" --sbindir=/sbin rm -f kexec-tools.spec.in # setup the docs -cp %{SOURCE10} . +cp %{SOURCE10} . +cp %{SOURCE21} . +cp %{SOURCE22} . 
make %ifarch %{ix86} x86_64 ia64 ppc64 s390x @@ -205,6 +225,8 @@ install -m 755 %{SOURCE17} $RPM_BUILD_ROOT/usr/sbin/rhcrashkernel-param %ifarch %{ix86} x86_64 ia64 ppc64 s390x install -m 755 makedumpfile-1.5.4/makedumpfile $RPM_BUILD_ROOT/sbin/makedumpfile install -m 644 makedumpfile-1.5.4/makedumpfile.8.gz $RPM_BUILD_ROOT/%{_mandir}/man8/makedumpfile.8.gz +install -m 644 makedumpfile-1.5.4/makedumpfile.conf.5.gz $RPM_BUILD_ROOT/%{_mandir}/man5/makedumpfile.conf.5.gz +install -m 644 makedumpfile-1.5.4/makedumpfile.conf $RPM_BUILD_ROOT/%{_sysconfdir}/makedumpfile.conf.sample install -m 755 makedumpfile-1.5.4/eppic_makedumpfile.so $RPM_BUILD_ROOT/%{_libdir}/eppic_makedumpfile.so %endif make -C kexec-tools-po install DESTDIR=$RPM_BUILD_ROOT @@ -323,6 +345,9 @@ done %{_bindir}/* %{_datadir}/kdump %{_prefix}/lib/kdump +%ifarch %{ix86} x86_64 ia64 ppc64 s390x +%{_sysconfdir}/makedumpfile.conf.sample +%endif %config(noreplace,missingok) %{_sysconfdir}/sysconfig/kdump %config(noreplace,missingok) %{_sysconfdir}/kdump.conf %ifnarch s390x @@ -337,6 +362,8 @@ done %doc COPYING %doc TODO %doc kexec-kdump-howto.txt +%doc kdump-in-cluster-environment.txt +%doc supported-kdump-targets.txt %ifarch %{ix86} x86_64 ia64 ppc64 s390x %files eppic @@ -344,6 +371,75 @@ done %endif %changelog +* Wed Mar 26 2014 WANG Chao <chaowang@redhat.com> - 2.0.4-30 +- fix issue when dump path is mounted on nfs. 
+ +* Tue Mar 18 2014 WANG Chao <chaowang@redhat.com> - 2.0.4-29 +- vmcore-dmesg: stack smashing fix +- get_ssh_size fix for localized df output + +* Mon Mar 10 2014 WANG Chao <chaowang@redhat.com> - 2.0.4-28 +- Document supported/unsupported/unknown list of dump targets +- Warn user about save vmcore path mounted by another disk + +* Tue Mar 04 2014 WANG Chao <chaowang@redhat.com> - 2.0.4-27 +- mkdumprd: call dracut with --hostonly-cmdline (dracut >= 033-145) +- warning on secure boot enabled platform + +* Tue Mar 04 2014 WANG Chao <chaowang@redhat.com> - 2.0.4-26 +- Regression fix on wdt kernel module install + +* Mon Mar 03 2014 WANG Chao <chaowang@redhat.com> - 2.0.4-25 +- kdumpctl: Pass disable_cpu_apicid to kexec of capture kernel +- Relax restriction of dumping on encrypted target +- ssh dump: create random-seed manually + +* Mon Feb 17 2014 WANG Chao <chaowang@redhat.com> - 2.0.4-24 +- remove selinux flipping when propagating ssh key +- add kdump-in-cluster-environment.txt to rpm pkg +- Secure Boot status check warning +- Some watchdog driver support + +* Wed Jan 29 2014 WANG Chao <chaowang@redhat.com> - 2.0.4-23 +- Add acpi_no_memhotplug to kdump kernel +- Add fence kdump support + +* Fri Jan 24 2014 Daniel Mach <dmach@redhat.com> - 2.0.4-22 +- Mass rebuild 2014-01-24 + +* Wed Jan 22 2014 WANG Chao <chaowang@redhat.com> - 2.0.4-21 +- makedumpfile: Improve progress information for huge memory system +- s390: use nr_cpus=1 instead of maxcpus=1 + +* Fri Jan 17 2014 WANG Chao <chaowang@redhat.com> - 2.0.4-20 +- vmcore-dmesg: fix timestamp error in vmcore-dmesg.txt +- makedumpfile: re-enable mmap() and introduce --non-mmap +- kdump.conf uncomment default core_collector line +- fix an issue when 'ssh' directive appearing in kdump.conf, the rest part of + lines in this file are ignored + +* Fri Dec 27 2013 Daniel Mach <dmach@redhat.com> - 2.0.4-19 +- Mass rebuild 2013-12-27 + +* Tue Dec 24 2013 WANG Chao <chaowang@redhat.com> - 2.0.4-18 +- kexec-tools.spec: only 
install makedumpfile.conf on ix86 x86_64 ppc64 s390x + +* Tue Dec 24 2013 WANG Chao <chaowang@redhat.com> - 2.0.4-17 +- kexec-tools.spec: remove /etc/kdump-adv-conf/ + +* Tue Dec 24 2013 WANG Chao <chaowang@redhat.com> - 2.0.4-16 +- update translation files + +* Tue Dec 17 2013 WANG Chao <chaowang@redhat.com> - 2.0.4-15 +- makedumpfile: default to lzo compression +- makedumpfile: add makedumpfile.conf.sample and its manpage + +* Tue Dec 03 2013 WANG Chao <chaowang@redhat.com> - 2.0.4-14 +- Add rd.memdebug in kdump module +- kdumpctl: Avoid leaking fd to subshell +- makedumpfile: Understand >= v3.11-rc4 dmesg +- makedumpfile, ppc: Support to filter dump for kernels that use CONFIG_SPARSEMEM_VMEMMAP. + * Fri Nov 15 2013 WANG Chao <chaowang@redhat.com> - 2.0.4-13 - makedumpfile: disable mmap()