#! /bin/sh
# PCP QA Test No. 115
# exercises pmie_check functionality
#
# Copyright (c) 2002 Silicon Graphics, Inc.  All Rights Reserved.
#

seq=$(basename $0)
echo "QA output created by $seq"

# pull in the shared QA environment, output filters and sanity checks
. ./common.product
. ./common.filter
. ./common.check

# bail out via _notrun on distributions reported as lacking chkconfig support
if [ -f /etc/gentoo-release ]; then
    _notrun "Gentoo is lacking chkconfig support"
fi
if [ -f /etc/slackware-version ]; then
    _notrun "Slackware is lacking chkconfig support"
fi

# the pmie control file location is needed by everything that follows
if [ -z "$PCP_PMIECONTROL_PATH" ]; then
    echo "Error: botched installation - PCP_PMIECONTROL_PATH not defined"
    exit 1
fi

# Exit handler (installed via trap): undo the changes this test makes.
#
# Globals read:
#   needclean         true once the pmie control and pmcd config files
#                     have been saved and may have been modified
#   pmie_state        value of `_get_config pmie` recorded at start-up
#   pmie_was_running  true/false once the test has checked for a pmie
#                     PID file; unset if the test exited before that point
#   tmp, here, seq, sudo, PCP_* paths
#
# Every remaining $tmp.* file is appended to $here/$seq.full before the
# files are removed.
#
_cleanup()
{
    # only restore the configs once the main script has saved them
    if ${needclean:-false}
    then
	_restore_config $PCP_PMIECONTROL_PATH
	_restore_config $PCP_PMCDCONF_PATH
    fi
    $sudo rm -rf $tmp.nogo $tmp.locker
    for file in $tmp.*
    do
	# when nothing matches, the loop sees the literal pattern ... skip
	# it (and anything else that is not a regular file)
	[ -f "$file" ] || continue
	echo >>$here/$seq.full
	echo "=== $file ===" >>$here/$seq.full
	cat "$file" >>$here/$seq.full
    done
    _restore_auto_restart pmcd
    [ "$pmie_state" = on ] && _change_config pmie on
    case "$pmie_was_running" in
	true)
	    # don't trigger systemctl "request repeated too quickly" snarfoo
	    #
	    sleep 2
	    _restore_auto_restart pmie
	    _service pmie start >>$here/$seq.full 2>&1
	    _wait_for_pmie
	    ;;
	false)
	    _service pmie stop >>$here/$seq.full 2>&1
	    $sudo $PCP_BINADM_DIR/pmsignal -a -s TERM pmie >>$here/$seq.full 2>&1
	    _wait_pmie_end
	    ;;
	*)
	    # the test exited before it looked at (or touched) the pmie
	    # service, so leave the service exactly as it is
	    ;;
    esac
    $sudo rm -f $tmp.*
}

# assume failure until the very end of the test
status=1
needclean=false
signal=$PCP_BINADM_DIR/pmsignal
$sudo rm -f $tmp.* $seq.full
trap "_cleanup; exit \$status" 0 1 2 3 15

_stop_auto_restart pmcd
pmie_state=$(_get_config pmie)

# short and fully qualified local host names, plus one remote pmcd host
# (the test is not run when no remote host is available)
lhost=$(hostname | sed -e 's/\..*//')
host=$(_get_fqdn)
remote=$(./getpmcdhosts -n 1 -L 2>$tmp.out)
rhost=$(echo $remote | sed -e 's/\..*//')
if [ -z "$remote" ]
then
    _notrun $(cat $tmp.out)
fi
echo "remote=$remote" >>$here/$seq.full

tmpdir=$(dirname $tmp)

# mkdir: error lines ... no consistency in format, so cull 'em
# ditto for cd errors reported from pmie_check
#
# and these ones (w/out systemctl) ...
# $PCP_RC_DIR/pmie:
# Error: PCP inference engine control file $PCP_PMIECONTROL_PATH
# is missing!  Cannot start any Performance Co-Pilot inference engine(s).
# failed (status=1)
#
# and if systemctl in the mix, then the error cases produce chatter
# like:
# Job for pmie.service failed because the service did not take the steps required by its unit configuration.
# See "systemctl status pmie.service" and "journalctl -xe" for details.
# or
# Job for pmie.service failed because of unavailable resources or another system error.
# or
# Job for pmie.service failed. See "systemctl status pmie.service" and "journalctl -xe" for details.
#
_filter()
{
    # Canonicalise pmie_check / pmie start-up output read from stdin so
    # it can be compared across hosts and runs:
    #  - the unfiltered input is first appended to $here/$seq.full
    #  - sed replaces run-specific strings ($tmp, $tmpdir, "$seq-<pid of
    #    this shell>", shell "line N:" numbers, the remote and local host
    #    names, "local:") with fixed tokens, maps /private/tmp to /tmp,
    #    collapses ls-style permission lines that mention TMP and masks
    #    the text after "Start:" and "End:"
    #  - sed deletes lines whose wording varies by platform (mkdir: and
    #    pmie_check cd: errors, "different logfile, skip" notices, the
    #    rc script and systemctl failure chatter)
    #  - awk drops every line from one starting "Called from:" through
    #    the line containing "end of pstree output", inclusive
    #
    # A trailing blank is appended to each line ('s/$/ /') before the
    # host name substitutions, because those require a blank or double
    # quote on both sides of the name; leading and trailing white space
    # is stripped again afterwards.
    #
    # NOTE(review): $remote, $rhost and $host are pasted into the sed
    # patterns unescaped, so any "." in them matches any character.
    #
    tee -a $here/$seq.full \
    | sed \
	-e "s;$tmp;TMP;g" \
	-e "s;$tmpdir;TMPDIR;g" \
	-e "s/$seq-$$/SEQ-PID/g" \
	-e "s;$PCP_BIN_DIR/pmie;\$PCP_BIN_DIR/pmie;" \
	-e 's/line [0-9][0-9]*:/line N:/' \
	-e 's/$/ /' \
	-e 's/\([ "]\)'"$remote"'\([ "]\)/\1REMOTEHOST\2/g' \
	-e 's/\([ "]\)'"$rhost"'\([ "]\)/\1REMOTEHOST\2/g' \
	-e 's/\([ "]\)'"$host"'\([ "]\)/\1LOCALHOST\2/g' \
	-e 's/\([ "]\)local:\([ "]\)/\1LOCALHOST\2/g' \
	-e "s;/private/tmp;/tmp;g" \
	-e '/ try .* different logfile, skip/d' \
	-e '/^.r[-w][-x]r[-w][-x]/s/.* TMP/... ls output ... TMP/' \
	-e '/mkdir:/d' \
	-e '/\/pmie_check:.*cd:/d' \
	-e 's/[ 	]*$//' \
	-e 's/^[ 	]*//' \
	-e '/\$PCP_RC_DIR\/pmie:/d' \
	-e '/^Error: PCP inference engine control file/d' \
	-e '/^is missing! /d' \
	-e '/^failed /d' \
	-e '/^Job for pmie.service failed because /d' \
	-e '/^Job for pmie.service failed\. /d' \
	-e '/^See "systemctl  *status pmie.service" and/d' \
	-e '/^Start: /s/ .*/ DATE/' \
	-e '/^End: /s/ .*/ DATE/' \
    | $PCP_AWK_PROG '
/^Called from:/		{ skip = 1 }
skip == 0		{ print }
/end of pstree output/	{ skip = 0 }'
}

# Count the pmie instances recorded in $PCP_TMP_DIR/pmie.
#
# Each entry in that directory is taken to be the PID of a pmie process
# and is probed with $PCP_PS_PROG; an entry for which ps exits with
# status 1 is reported on stdout ("urk, ...") and not counted.
# Diagnostics (directory listing, ps and, when available, pstree output)
# are appended to $here/$seq.full.
#
# Returns: the number of entries with a live process, as the function's
#          exit status (so read it from $? immediately after the call);
#          0 when the directory is missing or cannot be entered.
# Side effects: sets the globals count, plist and process, and leaves
#          the current directory at $here once the directory was entered.
#
_count_pmies()
{
    count=0

    # no tracking directory means there is nothing to count
    [ -d "$PCP_TMP_DIR/pmie" ] || return 0

    # never list (and count) the wrong directory if the cd fails
    if ! cd "$PCP_TMP_DIR/pmie"
    then
	echo "_count_pmies: cannot cd to $PCP_TMP_DIR/pmie" >>$here/$seq.full
	return 0
    fi

    ls -l >>$here/$seq.full
    plist=`ls -1`
    echo "plist=$plist" >>$here/$seq.full
    cd $here

    for process in $plist
    do
	$PCP_PS_PROG $PCP_PS_ALL_FLAGS -p $process >/dev/null 2>&1
	if [ $? = 1 ]
	then
	    echo "urk, $PCP_TMP_DIR/pmie/$process has no running pmie instance"
	else
	    count=$((count + 1))
	    $PCP_PS_PROG $PCP_PS_ALL_FLAGS -p $process >>$here/$seq.full
	    # "command -v" is the POSIX way to probe for a command; which(1)
	    # is not guaranteed to be installed
	    if command -v pstree >/dev/null 2>&1
	    then
		pstree $process >>$here/$seq.full
	    fi
	fi
    done

    return $count
}

# create a basic pmcd config file
# (copied over $PCP_PMCDCONF_PATH once the real test starts; the heredoc
# delimiter is unquoted, so $seq and `date` are expanded as it is written)
cat >$tmp.pmcd.conf << EOF
# Installed by PCP QA test $seq on `date`
pmcd	2	dso	pmcd_init	pmda_pmcd.so
EOF

# create a pmie config file
# (two trivial expressions; passed as "-c $tmp.conf" in the control lines
# used throughout this test)
cat >$tmp.conf << EOF
foo = sample.long.one;
doo = sample.long.ten;
EOF

# create pmie control files
# \$version is escaped so a literal "$version=..." line lands in the file.
# Going by the error cases later in this test, a v1.0 line is
#	host socks logfile args
# and a v1.1 line is
#	host primary socks logfile args
cat >$tmp.control.v1.0 << EOF
\$version=1.0
$remote         n $tmp.log1 -c $tmp.conf
$remote         n $tmp.log2 -c $tmp.conf
LOCALHOSTNAME   n $tmp.log0 -c $tmp.conf
EOF
cat >$tmp.control.v1.1 << EOF
\$version=1.1
$remote         n n $tmp.log1 -c $tmp.conf
$remote         n n $tmp.log2 -c $tmp.conf
LOCALHOSTNAME   y n $tmp.log0 -c $tmp.conf
EOF

# Record whether a pmie PID file exists before the test touches anything,
# so _cleanup knows whether to start or stop the service afterwards; a
# pmie found this way is shut down for the duration of the test.
if [ -f $PCP_RUN_DIR/pmie.pid ]
then
    pmie_was_running=true
    echo "Found pmie PID $(cat $PCP_RUN_DIR/pmie.pid) running @ start" >>$here/$seq.full
    _stop_auto_restart pmie
    _service pmie stop >>$here/$seq.full 2>&1
    _wait_pmie_end
else
    pmie_was_running=false
fi

# real QA test starts here
# Save the live pmie control and pmcd config files; needclean tells
# _cleanup that they have to be restored from now on.
_save_config $PCP_PMIECONTROL_PATH
_save_config $PCP_PMCDCONF_PATH
needclean=true
$sudo cp $tmp.pmcd.conf $PCP_PMCDCONF_PATH 

# Start from a known state: TERM any pmie, wait for it to go, and empty
# the directory that _count_pmies inspects.
$sudo $signal -a -s TERM pmie >/dev/null 2>&1
_wait_pmie_end
_change_config pmcd on
_change_config pmie off
$sudo rm -f $PCP_TMP_DIR/pmie/*

# _count_pmies hands its count back as the exit status, so $? has to be
# used in the very next command
_count_pmies
echo "pmie count at start of QA testing: $?" | tee -a $here/$seq.full
echo

# Restart the pmie service with the installed control file and report how
# many pmie instances _count_pmies finds afterwards.
echo === check default install operation === | tee -a $here/$seq.full
_service pmie restart 2>&1 | _filter_pmie_start
_wait_for_pmie
_count_pmies
# chkconfig no longer controls start-ability, expect non-zero
# ($? below is the count returned by _count_pmies)
echo "pmie count after chkconfig pmie off: $?"
echo
# don't trigger systemctl "request repeated too quickly" snarfoo
#
sleep 2

# Remove the control file, then restart: the resulting error chatter is
# culled by _filter and the pmie count is reported again.
echo === check for missing control file === | tee -a $here/$seq.full
_change_config pmie off
$sudo rm -f $PCP_PMIECONTROL_PATH
_change_config pmie on
_service pmie restart 2>&1 | _filter_pmie_start | _filter
_count_pmies
echo "pmie count after attempt without control file: $?"
echo
# don't trigger systemctl "request repeated too quickly" snarfoo
#
sleep 2

echo === check pmie_check and custom configs === | tee -a $here/$seq.full
# run pmie_check once against each of the control file versions built
# earlier; pmie processes are not counted here because -N is used
for ctl_version in v1.0 v1.1
do
    echo "--- $ctl_version ---" | tee -a $here/$seq.full
    pmie_check -V -V -N -c $tmp.control.$ctl_version >$tmp.log
    _filter <$tmp.log
    rm -f $tmp.log
    echo
done

# Warning cases: each one writes a small v1.1 control file named
# $tmp.warning.$case, runs pmie_check -N against it and shows the
# filtered output.  $case numbers the cases from 1.
echo "=== warnings ... ===" | tee -a $here/$seq.full
echo "--- stale lock file ---" | tee -a $here/$seq.full
case=1
cat >$tmp.warning.$case << EOF
\$version=1.1
LOCALHOSTNAME   n n $tmp.locker/ok.$case -c $tmp.conf
EOF
mkdir $tmp.locker
# pre-create the lock file for this logfile with an old timestamp
# (presumably 35 minutes in the past, going by the pmdate -35M argument)
touch -t `pmdate -35M %Y%m%d%H%M` $tmp.locker/ok.$case.lock
pmie_check -V -V -N -c $tmp.warning.$case >$tmp.warning.$case.log
cat $tmp.warning.$case.log | _filter
rm -f $tmp.warning.$case.log
echo
# Warning case: a lock file with a current timestamp already exists for
# the logfile named in the control file.
echo "--- existing lock file ---" | tee -a $here/$seq.full
case=`expr $case + 1`
cat >$tmp.warning.$case << EOF
\$version=1.1
LOCALHOSTNAME   n n $tmp.locker/ok.$case -c $tmp.conf
EOF
rm -f $tmp.locker/ok.$case.lock
touch $tmp.locker/ok.$case.lock
# Log the ps header and any pmie processes before and after pmie_check
# runs.  The alternation needs extended regular expressions (grep -E),
# and the ps flags come from $PCP_PS_ALL_FLAGS as in _count_pmies; the
# [P]/[p] brackets keep grep from matching its own command line.
$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '([P]ID)|([p]mie)' >>$here/$seq.full
pmie_check -V -V -N -c $tmp.warning.$case >$tmp.warning.$case.log
$PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '([P]ID)|([p]mie)' >>$here/$seq.full
cat $tmp.warning.$case.log | _filter
rm -f $tmp.warning.$case.log
echo
# Warning case: the lock directory loses write permission (mode 555)
# before pmie_check runs; it is removed again afterwards.
echo "--- can't create lock file ---" | tee -a $here/$seq.full
case=`expr $case + 1`
cat >$tmp.warning.$case << EOF
\$version=1.1
LOCALHOSTNAME   n n $tmp.locker/ok.$case -c $tmp.conf
EOF
chmod 555 $tmp.locker
pmie_check -V -V -N -c $tmp.warning.$case >$tmp.warning.$case.log
cat $tmp.warning.$case.log | _filter
rm -f $tmp.warning.$case.log
$sudo rm -rf $tmp.locker
echo
# Warning case: the control file carries "$"-prefixed lines other than
# $version ($date, an assignment to $PATH and one to $IFS).
echo "--- bad in-line shell commands ---" | tee -a $here/$seq.full
case=`expr $case + 1`
cat >$tmp.warning.$case << EOF
\$version=1.1
\$date
\$PATH=/some/where/bad:\$PATH
\$IFS=a
LOCALHOSTNAME   n n $tmp.ok.$case -c $tmp.conf
EOF
pmie_check -V -V -N -c $tmp.warning.$case >$tmp.warning.$case.log
cat $tmp.warning.$case.log | _filter
rm -f $tmp.warning.$case.log
echo
# Warning case: a v1.1-style line in a control file with no $version line.
echo "--- missing \$version but really v1.1 ... original control.local issue ---" | tee -a $here/$seq.full
case=`expr $case + 1`
cat >$tmp.warning.$case << EOF
LOCALHOSTNAME   n n $tmp.ok.$case -c $tmp.conf
EOF
pmie_check -V -V -N -c $tmp.warning.$case >$tmp.warning.$case.log
cat $tmp.warning.$case.log | _filter
rm -f $tmp.warning.$case.log
echo

# Error cases: each one writes a deliberately malformed control file
# named $tmp.error.$case, runs pmie_check -N against it and shows the
# filtered output.  $case restarts from 1 for this section.
echo "=== errors ... ===" | tee -a $here/$seq.full
# unsupported $version value
echo "--- bad version ---" | tee -a $here/$seq.full
case=1
cat >$tmp.error.$case << EOF
\$version=0.0
LOCALHOSTNAME   n $tmp.bad.$case -c $tmp.conf
EOF
pmie_check -V -V -N -c $tmp.error.$case >$tmp.bad.$case.log
cat $tmp.bad.$case.log | _filter
rm -f $tmp.bad.$case.log
echo
case=`expr $case + 1`
# v1.0: second field (socks) is "x" rather than y or n
echo "--- v1.0 socks != y|n ---" | tee -a $here/$seq.full
cat >$tmp.error.$case << EOF
\$version=1.0
LOCALHOSTNAME   x $tmp.bad.$case -c $tmp.conf
EOF
pmie_check -V -V -N -c $tmp.error.$case >$tmp.bad.$case.log
cat $tmp.bad.$case.log | _filter
rm -f $tmp.bad.$case.log
echo
case=`expr $case + 1`
# v1.1: third field (socks) is "x" rather than y or n
echo "--- v1.1 socks != y|n ---" | tee -a $here/$seq.full
cat >$tmp.error.$case << EOF
\$version=1.1
LOCALHOSTNAME   n x $tmp.bad.$case -c $tmp.conf
EOF
pmie_check -V -V -N -c $tmp.error.$case >$tmp.bad.$case.log
cat $tmp.bad.$case.log | _filter
rm -f $tmp.bad.$case.log
echo
case=`expr $case + 1`
# v1.1: second field (primary) is "x" rather than y or n
echo "--- v1.1 primary != y|n ---" | tee -a $here/$seq.full
cat >$tmp.error.$case << EOF
\$version=1.1
LOCALHOSTNAME   x n $tmp.bad.$case -c $tmp.conf
EOF
pmie_check -V -V -N -c $tmp.error.$case >$tmp.bad.$case.log
cat $tmp.bad.$case.log | _filter
rm -f $tmp.bad.$case.log
echo
case=`expr $case + 1`
# v1.0: line stops after the logfile, no pmie arguments
echo "--- v1.0 insufficient fields ---" | tee -a $here/$seq.full
cat >$tmp.error.$case << EOF
\$version=1.0
LOCALHOSTNAME   n $tmp.bad.$case
EOF
pmie_check -V -V -N -c $tmp.error.$case >$tmp.bad.$case.log
cat $tmp.bad.$case.log | _filter
rm -f $tmp.bad.$case.log
echo
case=`expr $case + 1`
# v1.1: line stops after the logfile, no pmie arguments
echo "--- v1.1 insufficient fields ---" | tee -a $here/$seq.full
cat >$tmp.error.$case << EOF
\$version=1.1
LOCALHOSTNAME   n n $tmp.bad.$case
EOF
pmie_check -V -V -N -c $tmp.error.$case >$tmp.bad.$case.log
cat $tmp.bad.$case.log | _filter
rm -f $tmp.bad.$case.log
echo
# The last two error cases put the logfile under $tmp.nogo, a directory
# created with all permission bits cleared (mode 000).
mkdir $tmp.nogo
chmod 000 $tmp.nogo
case=`expr $case + 1`
# logfile lives in a subdirectory that would have to be created below
# $tmp.nogo
echo "--- can't create dir for logfile ---" | tee -a $here/$seq.full
cat >$tmp.error.$case << EOF
\$version=1.1
LOCALHOSTNAME   n n $tmp.nogo/subdir/bad.$case -c $tmp.conf
EOF
pmie_check -V -V -N -c $tmp.error.$case >$tmp.bad.$case.log
cat $tmp.bad.$case.log | _filter
rm -f $tmp.bad.$case.log
echo
case=`expr $case + 1`
# logfile lives directly in $tmp.nogo, which exists but cannot be entered
echo "--- can't chdir to dir for logfile ---" | tee -a $here/$seq.full
cat >$tmp.error.$case << EOF
\$version=1.1
LOCALHOSTNAME   n n $tmp.nogo/bad.$case -c $tmp.conf
EOF
pmie_check -V -V -N -c $tmp.error.$case >$tmp.bad.$case.log
cat $tmp.bad.$case.log | _filter
rm -f $tmp.bad.$case.log
echo
$sudo rm -rf $tmp.nogo

# Wrap up: TERM any pmie still around and switch the pmie config off.
$sudo $signal -a -s TERM pmie >/dev/null 2>&1
_change_config pmie off

# Success; the exit trap runs _cleanup and then exits with $status.
status=0
exit
