commit c8136d5f0abe785223f4ea809073d6566aa36b58
Author: David Vossel
Date:   Wed Sep 25 18:14:13 2013 -0400

    RemoteLXC port

diff --git a/configure.ac b/configure.ac
index 6c80891..cfc1b1f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1084,6 +1084,10 @@ CRM_CONFIG_DIR="${localstatedir}/lib/pacemaker/cib"
 AC_DEFINE_UNQUOTED(CRM_CONFIG_DIR,"$CRM_CONFIG_DIR", Where to keep configuration files)
 AC_SUBST(CRM_CONFIG_DIR)
 
+CRM_CONFIG_CTS="${localstatedir}/lib/pacemaker/cts"
+AC_DEFINE_UNQUOTED(CRM_CONFIG_CTS,"$CRM_CONFIG_CTS", Where to keep cts stateful data)
+AC_SUBST(CRM_CONFIG_CTS)
+
 CRM_LEGACY_CONFIG_DIR="${localstatedir}/lib/heartbeat/crm"
 AC_DEFINE_UNQUOTED(CRM_LEGACY_CONFIG_DIR,"$CRM_LEGACY_CONFIG_DIR", Where Pacemaker used to keep configuration files)
 AC_SUBST(CRM_LEGACY_CONFIG_DIR)
@@ -1790,7 +1794,8 @@ cts/Makefile \
 cts/CTSvars.py \
 cts/LSBDummy \
 cts/benchmark/Makefile \
-cts/benchmark/clubench \
+cts/benchmark/clubench \
+cts/lxc_autogen.sh \
 cib/Makefile \
 crmd/Makefile \
 pengine/Makefile \
diff --git a/cts/CTSlab.py b/cts/CTSlab.py
index 58506c3..f7e183c 100755
--- a/cts/CTSlab.py
+++ b/cts/CTSlab.py
@@ -95,6 +95,7 @@ class LabEnvironment(CtsLab):
         #self["valgrind-opts"] = """--trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp"""
 
         self["experimental-tests"] = 0
+        self["container-tests"] = 0
         self["valgrind-tests"] = 0
         self["unsafe-tests"] = 1
         self["loop-tests"] = 1
@@ -148,6 +149,7 @@ def usage(arg, status=1):
     print "\t [--no-unsafe-tests] don't run tests that are unsafe for use with ocfs2/drbd"
     print "\t [--valgrind-tests] include tests using valgrind"
     print "\t [--experimental-tests] include experimental tests"
+    print "\t [--container-tests] include pacemaker_remote tests that run in lxc container resources"
     print "\t [--oprofile 'node list'] list of cluster nodes to run oprofile on"
     print "\t [--qarsh] use the QARSH backdoor to access nodes instead of SSH"
     print "\t [--seed random_seed]"
@@ -427,6 +429,9 @@ if __name__ == '__main__':
         elif args[i] == "--experimental-tests":
             Environment["experimental-tests"] = 1
 
+        elif args[i] == "--container-tests":
+            Environment["container-tests"] = 1
+
         elif args[i] == "--set":
             skipthis=1
             (name, value) = args[i+1].split('=')
diff --git a/cts/CTStests.py b/cts/CTStests.py
index b5dd69a..19f6ef4 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -73,6 +73,7 @@ class CTSTest:
         self.is_loop = 0
         self.is_unsafe = 0
         self.is_experimental = 0
+        self.is_container = 0
         self.is_valgrind = 0
         self.benchmark = 0  # which tests to benchmark
         self.timer = {}  # timers
@@ -205,6 +206,8 @@ class CTSTest:
             return 0
         elif self.is_experimental and not self.CM.Env["experimental-tests"]:
             return 0
+        elif self.is_container and not self.CM.Env["container-tests"]:
+            return 0
         elif self.CM.Env["benchmark"] and self.benchmark == 0:
             return 0
@@ -2473,4 +2476,118 @@ def TestList(cm, audits):
         result.append(bound_test)
     return result
 
+###################################################################
+class RemoteLXC(CTSTest):
+###################################################################
+    def __init__(self, cm):
+        CTSTest.__init__(self, cm)
+        self.name = "RemoteLXC"
+        self.start = StartTest(cm)
+        self.startall = SimulStartLite(cm)
+        self.num_containers = 2
+        self.is_container = 1
+        self.failed = 0
+        self.fail_string = ""
+
+    def start_lxc_simple(self, node):
+
+        # skip the test (rather than fail it) if the node cannot run lxc containers
+        rc = self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v &>/dev/null")
+        if rc == 1:
+            return self.skipped()
+
+        # restore any artifacts lying around from a previous test
+        self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
+
+        # generate the containers, put them in the config, add some resources to them
+        pats = [ ]
+        watch = self.create_watch(pats, 120)
+        watch.setwatch()
+        pats.append("process_lrm_event: LRM operation lxc1_start_0.*confirmed.*ok")
+        pats.append("process_lrm_event: LRM operation lxc2_start_0.*confirmed.*ok")
+        pats.append("process_lrm_event: LRM operation lxc-ms_start_0.*confirmed.*ok")
+        pats.append("process_lrm_event: LRM operation lxc-ms_promote_0.*confirmed.*ok")
+
+        self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c %d &>/dev/null" % self.num_containers)
+        self.set_timer("remoteSimpleInit")
+        watch.lookforall()
+        self.log_timer("remoteSimpleInit")
+        if watch.unmatched:
+            self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
+            self.failed = 1
+
+    def cleanup_lxc_simple(self, node):
+
+        pats = [ ]
+        # if the test failed, attempt to clean up the cib and libvirt environment
+        # as best as possible
+        if self.failed == 1:
+            # restore libvirt and cib
+            self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
+            self.CM.rsh(node, "crm_resource -C -r container1 &>/dev/null")
+            self.CM.rsh(node, "crm_resource -C -r container2 &>/dev/null")
+            self.CM.rsh(node, "crm_resource -C -r lxc1 &>/dev/null")
+            self.CM.rsh(node, "crm_resource -C -r lxc2 &>/dev/null")
+            self.CM.rsh(node, "crm_resource -C -r lxc-ms &>/dev/null")
+            time.sleep(20)
+            return
+
+        watch = self.create_watch(pats, 120)
+        watch.setwatch()
+
+        pats.append("process_lrm_event: LRM operation container1_stop_0.*confirmed.*ok")
+        pats.append("process_lrm_event: LRM operation container2_stop_0.*confirmed.*ok")
+
+        self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null")
+        self.set_timer("remoteSimpleCleanup")
+        watch.lookforall()
+        self.log_timer("remoteSimpleCleanup")
+
+        if watch.unmatched:
+            self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
+            self.failed = 1
+
+        # clean up libvirt
+        self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
+
+    def __call__(self, node):
+        '''Perform the 'RemoteLXC' test.'''
+        self.incr("calls")
+
+        ret = self.startall(None)
+        if not ret:
+            return self.failure("Setup failed, start all nodes failed.")
+
+        self.start_lxc_simple(node)
+        self.cleanup_lxc_simple(node)
+
+        self.CM.debug("Waiting for the cluster to recover")
+        self.CM.cluster_stable()
+
+        if self.failed == 1:
+            return self.failure(self.fail_string)
+
+        return self.success()
+
+    def errorstoignore(self):
+        '''Return list of errors which should be ignored'''
+        return [ """Updating failcount for ping""",
+                 """LogActions: Recover ping""",
+                 """LogActions: Recover lxc-ms""",
+                 """LogActions: Recover container""",
+                 # The orphaned lxc-ms resource causes an expected transition error
+                 # that is a result of the pengine not having knowledge that the
+                 # ms resource used to be a clone. As a result it looks like that
+                 # resource is running in multiple locations when it shouldn't... But in
+                 # this instance we know why this error is occurring and that it is expected.
+                 """Calculated Transition .* /var/lib/pacemaker/pengine/pe-error""",
+                 """Resource lxc-ms .* is active on 2 nodes attempting recovery""",
+                 """Unknown operation: fail""",
+                 """notice: operation_finished: ping-""",
+                 """notice: operation_finished: container""",
+                 """notice: operation_finished: .*_monitor_0:.*:stderr""",
+                 """(ERROR|error): sending stonithRA op to stonithd failed.""",
+                 ]
+
+AllTestClasses.append(RemoteLXC)
+
 # vim:ts=4:sw=4:et:
diff --git a/cts/Makefile.am b/cts/Makefile.am
index e01ac10..cb86db8 100644
--- a/cts/Makefile.am
+++ b/cts/Makefile.am
@@ -40,6 +40,7 @@ cts_DATA = README cts.supp
 
 cts_SCRIPTS = cluster_test \
               CTSlab.py \
+              lxc_autogen.sh \
               LSBDummy \
               $(top_srcdir)/fencing/fence_dummy
diff --git a/cts/lxc_autogen.sh.in b/cts/lxc_autogen.sh.in
new file mode 100644
index 0000000..1d334c4
--- /dev/null
+++ b/cts/lxc_autogen.sh.in
@@ -0,0 +1,362 @@
+#!/bin/bash
+
+containers="2"
+download=0
+share_configs=0
+# use a network different from the default libvirt one, in case this is run nested in a KVM instance
+addr="192.168.123.1"
+restore=0
+restore_pcmk=0
+restore_all=0
+generate=0
+key_gen=0
+cib=0
+add_master=0
+verify=0
+working_dir="@CRM_CONFIG_CTS@/lxc"
+curdir=$(pwd)
+
+function helptext() {
+    echo "lxc_autogen.sh - a tool for generating libvirt lxc containers for testing purposes."
+    echo ""
+    echo "Usage: lxc_autogen.sh [options]"
+    echo ""
+    echo "Options:"
+    echo "-g, --generate         Generate the libvirt lxc environment in the directory this script is run from."
+    echo "-k, --key-gen          Generate the local pacemaker remote key only."
+    echo "-r, --restore-libvirt  Restore the default network and the libvirt config to their state before this script ran."
+    echo "-p, --restore-cib      Remove the cib entries this script generated."
+    echo "-R, --restore-all      Restore both libvirt and the cib, and clean the working directory. The libvirt xml files are left in place so resources can still be stopped properly."
+    echo ""
+    echo "-a, --add-cib          Add remote-node entries for each lxc instance into the cib."
+    echo "-m, --add-master       Add a master resource shared between the remote-nodes."
+    echo "-d, --download-agent   Download and install the latest VirtualDomain agent."
+    echo "-s, --share-configs    Copy the container configs to all other known cluster nodes (crm_node -l)."
+    echo "-c, --containers       Specify the number of containers to generate; defaults to $containers. Used with -g."
+    echo "-n, --network          Network to use instead of the default libvirt network. Example: -n 192.168.123.1. Used with -g."
+    echo "-v, --verify           Verify that the environment is capable of running lxc."
+    echo ""
+    exit $1
+}
+
+while true ; do
+    case "$1" in
+    --help|-h|-\?) helptext 0;;
+    -c|--containers) containers="$2"; shift; shift;;
+    -d|--download-agent) download=1; shift;;
+    -s|--share-configs) share_configs=1; shift;;
+    -n|--network) addr="$2"; shift; shift;;
+    -r|--restore-libvirt) restore=1; shift;;
+    -p|--restore-cib) restore_pcmk=1; shift;;
+    -R|--restore-all)
+        restore_all=1
+        restore=1
+        restore_pcmk=1
+        shift;;
+    -g|--generate) generate=1; shift;;
+    -k|--key-gen) key_gen=1; shift;;
+    -a|--add-cib) cib=1; shift;;
+    -m|--add-master) add_master=1; shift;;
+    -v|--verify) verify=1; shift;;
+    "") break;;
+    *) helptext 1;;
+    esac
+done
+
+if [ $verify -eq 1 ]; then
+    # verify that the virsh tool is available and that
+    # we can connect to the lxc driver.
+    virsh -c lxc:/// list --all > /dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        echo "Could not connect with 'virsh -c lxc:///'; check that the libvirt lxc driver is installed."
+        exit 1
+    fi
+
+    grep -e "SELINUX.*=.*permissive" -e "SELINUX.*=.*enforcing" /etc/selinux/config > /dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        echo "/etc/selinux/config must have SELINUX set to permissive or enforcing mode."
+        exit 1
+    fi
+
+    ps x > /tmp/lxc-autogen-libvirt-test.txt
+    grep "libvirtd" /tmp/lxc-autogen-libvirt-test.txt
+    if [ $? -ne 0 ]; then
+        rm -f /tmp/lxc-autogen-libvirt-test.txt
+        echo "libvirtd isn't up."
+        exit 1
+    fi
+    rm -f /tmp/lxc-autogen-libvirt-test.txt
+
+    which rsync > /dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        echo "rsync is required"
+        exit 1
+    fi
+
+fi
+
+# strip the last octet off of addr
+tmp="s/\.$(echo "$addr" | tr '.' ' ' | awk '{print $4}')$//g"
+addr=$(echo $addr | sed -e ${tmp})
+
+set_network()
+{
+    rm -f cur_network.xml
+    cat << END >> cur_network.xml
+<network>
+  <name>default</name>
+  <uuid>41ebdb84-7134-1111-a136-91f0f1119225</uuid>
+  <!-- placeholder: the original xml here defined the bridge and an
+       ip/dhcp block on the $addr network; it was not recovered -->
+</network>
+END
+
+    # save the original network definition once, so -r can put it back later
+    ls restore_default.xml > /dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        virsh net-dumpxml default > restore_default.xml
+    fi
+    virsh net-destroy default
+    virsh net-undefine default
+    virsh net-define cur_network.xml
+    virsh net-start default
+}
+
+generate_key()
+{
+    # generate the pacemaker remote key, unless one already exists
+    ls /etc/pacemaker/authkey > /dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        mkdir -p /etc/pacemaker
+        dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1
+    fi
+}
+
+generate()
+{
+    set_network
+
+    # Generate the libvirt domains in xml
+    for (( c=1; c <= $containers; c++ ))
+    do
+        rm -rf lxc$c-filesystem
+        mkdir -p lxc$c-filesystem/var/run/
+        mkdir -p lxc$c-filesystem/usr/var/run
+        rm -f lxc$c.xml
+        cat << END >> lxc$c.xml
+<domain type='lxc'>
+  <name>lxc$c</name>
+  <memory>102400</memory>
+  <os>
+    <type>exe</type>
+    <init>$working_dir/lxc$c-filesystem/launch-helper</init>
+  </os>
+  <!-- placeholder: the original xml here defined the devices section
+       (mounts for the run directories created above and a network
+       interface); it was not recovered -->
+</domain>
+END
+        rm -f container$c.cib
+        cat << END >> container$c.cib
+<!-- placeholder: the original xml here defined a VirtualDomain primitive
+     named container$c carrying a remote-node entry for lxc$c; it was not
+     recovered -->
+END
+
+        rm -f lxc-ms.cib
+        cat << END >> lxc-ms.cib
+<!-- placeholder: the original xml here defined the lxc-ms master resource
+     shared between the remote-nodes; it was not recovered -->
+END
+
+        rm -f lxc$c-filesystem/launch-helper
+        cat << END >> lxc$c-filesystem/launch-helper
+#!/bin/bash
+ifconfig eth0 $addr.10$c
+route add 0.0.0.0 gw $addr.1 eth0
+hostname lxc$c
+/usr/sbin/pacemaker_remoted
+END
+        chmod 711 lxc$c-filesystem/launch-helper
+
+        cat << END >> /etc/hosts
+$addr.10$c     lxc$c
+END
+    done
+}
+
+apply_cib_master()
+{
+    # edit a file copy of the cib (via CIB_file) and then push the
+    # result back to the live cluster
+    cibadmin -Q > cur.cib
+    export CIB_file=cur.cib
+
+    cibadmin -o resources -C -x lxc-ms.cib
+    for tmp in $(ls lxc*.xml); do
+        tmp=$(echo $tmp | sed -e 's/\.xml//g')
+        # placeholder: the rsc_location constraint xml echoed into
+        # tmp_constraint here was not recovered
+        echo "" > tmp_constraint
+        cibadmin -o constraints -C -x tmp_constraint
+        echo "" > tmp_constraint
+        cibadmin -o constraints -C -x tmp_constraint > /dev/null 2>&1
+        rm -f tmp_constraint
+    done
+    unset CIB_file
+
+    cibadmin --replace --xml-file cur.cib
+    rm -f cur.cib
+}
+
+apply_cib_entries()
+{
+    node=$(crm_node -n)
+
+    cibadmin -Q > cur.cib
+    export CIB_file=cur.cib
+    for tmp in $(ls container*.cib); do
+        cibadmin -o resources -C -x $tmp
+
+        tmp=$(echo $tmp | sed -e 's/\.cib//g')
+        crm_resource -M -r $tmp -H $node
+    done
+    unset CIB_file
+
+    cibadmin --replace --xml-file cur.cib
+    rm -f cur.cib
+}
+
+restore_cib()
+{
+    node=$(crm_node -n)
+    cibadmin -Q > cur.cib
+    export CIB_file=cur.cib
+
+    for tmp in $(ls lxc*.xml); do
+        tmp=$(echo $tmp | sed -e 's/\.xml//g')
+        # placeholder: the rsc_location constraint xml echoed into
+        # tmp_constraint here was not recovered
+        echo "" > tmp_constraint
+        cibadmin -o constraints -D -x tmp_constraint
+        echo "" > tmp_constraint
+        cibadmin -o constraints -D -x tmp_constraint
+        rm -f tmp_constraint
+    done
+    cibadmin -o resources -D -x lxc-ms.cib
+
+    for tmp in $(ls container*.cib); do
+        tmp=$(echo $tmp | sed -e 's/\.cib//g')
+        crm_resource -U -r $tmp -H $node
+        crm_resource -D -r $tmp -t primitive
+    done
+    unset CIB_file
+
+    cibadmin --replace --xml-file cur.cib
+    rm -f cur.cib
+}
+
+restore_libvirt()
+{
+    for tmp in $(ls lxc*.xml); do
+        tmp=$(echo $tmp | sed -e 's/\.xml//g')
+        virsh -c lxc:/// destroy $tmp > /dev/null 2>&1
+        virsh -c lxc:/// undefine $tmp > /dev/null 2>&1
+
+        sed -i.bak "/...\....\....\..* ${tmp}/d" /etc/hosts
+        echo "$tmp destroyed"
+    done
+
+    ls restore_default.xml > /dev/null 2>&1
+    if [ $? -eq 0 ]; then
+        virsh net-destroy default > /dev/null 2>&1
+        virsh net-undefine default > /dev/null 2>&1
+        virsh net-define restore_default.xml
+        virsh net-start default
+        if [ $? -eq 0 ]; then
+            echo "default network restored"
+        fi
+    fi
+    rm -f restore_default.xml > /dev/null 2>&1
+}
+
+distribute_configs()
+{
+    local node
+    local id
+    while read id node
+    do
+        rsync -ave 'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no' $working_dir/lxc*.xml $node:/$working_dir
+        rsync -ave 'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no' $working_dir/lxc*-filesystem $node:/$working_dir
+    done < <(crm_node -l)
+}
+
+mkdir -p $working_dir
+cd $working_dir
+
+if [ $download -eq 1 ]; then
+    wget https://raw.github.com/ClusterLabs/resource-agents/master/heartbeat/VirtualDomain
+    chmod 755 VirtualDomain
+    mv -f VirtualDomain /usr/lib/ocf/resource.d/heartbeat/VirtualDomain
+fi
+if [ $restore_pcmk -eq 1 ]; then
+    restore_cib
+fi
+if [ $restore -eq 1 ]; then
+    restore_libvirt
+fi
+if [ $key_gen -eq 1 ]; then
+    generate_key
+fi
+if [ $generate -eq 1 ]; then
+    if [ $key_gen -eq 0 ]; then
+        generate_key
+    fi
+    generate
+fi
+if [ $cib -eq 1 ]; then
+    apply_cib_entries
+fi
+if [ $add_master -eq 1 ]; then
+    apply_cib_master
+fi
+if [ $share_configs -eq 1 ]; then
+    distribute_configs
+fi
+if [ $restore_all -eq 1 ]; then
+    # remove everything except the domain xml files, which are needed so any
+    # still-defined domains can be stopped properly
+    ls | grep -v "lxc.\.xml" | xargs rm -rf
+fi
+
+cd $curdir
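
The option groups above compose into a small setup/teardown lifecycle, which is the sequence the RemoteLXC test drives over rsh. As a usage sketch (the path below is the installed location that CTStests.py uses; adjust it if the script lands elsewhere):

    # verify the host can run libvirt lxc containers
    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -v

    # generate two containers, add them to the cib as remote-nodes,
    # add the shared master resource, and copy the configs to the other nodes
    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c 2

    # tear down: drop the generated cib entries, then restore libvirt,
    # the default network, and the working directory
    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -p
    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -R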