commit c8136d5f0abe785223f4ea809073d6566aa36b58
Author: David Vossel <dvossel@redhat.com>
Date:   Wed Sep 25 18:14:13 2013 -0400

    RemoteLXC port

diff --git a/configure.ac b/configure.ac
index 6c80891..cfc1b1f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1084,6 +1084,10 @@ CRM_CONFIG_DIR="${localstatedir}/lib/pacemaker/cib"
 AC_DEFINE_UNQUOTED(CRM_CONFIG_DIR,"$CRM_CONFIG_DIR", Where to keep configuration files)
 AC_SUBST(CRM_CONFIG_DIR)
 
+CRM_CONFIG_CTS="${localstatedir}/lib/pacemaker/cts"
+AC_DEFINE_UNQUOTED(CRM_CONFIG_CTS,"$CRM_CONFIG_CTS", Where to keep cts stateful data)
+AC_SUBST(CRM_CONFIG_CTS)
+
 CRM_LEGACY_CONFIG_DIR="${localstatedir}/lib/heartbeat/crm"
 AC_DEFINE_UNQUOTED(CRM_LEGACY_CONFIG_DIR,"$CRM_LEGACY_CONFIG_DIR", Where Pacemaker used to keep configuration files)
 AC_SUBST(CRM_LEGACY_CONFIG_DIR)
@@ -1790,7 +1794,8 @@ cts/Makefile					        	\
 	cts/CTSvars.py						\
 	cts/LSBDummy						\
 	cts/benchmark/Makefile					\
-		cts/benchmark/clubench				\
+	cts/benchmark/clubench				\
+	cts/lxc_autogen.sh						\
 cib/Makefile							\
 crmd/Makefile							\
 pengine/Makefile						\
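For reference, configure substitutes the new CRM_CONFIG_CTS path into the generated cts/lxc_autogen.sh (see the working_dir assignment in the new file below). A sketch of the resulting values, assuming the stock localstatedir of /var; neither line appears literally in this patch:

    CRM_CONFIG_CTS="/var/lib/pacemaker/cts"      # ${localstatedir}/lib/pacemaker/cts from configure.ac above
    working_dir="/var/lib/pacemaker/cts/lxc"     # @CRM_CONFIG_CTS@/lxc in cts/lxc_autogen.sh.in below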
diff --git a/cts/CTSlab.py b/cts/CTSlab.py
index 58506c3..f7e183c 100755
--- a/cts/CTSlab.py
+++ b/cts/CTSlab.py
@@ -95,6 +95,7 @@ class LabEnvironment(CtsLab):
         #self["valgrind-opts"] = """--trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp"""
 
         self["experimental-tests"] = 0
+        self["container-tests"] = 0
         self["valgrind-tests"] = 0
         self["unsafe-tests"] = 1
         self["loop-tests"] = 1
@@ -148,6 +149,7 @@ def usage(arg, status=1):
     print "\t [--no-unsafe-tests]          dont run tests that are unsafe for use with ocfs2/drbd"
     print "\t [--valgrind-tests]           include tests using valgrind"
     print "\t [--experimental-tests]       include experimental tests"
+    print "\t [--container-tests]          include pacemaker_remote tests that run in lxc container resources"
     print "\t [--oprofile 'node list']     list of cluster nodes to run oprofile on]"
     print "\t [--qarsh]                    use the QARSH backdoor to access nodes instead of SSH"
     print "\t [--seed random_seed]"
@@ -427,6 +429,9 @@ if __name__ == '__main__':
        elif args[i] == "--experimental-tests":
            Environment["experimental-tests"] = 1
 
+       elif args[i] == "--container-tests":
+           Environment["container-tests"] = 1
+
        elif args[i] == "--set":
            skipthis=1
            (name, value) = args[i+1].split('=')
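Container tests stay disabled unless the new switch is passed. A hypothetical lab invocation might look like the following; only --container-tests comes from this patch, and the iteration count and any other arguments are placeholders:

    # hedged example: enable the container/pacemaker_remote tests in a CTS run
    ./CTSlab.py --container-tests 25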
diff --git a/cts/CTStests.py b/cts/CTStests.py
index b5dd69a..19f6ef4 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -73,6 +73,7 @@ class CTSTest:
         self.is_loop = 0
         self.is_unsafe = 0
         self.is_experimental = 0
+        self.is_container = 0
         self.is_valgrind = 0
         self.benchmark = 0  # which tests to benchmark
         self.timer = {}  # timers
@@ -205,6 +206,8 @@ class CTSTest:
             return 0
         elif self.is_experimental and not self.CM.Env["experimental-tests"]:
             return 0
+        elif self.is_container and not self.CM.Env["container-tests"]:
+            return 0
         elif self.CM.Env["benchmark"] and self.benchmark == 0:
             return 0
 
@@ -2473,4 +2476,118 @@ def TestList(cm, audits):
             result.append(bound_test)
     return result
 
+###################################################################
+class RemoteLXC(CTSTest):
+###################################################################
+    def __init__(self, cm):
+        CTSTest.__init__(self,cm)
+        self.name="RemoteLXC"
+        self.start = StartTest(cm)
+        self.startall = SimulStartLite(cm)
+        self.num_containers = 2
+        self.is_container = 1
+        self.failed = 0
+        self.fail_string = ""
+
+    def start_lxc_simple(self, node):
+
+        rc = self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v &>/dev/null")
+        if rc == 1:
+            return self.skipped()
+
+        # clean up any artifacts left lying around from a previous test.
+        self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
+
+        # generate the containers, put them in the config, add some resources to them
+        pats = [ ]
+        watch = self.create_watch(pats, 120)
+        watch.setwatch()
+        pats.append("process_lrm_event: LRM operation lxc1_start_0.*confirmed.*ok")
+        pats.append("process_lrm_event: LRM operation lxc2_start_0.*confirmed.*ok")
+        pats.append("process_lrm_event: LRM operation lxc-ms_start_0.*confirmed.*ok")
+        pats.append("process_lrm_event: LRM operation lxc-ms_promote_0.*confirmed.*ok")
+
+        self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c %d &>/dev/null" % self.num_containers)
+        self.set_timer("remoteSimpleInit")
+        watch.lookforall()
+        self.log_timer("remoteSimpleInit")
+        if watch.unmatched:
+            self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
+            self.failed = 1
+
+    def cleanup_lxc_simple(self, node):
+
+        pats = [ ]
+        # if the test failed, attempt to clean up the cib and libvirt environment
+        # as best we can
+        if self.failed == 1:
+            # restore libvirt and cib
+            self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
+            self.CM.rsh(node, "crm_resource -C -r container1 &>/dev/null")
+            self.CM.rsh(node, "crm_resource -C -r container2 &>/dev/null")
+            self.CM.rsh(node, "crm_resource -C -r lxc1 &>/dev/null")
+            self.CM.rsh(node, "crm_resource -C -r lxc2 &>/dev/null")
+            self.CM.rsh(node, "crm_resource -C -r lxc-ms &>/dev/null")
+            time.sleep(20)
+            return
+
+        watch = self.create_watch(pats, 120)
+        watch.setwatch()
+
+        pats.append("process_lrm_event: LRM operation container1_stop_0.*confirmed.*ok")
+        pats.append("process_lrm_event: LRM operation container2_stop_0.*confirmed.*ok")
+
+        self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null")
+        self.set_timer("remoteSimpleCleanup")
+        watch.lookforall()
+        self.log_timer("remoteSimpleCleanup")
+
+        if watch.unmatched:
+            self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
+            self.failed = 1
+
+        # cleanup libvirt
+        self.CM.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
+
+    def __call__(self, node):
+        '''Perform the 'RemoteLXC' test. '''
+        self.incr("calls")
+
+        ret = self.startall(None)
+        if not ret:
+            return self.failure("Setup failed, start all nodes failed.")
+
+        self.start_lxc_simple(node)
+        self.cleanup_lxc_simple(node)
+
+        self.CM.debug("Waiting for the cluster to recover")
+        self.CM.cluster_stable()
+
+        if self.failed == 1:
+            return self.failure(self.fail_string)
+
+        return self.success()
+
+    def errorstoignore(self):
+        '''Return list of errors which should be ignored'''
+        return [ """Updating failcount for ping""",
+                 """LogActions: Recover ping""",
+                 """LogActions: Recover lxc-ms""",
+                 """LogActions: Recover container""",
+                 # The orphaned lxc-ms resource causes an expected transition error
+                 # because the pengine does not know that the ms resource used to be
+                 # a clone, so the resource appears to be active in multiple locations.
+                 # In this instance we know why the error occurs and that it is expected.
+                 """Calculated Transition .* /var/lib/pacemaker/pengine/pe-error""",
+                 """Resource lxc-ms .* is active on 2 nodes attempting recovery""",
+                 """Unknown operation: fail""",
+                 """notice: operation_finished: ping-""",
+                 """notice: operation_finished: container""",
+                 """notice: operation_finished: .*_monitor_0:.*:stderr""",
+                 """(ERROR|error): sending stonithRA op to stonithd failed.""",
+                ]
+
+AllTestClasses.append(RemoteLXC)
+
 # vim:ts=4:sw=4:et:
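In effect, the RemoteLXC test above drives the following sequence on the chosen cluster node via CM.rsh; this is only a restatement of start_lxc_simple()/cleanup_lxc_simple(), not an additional interface:

    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -v                 # skip the test if this node cannot run lxc
    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -R                 # clear any leftovers from a previous run
    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c 2   # generate containers, add them and the master to the cib
    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -p                 # remove the generated cib entries (stops the containers)
    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -R                 # restore libvirt and clean the working directory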
diff --git a/cts/Makefile.am b/cts/Makefile.am
index e01ac10..cb86db8 100644
--- a/cts/Makefile.am
+++ b/cts/Makefile.am
@@ -40,6 +40,7 @@ cts_DATA	=	README cts.supp
 
 cts_SCRIPTS	=	cluster_test		\
 			CTSlab.py		\
+			lxc_autogen.sh	\
 			LSBDummy		\
 			$(top_srcdir)/fencing/fence_dummy
 
diff --git a/cts/lxc_autogen.sh.in b/cts/lxc_autogen.sh.in
new file mode 100644
index 0000000..1d334c4
--- /dev/null
+++ b/cts/lxc_autogen.sh.in
@@ -0,0 +1,362 @@
+#!/bin/bash
+
+containers="2"
+download=0
+share_configs=0
+# different from the default libvirt network, in case this is run nested in a KVM instance
+addr="192.168.123.1"
+restore=0
+restore_pcmk=0
+restore_all=0
+generate=0
+key_gen=0
+cib=0
+add_master=0
+verify=0
+working_dir="@CRM_CONFIG_CTS@/lxc"
+curdir=$(pwd)
+
+function helptext() {
+	echo "lxc_autogen.sh - A tool for generating libvirt lxc containers for testing purposes."
+	echo ""
+	echo "Usage: lxc_autogen.sh [options]"
+	echo ""
+	echo "Options:"
+	echo "-g, --generate         Generate the libvirt lxc environment in the working directory, $working_dir."
+	echo "-k, --key-gen          Generate the local pacemaker remote key only."
+	echo "-r, --restore-libvirt  Restore the default network and the libvirt config to their state before this script ran."
+	echo "-p, --restore-cib      Remove the cib entries this script generated."
+	echo "-R, --restore-all      Restore both libvirt and the cib, and clean the working directory. The libvirt xml files are kept so the resources can still be stopped properly."
+	echo ""
+	echo "-a, --add-cib          Add remote-node entries for each lxc instance to the cib."
+	echo "-m, --add-master       Add a master resource shared between the remote-nodes."
+	echo "-d, --download-agent   Download and install the latest VirtualDomain agent."
+	echo "-s, --share-configs    Copy the container configs to all other known cluster nodes (crm_node -l)."
+	echo "-c, --containers       Specify the number of containers to generate, defaults to $containers. Used with -g."
+	echo "-n, --network          Network to use instead of the default libvirt network. Example: -n 192.168.123.1. Used with -g."
+	echo "-v, --verify           Verify the environment is capable of running lxc."
+	echo ""
+	exit $1
+}
+
+while true ; do
+	case "$1" in
+	--help|-h|-\?) helptext 0;;
+	-c|--containers) containers="$2"; shift; shift;;
+	-d|--download-agent) download=1; shift;;
+	-s|--share-configs) share_configs=1; shift;;
+	-n|--network) addr="$2"; shift; shift;;
+	-r|--restore-libvirt) restore=1; shift;;
+	-p|--restore-cib) restore_pcmk=1; shift;;
+	-R|--restore-all)
+		restore_all=1
+		restore=1
+		restore_pcmk=1
+		shift;;
+	-g|--generate) generate=1; shift;;
+	-k|--key-gen) key_gen=1; shift;;
+	-a|--add-cib) cib=1; shift;;
+	-m|--add-master) add_master=1; shift;;
+	-v|--verify) verify=1; shift;;
+	"") break;;
+	*) helptext 1;;
+	esac
+done
+
+if [ $verify -eq 1 ]; then
+	# verify virsh tool is available and that 
+	# we can connect to lxc driver.
+	virsh -c lxc:/// list --all > /dev/null 2>&1
+	if [ $? -ne 0 ]; then
+		echo "Could not connect to 'virsh -c lxc:///'. Check that the libvirt lxc driver is installed."
+		exit 1
+	fi
+
+	grep -e "SELINUX.*=.*permissive" -e "SELINUX.*=.*enforcing" /etc/selinux/config > /dev/null 2>&1
+	if [ $? -ne 0 ]; then
+		echo "/etc/selinux/config must have SELINUX set to permissive or enforcing mode."
+		exit 1
+	fi
+
+	ps x > /tmp/lxc-autogen-libvirt-test.txt
+	grep "libvirtd" /tmp/lxc-autogen-libvirt-test.txt
+	if [ $? -ne 0 ]; then
+		rm -f /tmp/lxc-autogen-libvirt-test.txt
+		echo "libvirtd isn't up."
+		exit 1
+	fi
+	rm -f /tmp/lxc-autogen-libvirt-test.txt
+
+	which rsync > /dev/null 2>&1
+	if [ $? -ne 0 ]; then
+		echo "rsync is required"
+		exit 1
+	fi
+
+fi
+
+# strip the last octet off of addr
+tmp="s/\.$(echo "$addr" | tr '.' ' ' | awk '{print $4}')$//g"
+addr=$(echo $addr | sed -e ${tmp})
+
+set_network()
+{
+	rm -f cur_network.xml
+	cat << END >> cur_network.xml
+<network>
+  <name>default</name>
+  <uuid>41ebdb84-7134-1111-a136-91f0f1119225</uuid>
+  <forward mode='nat'/>
+  <bridge name='virbr0' stp='on' delay='0' />
+  <mac address='52:54:00:A8:12:35'/>
+  <ip address='$addr.1' netmask='255.255.255.0'>
+    <dhcp>
+      <range start='$addr.2' end='$addr.254' />
+    </dhcp>
+  </ip>
+</network>
+END
+
+	ls restore_default.xml > /dev/null 2>&1
+	if [ $? -ne 0 ]; then
+		virsh net-dumpxml default > restore_default.xml
+	fi
+	virsh net-destroy default
+	virsh net-undefine default
+	virsh net-define cur_network.xml
+	virsh net-start default
+}
+
+generate_key()
+{
+	# generate the pacemaker remote key
+	ls /etc/pacemaker/authkey > /dev/null 2>&1
+	if [ $? -ne 0 ]; then
+		mkdir -p /etc/pacemaker
+		dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1
+	fi
+}
+
+generate()
+{
+	set_network
+
+	# Generate libvirt domains in xml
+	for (( c=1; c <= $containers; c++ ))
+	do
+		rm -rf lxc$c-filesystem
+		mkdir -p lxc$c-filesystem/var/run/
+		mkdir -p lxc$c-filesystem/usr/var/run
+		rm -f lxc$c.xml
+		cat << END >> lxc$c.xml
+<domain type='lxc'>
+  <name>lxc$c</name>
+  <memory>102400</memory>
+  <os>
+    <type>exe</type>
+    <init>$working_dir/lxc$c-filesystem/launch-helper</init>
+  </os>
+  <devices>
+    <console type='pty'/>
+    <filesystem type='mount'>
+      <source dir='$working_dir/lxc$c-filesystem/var/run'/>
+      <target dir='/var/run'/>
+    </filesystem>
+    <filesystem type='mount'>
+      <source dir='$working_dir/lxc$c-filesystem/usr/var/run'/>
+      <target dir='/usr/var/run'/>
+    </filesystem>
+    <interface type='network'>
+      <mac address='52:54:00:$c$c:$(($RANDOM % 9))$(($RANDOM % 9)):$(($RANDOM % 9))$c'/>
+      <source network='default'/>
+    </interface>
+  </devices>
+</domain>
+END
+		rm -f container$c.cib
+		cat << END >> container$c.cib
+      <primitive class="ocf" id="container$c" provider="heartbeat" type="VirtualDomain">
+        <instance_attributes id="container$c-instance_attributes">
+          <nvpair id="container$c-instance_attributes-force_stop" name="force_stop" value="true"/>
+          <nvpair id="container$c-instance_attributes-hypervisor" name="hypervisor" value="lxc:///"/>
+          <nvpair id="container$c-instance_attributes-config" name="config" value="$working_dir/lxc$c.xml"/>
+          <nvpair id="container$c-instance_attributes-remote-node" name="remote-node" value="lxc$c"/>
+        </instance_attributes>
+        <utilization id="container$c-utilization">
+          <nvpair id="container$c-utilization-cpu" name="cpu" value="1"/>
+          <nvpair id="container$c-utilization-hv_memory" name="hv_memory" value="100"/>
+        </utilization>
+        <meta_attributes id="container$c-meta_attributes">
+          <nvpair id="container$c-meta_attributes-remote-node" name="remote-node" value="lxc$c"/>
+        </meta_attributes>
+      </primitive>
+END
+
+		rm -f lxc-ms.cib
+		cat << END >> lxc-ms.cib
+      <master id="lxc-ms-master">
+        <primitive class="ocf" id="lxc-ms" provider="pacemaker" type="Stateful">
+          <instance_attributes id="lxc-ms-instance_attributes"/>
+          <operations>
+            <op id="lxc-ms-monitor-interval-10s" interval="10s" name="monitor"/>
+          </operations>
+        </primitive>
+        <meta_attributes id="lxc-ms-meta_attributes">
+          <nvpair id="lxc-ms-meta_attributes-master-max" name="master-max" value="1"/>
+          <nvpair id="lxc-ms-meta_attributes-clone-max" name="clone-max" value="$containers"/>
+        </meta_attributes>
+      </master>
+END
+
+		rm -f lxc$c-filesystem/launch-helper
+		cat << END >> lxc$c-filesystem/launch-helper
+#!/bin/bash
+ifconfig eth0 $addr.10$c
+route add 0.0.0.0 gw $addr.1 eth0
+hostname lxc$c
+/usr/sbin/pacemaker_remoted
+END
+		chmod 711 lxc$c-filesystem/launch-helper
+
+		cat << END >> /etc/hosts
+$addr.10$c     lxc$c
+END
+	done
+}
+
+apply_cib_master()
+{
+	cibadmin -Q > cur.cib
+	export CIB_file=cur.cib
+
+	cibadmin -o resources -C -x lxc-ms.cib
+	for tmp in $(ls lxc*.xml); do
+		tmp=$(echo $tmp | sed -e 's/\.xml//g')
+		echo "<rsc_location id=\"lxc-ms-location-${tmp}\" node=\"${tmp}\" rsc=\"lxc-ms\" score=\"INFINITY\"/>" > tmp_constraint
+		cibadmin -o constraints -C -x tmp_constraint
+		echo "<rsc_location id=\"lxc-ping-location-${tmp}\" node=\"${tmp}\" rsc=\"ping-1\" score=\"-INFINITY\"/>" > tmp_constraint
+		cibadmin -o constraints -C -x tmp_constraint > /dev/null 2>&1
+		rm -f tmp_constraint
+	done
+	unset CIB_file
+
+	cibadmin --replace --xml-file cur.cib
+	rm -f cur.cib
+}
+
+apply_cib_entries()
+{
+	node=$(crm_node -n)
+
+	cibadmin -Q > cur.cib
+	export CIB_file=cur.cib
+	for tmp in $(ls container*.cib); do
+		cibadmin -o resources -C -x $tmp
+
+		tmp=$(echo $tmp | sed -e 's/\.cib//g')
+		crm_resource -M -r $tmp -H $node
+	done
+	unset CIB_file
+
+	cibadmin --replace --xml-file cur.cib
+	rm -f cur.cib
+}
+
+restore_cib()
+{
+	node=$(crm_node -n)
+	cibadmin -Q > cur.cib
+	export CIB_file=cur.cib
+
+	for tmp in $(ls lxc*.xml); do
+		tmp=$(echo $tmp | sed -e 's/\.xml//g')
+		echo "<rsc_location id=\"lxc-ms-location-${tmp}\" node=\"${tmp}\" rsc=\"lxc-ms\" score=\"INFINITY\"/>" > tmp_constraint
+		cibadmin -o constraints -D -x tmp_constraint
+		echo "<rsc_location id=\"lxc-ping-location-${tmp}\" node=\"${tmp}\" rsc=\"ping-1\" score=\"-INFINITY\"/>" > tmp_constraint
+		cibadmin -o constraints -D -x tmp_constraint
+		rm -f tmp_constraint
+	done
+	cibadmin -o resources -D -x lxc-ms.cib
+
+	for tmp in $(ls container*.cib); do
+		tmp=$(echo $tmp | sed -e 's/\.cib//g')
+		crm_resource -U -r $tmp -H $node
+		crm_resource -D -r $tmp -t primitive
+	done
+	unset CIB_file
+
+	cibadmin --replace --xml-file cur.cib
+	rm -f cur.cib
+}
+
+restore_libvirt()
+{
+	for tmp in $(ls lxc*.xml); do
+		tmp=$(echo $tmp | sed -e 's/\.xml//g')
+		virsh -c lxc:/// destroy $tmp > /dev/null 2>&1
+		virsh -c lxc:/// undefine $tmp > /dev/null 2>&1
+
+		sed -i.bak "/...\....\....\..* ${tmp}/d" /etc/hosts
+		echo "$tmp destroyed"
+	done
+
+	ls restore_default.xml > /dev/null 2>&1
+	if [ $? -eq 0 ]; then
+		virsh net-destroy default > /dev/null 2>&1
+		virsh net-undefine default > /dev/null 2>&1
+		virsh net-define restore_default.xml
+		virsh net-start default
+		if [ $? -eq 0 ]; then
+			echo "default network restored"
+		fi
+	fi
+	rm -f restore_default.xml > /dev/null 2>&1 
+}
+
+distribute_configs()
+{
+	local node
+	local id
+	while read id node
+	do
+		rsync -ave 'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no' $working_dir/lxc*.xml $node:/$working_dir
+		rsync -ave 'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no' $working_dir/lxc*-filesystem $node:/$working_dir
+	done < <(crm_node -l)
+}
+
+mkdir -p $working_dir
+cd $working_dir
+
+if [ $download -eq 1 ]; then
+	wget https://raw.github.com/ClusterLabs/resource-agents/master/heartbeat/VirtualDomain
+	chmod 755 VirtualDomain
+	mv -f VirtualDomain /usr/lib/ocf/resource.d/heartbeat/VirtualDomain
+fi
+if [ $restore_pcmk -eq 1 ]; then
+	restore_cib
+fi
+if [ $restore -eq 1 ]; then
+	restore_libvirt
+fi
+if [ $key_gen -eq 1 ]; then
+	generate_key
+fi
+if [ $generate -eq 1 ]; then
+	if [ $key_gen -eq 0 ]; then
+		generate_key
+	fi
+	generate
+fi
+if [ $cib -eq 1 ]; then
+	apply_cib_entries
+fi
+if [ $add_master -eq 1 ]; then
+	apply_cib_master
+fi
+if [ $share_configs -eq 1 ]; then
+	distribute_configs
+fi
+if [ $restore_all -eq 1 ]; then
+	ls | grep -v "lxc.\.xml" | xargs rm -rf
+fi
+
+cd $curdir
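If a failed or interrupted run leaves resources behind, the fallback cleanup the RemoteLXC test performs can also be done by hand. A sketch, assuming the default two containers; it only restates the crm_resource calls already shown in CTStests.py above:

    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -R    # restore libvirt, remove the cib entries, clean the working directory
    for rsc in container1 container2 lxc1 lxc2 lxc-ms; do
        crm_resource -C -r $rsc                         # clear lingering resource history/failures
    done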