From 52e5980fc8216ac91006309a62609e33fcae929b Mon Sep 17 00:00:00 2001
From: "Kaleb S. KEITHLEY"
Date: Wed, 22 Feb 2017 10:51:43 -0500
Subject: [PATCH 295/300] common-ha: unable to start HA, Connection Error

See BZ 1284404.

pcsd behavior has changed and pcsd will not accept connections until
SSL certificates have fully propagated throughout all the nodes.

HA devels suggest a 12 second delay between the `pcs cluster setup ...`
and the `pcs cluster start --all`

upstream:
3.10, included in https://review.gluster.org/#/c/16692/
3.9 https://review.gluster.org/#/c/16690/
3.8 https://review.gluster.org/#/c/16691/

upstream Change-ID: If94b6991a62f346dbead023c7e7f8282a995728c

Change-Id: I00865798906e4564272d9bbcbc9de45cfaf4b077
BUG: 1424944
Signed-off-by: Kaleb S. KEITHLEY
Reviewed-on: https://code.engineering.redhat.com/gerrit/98383
Tested-by: Jiffin Thottan
Reviewed-by: Jiffin Thottan
Reviewed-by: Atin Mukherjee
---
 extras/ganesha/scripts/ganesha-ha.sh | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
index df9cf2e..0c3528b 100644
--- a/extras/ganesha/scripts/ganesha-ha.sh
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -192,13 +192,16 @@ setup_cluster()
         logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
         exit 1;
     fi
+
+    # BZ 1284404, 1425110, allow time for SSL certs to propagate, until then
+    # pcsd will not accept connections.
+    sleep 12
     pcs cluster start --all
-    if [ $? -ne 0 ]; then
-        logger "pcs cluster start failed"
-        exit 1;
-    fi
+    while [ $? -ne 0 ]; do
+        sleep 2
+        pcs cluster start --all
+    done
 
-    sleep 1
     # wait for the cluster to elect a DC before querying or writing
     # to the CIB. BZ 1334092
     crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
-- 
2.9.3