|
|
17b94a |
From 91936fe5ef854bd9d2f91e643795d0e7791b97ba Mon Sep 17 00:00:00 2001
|
|
|
17b94a |
From: Harpreet Kaur <hlalwani@redhat.com>
|
|
|
17b94a |
Date: Mon, 7 Jan 2019 16:38:25 +0530
|
|
|
17b94a |
Subject: [PATCH 426/449] geo-rep: Fix for "Transport End Point not connected"
|
|
|
17b94a |
issue
|
|
|
17b94a |
|
|
|
17b94a |
problem: Geo-rep gsyncd process mounts the master and slave volume
|
|
|
17b94a |
on master nodes and slave nodes respectively and starts
|
|
|
17b94a |
the sync. But it doesn't wait for the mount to be in a ready
|
|
|
17b94a |
state to accept I/O. The gluster mount is considered to be
|
|
|
17b94a |
ready when all the distribute sub-volumes are up. If all
|
|
|
17b94a |
the distribute subvolumes are not up, it can cause ENOTCONN
|
|
|
17b94a |
error when a lookup arrives for a file that resides on a subvol
|
|
|
17b94a |
that is down.
|
|
|
17b94a |
|
|
|
17b94a |
solution: Added a Virtual Xattr "dht.subvol.status" which returns "1"
|
|
|
17b94a |
if all subvols are up and "0" if any subvol is down.
|
|
|
17b94a |
Geo-rep then uses this virtual xattr after a fresh mount, to
|
|
|
17b94a |
check whether all subvols are up or not and then starts the
|
|
|
17b94a |
I/O.
|
|
|
17b94a |
|
|
|
17b94a |
>fixes: bz#1664335
|
|
|
17b94a |
>Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91
|
|
|
17b94a |
>Signed-off-by: Harpreet Kaur <hlalwani@redhat.com>
|
|
|
17b94a |
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
|
|
|
17b94a |
|
|
|
17b94a |
backport of https://review.gluster.org/#/c/glusterfs/+/22001/
|
|
|
17b94a |
BUG: 1640573
|
|
|
17b94a |
Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91
|
|
|
17b94a |
Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
|
|
|
17b94a |
Reviewed-on: https://code.engineering.redhat.com/gerrit/202554
|
|
|
17b94a |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
17b94a |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
17b94a |
---
|
|
|
17b94a |
geo-replication/syncdaemon/resource.py | 11 ++++++
|
|
|
17b94a |
geo-replication/syncdaemon/syncdutils.py | 20 +++++++++--
|
|
|
17b94a |
xlators/cluster/dht/src/dht-common.c | 59 ++++++++++++++++++++++++++++++++
|
|
|
17b94a |
xlators/cluster/dht/src/dht-common.h | 4 +++
|
|
|
17b94a |
4 files changed, 91 insertions(+), 3 deletions(-)
|
|
|
17b94a |
|
|
|
17b94a |
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
|
|
|
17b94a |
index 189d8a1..0c61de9 100644
|
|
|
17b94a |
--- a/geo-replication/syncdaemon/resource.py
|
|
|
17b94a |
+++ b/geo-replication/syncdaemon/resource.py
|
|
|
17b94a |
@@ -37,6 +37,7 @@ from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
|
|
|
17b94a |
from syncdutils import get_changelog_log_level, get_rsync_version
|
|
|
17b94a |
from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION
|
|
|
17b94a |
from syncdutils import GX_GFID_CANONICAL_LEN
|
|
|
17b94a |
+from syncdutils import gf_mount_ready
|
|
|
17b94a |
from gsyncdstatus import GeorepStatus
|
|
|
17b94a |
from syncdutils import lf, Popen, sup
|
|
|
17b94a |
from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt
|
|
|
17b94a |
@@ -950,6 +951,16 @@ class Mounter(object):
|
|
|
17b94a |
logging.exception('mount cleanup failure:')
|
|
|
17b94a |
rv = 200
|
|
|
17b94a |
os._exit(rv)
|
|
|
17b94a |
+
|
|
|
17b94a |
+ #Polling the dht.subvol.status value.
|
|
|
17b94a |
+ RETRIES = 10
|
|
|
17b94a |
+ while not gf_mount_ready():
|
|
|
17b94a |
+ if RETRIES < 0:
|
|
|
17b94a |
+ logging.error('Subvols are not up')
|
|
|
17b94a |
+ break
|
|
|
17b94a |
+ RETRIES -= 1
|
|
|
17b94a |
+ time.sleep(0.2)
|
|
|
17b94a |
+
|
|
|
17b94a |
logging.debug('auxiliary glusterfs mount prepared')
|
|
|
17b94a |
|
|
|
17b94a |
|
|
|
17b94a |
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
|
|
|
17b94a |
index b08098e..7560fa1 100644
|
|
|
17b94a |
--- a/geo-replication/syncdaemon/syncdutils.py
|
|
|
17b94a |
+++ b/geo-replication/syncdaemon/syncdutils.py
|
|
|
17b94a |
@@ -21,8 +21,8 @@ import subprocess
|
|
|
17b94a |
import socket
|
|
|
17b94a |
from subprocess import PIPE
|
|
|
17b94a |
from threading import Lock, Thread as baseThread
|
|
|
17b94a |
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED
|
|
|
17b94a |
-from errno import EINTR, ENOENT, ESTALE, EBUSY, errorcode
|
|
|
17b94a |
+from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED
|
|
|
17b94a |
+from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode
|
|
|
17b94a |
from signal import signal, SIGTERM
|
|
|
17b94a |
import select as oselect
|
|
|
17b94a |
from os import waitpid as owaitpid
|
|
|
17b94a |
@@ -55,6 +55,8 @@ from rconf import rconf
|
|
|
17b94a |
|
|
|
17b94a |
from hashlib import sha256 as sha256
|
|
|
17b94a |
|
|
|
17b94a |
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
|
|
|
17b94a |
+
|
|
|
17b94a |
# auxiliary gfid based access prefix
|
|
|
17b94a |
_CL_AUX_GFID_PFX = ".gfid/"
|
|
|
17b94a |
ROOT_GFID = "00000000-0000-0000-0000-000000000001"
|
|
|
17b94a |
@@ -100,6 +102,19 @@ def unescape_space_newline(s):
|
|
|
17b94a |
.replace(NEWLINE_ESCAPE_CHAR, "\n")\
|
|
|
17b94a |
.replace(PERCENTAGE_ESCAPE_CHAR, "%")
|
|
|
17b94a |
|
|
|
17b94a |
+# gf_mount_ready() returns 1 if all subvols are up or xattr read fails, else 0
|
|
|
17b94a |
+def gf_mount_ready():
|
|
|
17b94a |
+ ret = errno_wrap(Xattr.lgetxattr,
|
|
|
17b94a |
+ ['.', 'dht.subvol.status', 16],
|
|
|
17b94a |
+ [ENOENT, ENOTSUP, ENODATA], [ENOMEM])
|
|
|
17b94a |
+
|
|
|
17b94a |
+ if isinstance(ret, int):
|
|
|
17b94a |
+ logging.error("failed to get the xattr value")
|
|
|
17b94a |
+ return 1
|
|
|
17b94a |
+ ret = ret.rstrip('\x00')
|
|
|
17b94a |
+ if ret == "1":
|
|
|
17b94a |
+ return 1
|
|
|
17b94a |
+ return 0
|
|
|
17b94a |
|
|
|
17b94a |
def norm(s):
|
|
|
17b94a |
if s:
|
|
|
17b94a |
@@ -564,7 +579,6 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]):
|
|
|
17b94a |
def lstat(e):
|
|
|
17b94a |
return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
|
|
|
17b94a |
|
|
|
17b94a |
-
|
|
|
17b94a |
def get_gfid_from_mnt(gfidpath):
|
|
|
17b94a |
return errno_wrap(Xattr.lgetxattr,
|
|
|
17b94a |
[gfidpath, 'glusterfs.gfid.string',
|
|
|
17b94a |
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
|
|
|
17b94a |
index 6aa18f3..23cc80c 100644
|
|
|
17b94a |
--- a/xlators/cluster/dht/src/dht-common.c
|
|
|
17b94a |
+++ b/xlators/cluster/dht/src/dht-common.c
|
|
|
17b94a |
@@ -4858,6 +4858,60 @@ out:
|
|
|
17b94a |
return 0;
|
|
|
17b94a |
}
|
|
|
17b94a |
|
|
|
17b94a |
+/* Virtual Xattr which returns 1 if all subvols are up,
|
|
|
17b94a |
+ else returns 0. Geo-rep then uses this virtual xattr
|
|
|
17b94a |
+ after a fresh mount and starts the I/O.
|
|
|
17b94a |
+*/
|
|
|
17b94a |
+
|
|
|
17b94a |
+enum dht_vxattr_subvol {
|
|
|
17b94a |
+ DHT_VXATTR_SUBVOLS_UP = 1,
|
|
|
17b94a |
+ DHT_VXATTR_SUBVOLS_DOWN = 0,
|
|
|
17b94a |
+};
|
|
|
17b94a |
+
|
|
|
17b94a |
+int
|
|
|
17b94a |
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
|
|
|
17b94a |
+ const char *key)
|
|
|
17b94a |
+{
|
|
|
17b94a |
+ dht_local_t *local = NULL;
|
|
|
17b94a |
+ int ret = -1;
|
|
|
17b94a |
+ int op_errno = ENODATA;
|
|
|
17b94a |
+ int value = DHT_VXATTR_SUBVOLS_UP;
|
|
|
17b94a |
+ int i = 0;
|
|
|
17b94a |
+ dht_conf_t *conf = NULL;
|
|
|
17b94a |
+
|
|
|
17b94a |
+ conf = this->private;
|
|
|
17b94a |
+ local = frame->local;
|
|
|
17b94a |
+
|
|
|
17b94a |
+ if (!key) {
|
|
|
17b94a |
+ op_errno = EINVAL;
|
|
|
17b94a |
+ goto out;
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ local->xattr = dict_new();
|
|
|
17b94a |
+ if (!local->xattr) {
|
|
|
17b94a |
+ op_errno = ENOMEM;
|
|
|
17b94a |
+ goto out;
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ for (i = 0; i < conf->subvolume_cnt; i++) {
|
|
|
17b94a |
+ if (!conf->subvolume_status[i]) {
|
|
|
17b94a |
+ value = DHT_VXATTR_SUBVOLS_DOWN;
|
|
|
17b94a |
+ gf_msg_debug(this->name, 0, "subvol %s is down ",
|
|
|
17b94a |
+ conf->subvolumes[i]->name);
|
|
|
17b94a |
+ break;
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ ret = dict_set_int8(local->xattr, (char *)key, value);
|
|
|
17b94a |
+ if (ret < 0) {
|
|
|
17b94a |
+ op_errno = -ret;
|
|
|
17b94a |
+ ret = -1;
|
|
|
17b94a |
+ goto out;
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ ret = 0;
|
|
|
17b94a |
+
|
|
|
17b94a |
+out:
|
|
|
17b94a |
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
|
|
|
17b94a |
+ return 0;
|
|
|
17b94a |
+}
|
|
|
17b94a |
+
|
|
|
17b94a |
int
|
|
|
17b94a |
dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
|
|
|
17b94a |
dict_t *xdata)
|
|
|
17b94a |
@@ -4915,6 +4969,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
|
|
|
17b94a |
goto err;
|
|
|
17b94a |
}
|
|
|
17b94a |
|
|
|
17b94a |
+ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
|
|
|
17b94a |
+ dht_vgetxattr_subvol_status(frame, this, key);
|
|
|
17b94a |
+ return 0;
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+
|
|
|
17b94a |
/* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
|
|
|
17b94a |
if (!DHT_IS_DIR(layout))
|
|
|
17b94a |
goto no_dht_is_dir;
|
|
|
17b94a |
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
|
|
|
17b94a |
index 1b3e826..9ec5b51 100644
|
|
|
17b94a |
--- a/xlators/cluster/dht/src/dht-common.h
|
|
|
17b94a |
+++ b/xlators/cluster/dht/src/dht-common.h
|
|
|
17b94a |
@@ -45,6 +45,10 @@
|
|
|
17b94a |
#define DHT_DIR_STAT_BLOCKS 8
|
|
|
17b94a |
#define DHT_DIR_STAT_SIZE 4096
|
|
|
17b94a |
|
|
|
17b94a |
+/* Virtual xattr for subvols status */
|
|
|
17b94a |
+
|
|
|
17b94a |
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
|
|
|
17b94a |
+
|
|
|
17b94a |
/* Virtual xattrs for debugging */
|
|
|
17b94a |
|
|
|
17b94a |
#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
|
|
|
17b94a |
--
|
|
|
17b94a |
1.8.3.1
|
|
|
17b94a |
|