14f8ab
From 91936fe5ef854bd9d2f91e643795d0e7791b97ba Mon Sep 17 00:00:00 2001
14f8ab
From: Harpreet Kaur <hlalwani@redhat.com>
14f8ab
Date: Mon, 7 Jan 2019 16:38:25 +0530
14f8ab
Subject: [PATCH 426/449] geo-rep: Fix for "Transport End Point not connected"
14f8ab
 issue
14f8ab
14f8ab
problem: Geo-rep gsyncd process mounts the master and slave volumes
14f8ab
         on master nodes and slave nodes respectively and starts
14f8ab
         the sync. But it doesn't wait for the mount to be in ready
14f8ab
         state to accept I/O. The gluster mount is considered to be
14f8ab
         ready when all the distribute sub-volumes are up. If not all
14f8ab
         the distribute subvolumes are up, it can cause an ENOTCONN
14f8ab
         error when a lookup comes for a file that is on the subvol
14f8ab
         that is down.
14f8ab
14f8ab
solution: Added a Virtual Xattr "dht.subvol.status" which returns "1"
14f8ab
          if all subvols are up and "0" if any subvol is down.
14f8ab
          Geo-rep then uses this virtual xattr after a fresh mount, to
14f8ab
          check whether all subvols are up or not and then starts the
14f8ab
          I/O.
14f8ab
14f8ab
>fixes: bz#1664335
14f8ab
>Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91
14f8ab
>Signed-off-by: Harpreet Kaur <hlalwani@redhat.com>
14f8ab
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
14f8ab
14f8ab
backport of https://review.gluster.org/#/c/glusterfs/+/22001/
14f8ab
BUG: 1640573
14f8ab
Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91
14f8ab
Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/202554
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 geo-replication/syncdaemon/resource.py   | 11 ++++++
14f8ab
 geo-replication/syncdaemon/syncdutils.py | 20 +++++++++--
14f8ab
 xlators/cluster/dht/src/dht-common.c     | 59 ++++++++++++++++++++++++++++++++
14f8ab
 xlators/cluster/dht/src/dht-common.h     |  4 +++
14f8ab
 4 files changed, 91 insertions(+), 3 deletions(-)
14f8ab
14f8ab
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
14f8ab
index 189d8a1..0c61de9 100644
14f8ab
--- a/geo-replication/syncdaemon/resource.py
14f8ab
+++ b/geo-replication/syncdaemon/resource.py
14f8ab
@@ -37,6 +37,7 @@ from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
14f8ab
 from syncdutils import get_changelog_log_level, get_rsync_version
14f8ab
 from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION
14f8ab
 from syncdutils import GX_GFID_CANONICAL_LEN
14f8ab
+from syncdutils import gf_mount_ready
14f8ab
 from gsyncdstatus import GeorepStatus
14f8ab
 from syncdutils import lf, Popen, sup
14f8ab
 from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt
14f8ab
@@ -950,6 +951,16 @@ class Mounter(object):
14f8ab
                 logging.exception('mount cleanup failure:')
14f8ab
                 rv = 200
14f8ab
             os._exit(rv)
14f8ab
+
14f8ab
+        # Poll dht.subvol.status until all subvols are up or retries run out.
14f8ab
+        RETRIES = 10
14f8ab
+        while not gf_mount_ready():
14f8ab
+            if RETRIES < 0:
14f8ab
+                logging.error('Subvols are not up')
14f8ab
+                break
14f8ab
+            RETRIES -= 1
14f8ab
+            time.sleep(0.2)
14f8ab
+
14f8ab
         logging.debug('auxiliary glusterfs mount prepared')
14f8ab
 
14f8ab
 
14f8ab
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
14f8ab
index b08098e..7560fa1 100644
14f8ab
--- a/geo-replication/syncdaemon/syncdutils.py
14f8ab
+++ b/geo-replication/syncdaemon/syncdutils.py
14f8ab
@@ -21,8 +21,8 @@ import subprocess
14f8ab
 import socket
14f8ab
 from subprocess import PIPE
14f8ab
 from threading import Lock, Thread as baseThread
14f8ab
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED
14f8ab
-from errno import EINTR, ENOENT, ESTALE, EBUSY, errorcode
14f8ab
+from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED
14f8ab
+from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode
14f8ab
 from signal import signal, SIGTERM
14f8ab
 import select as oselect
14f8ab
 from os import waitpid as owaitpid
14f8ab
@@ -55,6 +55,8 @@ from rconf import rconf
14f8ab
 
14f8ab
 from hashlib import sha256 as sha256
14f8ab
 
14f8ab
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
14f8ab
+
14f8ab
 # auxiliary gfid based access prefix
14f8ab
 _CL_AUX_GFID_PFX = ".gfid/"
14f8ab
 ROOT_GFID = "00000000-0000-0000-0000-000000000001"
14f8ab
@@ -100,6 +102,19 @@ def unescape_space_newline(s):
14f8ab
             .replace(NEWLINE_ESCAPE_CHAR, "\n")\
14f8ab
             .replace(PERCENTAGE_ESCAPE_CHAR, "%")
14f8ab
 
14f8ab
+# gf_mount_ready() returns 1 if all subvols are up (or the xattr read fails), else 0
14f8ab
+def gf_mount_ready():
14f8ab
+    ret = errno_wrap(Xattr.lgetxattr,
14f8ab
+                     ['.', 'dht.subvol.status', 16],
14f8ab
+                     [ENOENT, ENOTSUP, ENODATA], [ENOMEM])
14f8ab
+
14f8ab
+    if isinstance(ret, int):
14f8ab
+       logging.error("failed to get the xattr value")
14f8ab
+       return 1
14f8ab
+    ret = ret.rstrip('\x00')
14f8ab
+    if ret == "1":
14f8ab
+       return 1
14f8ab
+    return 0
14f8ab
 
14f8ab
 def norm(s):
14f8ab
     if s:
14f8ab
@@ -564,7 +579,6 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]):
14f8ab
 def lstat(e):
14f8ab
     return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
14f8ab
 
14f8ab
-
14f8ab
 def get_gfid_from_mnt(gfidpath):
14f8ab
     return errno_wrap(Xattr.lgetxattr,
14f8ab
                       [gfidpath, 'glusterfs.gfid.string',
14f8ab
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
14f8ab
index 6aa18f3..23cc80c 100644
14f8ab
--- a/xlators/cluster/dht/src/dht-common.c
14f8ab
+++ b/xlators/cluster/dht/src/dht-common.c
14f8ab
@@ -4858,6 +4858,60 @@ out:
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
+/* Virtual Xattr which returns 1 if all subvols are up,
14f8ab
+   else returns 0. Geo-rep then uses this virtual xattr
14f8ab
+   after a fresh mount and starts the I/O.
14f8ab
+*/
14f8ab
+
14f8ab
+enum dht_vxattr_subvol {
14f8ab
+    DHT_VXATTR_SUBVOLS_UP = 1,
14f8ab
+    DHT_VXATTR_SUBVOLS_DOWN = 0,
14f8ab
+};
14f8ab
+
14f8ab
+int
14f8ab
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
14f8ab
+                            const char *key)
14f8ab
+{
14f8ab
+    dht_local_t *local = NULL;
14f8ab
+    int ret = -1;
14f8ab
+    int op_errno = ENODATA;
14f8ab
+    int value = DHT_VXATTR_SUBVOLS_UP;
14f8ab
+    int i = 0;
14f8ab
+    dht_conf_t *conf = NULL;
14f8ab
+
14f8ab
+    conf = this->private;
14f8ab
+    local = frame->local;
14f8ab
+
14f8ab
+    if (!key) {
14f8ab
+        op_errno = EINVAL;
14f8ab
+        goto out;
14f8ab
+    }
14f8ab
+    local->xattr = dict_new();
14f8ab
+    if (!local->xattr) {
14f8ab
+        op_errno = ENOMEM;
14f8ab
+        goto out;
14f8ab
+    }
14f8ab
+    for (i = 0; i < conf->subvolume_cnt; i++) {
14f8ab
+        if (!conf->subvolume_status[i]) {
14f8ab
+            value = DHT_VXATTR_SUBVOLS_DOWN;
14f8ab
+            gf_msg_debug(this->name, 0, "subvol %s is down ",
14f8ab
+                         conf->subvolumes[i]->name);
14f8ab
+            break;
14f8ab
+        }
14f8ab
+    }
14f8ab
+    ret = dict_set_int8(local->xattr, (char *)key, value);
14f8ab
+    if (ret < 0) {
14f8ab
+        op_errno = -ret;
14f8ab
+        ret = -1;
14f8ab
+        goto out;
14f8ab
+    }
14f8ab
+    ret = 0;
14f8ab
+
14f8ab
+out:
14f8ab
+    DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
14f8ab
+    return 0;
14f8ab
+}
14f8ab
+
14f8ab
 int
14f8ab
 dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
14f8ab
              dict_t *xdata)
14f8ab
@@ -4915,6 +4969,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
14f8ab
         goto err;
14f8ab
     }
14f8ab
 
14f8ab
+    if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
14f8ab
+        dht_vgetxattr_subvol_status(frame, this, key);
14f8ab
+        return 0;
14f8ab
+    }
14f8ab
+
14f8ab
     /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
14f8ab
     if (!DHT_IS_DIR(layout))
14f8ab
         goto no_dht_is_dir;
14f8ab
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
14f8ab
index 1b3e826..9ec5b51 100644
14f8ab
--- a/xlators/cluster/dht/src/dht-common.h
14f8ab
+++ b/xlators/cluster/dht/src/dht-common.h
14f8ab
@@ -45,6 +45,10 @@
14f8ab
 #define DHT_DIR_STAT_BLOCKS 8
14f8ab
 #define DHT_DIR_STAT_SIZE 4096
14f8ab
 
14f8ab
+/* Virtual xattr for subvols status */
14f8ab
+
14f8ab
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
14f8ab
+
14f8ab
 /* Virtual xattrs for debugging */
14f8ab
 
14f8ab
 #define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab