74096c
From 91936fe5ef854bd9d2f91e643795d0e7791b97ba Mon Sep 17 00:00:00 2001
74096c
From: Harpreet Kaur <hlalwani@redhat.com>
74096c
Date: Mon, 7 Jan 2019 16:38:25 +0530
74096c
Subject: [PATCH 426/449] geo-rep: Fix for "Transport End Point not connected"
74096c
 issue
74096c
74096c
problem: Geo-rep gsyncd process mounts the master and slave volumes
74096c
         on master nodes and slave nodes respectively and starts
74096c
         the sync. But it doesn't wait for the mount to be in ready
74096c
         state to accept I/O. The gluster mount is considered to be
74096c
         ready when all the distribute sub-volumes are up. If all
74096c
         the distribute subvolumes are not up, it can cause ENOTCONN
74096c
         error when a lookup comes for a file located on a subvol
74096c
         that is down.
74096c
74096c
solution: Added a Virtual Xattr "dht.subvol.status" which returns "1"
74096c
          if all subvols are up and "0" if any subvol is down.
74096c
          Geo-rep then uses this virtual xattr after a fresh mount, to
74096c
          check whether all subvols are up or not and then starts the
74096c
          I/O.
74096c
74096c
>fixes: bz#1664335
74096c
>Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91
74096c
>Signed-off-by: Harpreet Kaur <hlalwani@redhat.com>
74096c
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
74096c
74096c
backport of https://review.gluster.org/#/c/glusterfs/+/22001/
74096c
BUG: 1640573
74096c
Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91
74096c
Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/202554
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
---
74096c
 geo-replication/syncdaemon/resource.py   | 11 ++++++
74096c
 geo-replication/syncdaemon/syncdutils.py | 20 +++++++++--
74096c
 xlators/cluster/dht/src/dht-common.c     | 59 ++++++++++++++++++++++++++++++++
74096c
 xlators/cluster/dht/src/dht-common.h     |  4 +++
74096c
 4 files changed, 91 insertions(+), 3 deletions(-)
74096c
74096c
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
74096c
index 189d8a1..0c61de9 100644
74096c
--- a/geo-replication/syncdaemon/resource.py
74096c
+++ b/geo-replication/syncdaemon/resource.py
74096c
@@ -37,6 +37,7 @@ from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
74096c
 from syncdutils import get_changelog_log_level, get_rsync_version
74096c
 from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION
74096c
 from syncdutils import GX_GFID_CANONICAL_LEN
74096c
+from syncdutils import gf_mount_ready
74096c
 from gsyncdstatus import GeorepStatus
74096c
 from syncdutils import lf, Popen, sup
74096c
 from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt
74096c
@@ -950,6 +951,16 @@ class Mounter(object):
74096c
                 logging.exception('mount cleanup failure:')
74096c
                 rv = 200
74096c
             os._exit(rv)
74096c
+
74096c
+        # Poll dht.subvol.status until all subvols are up or retries run out.
74096c
+        RETRIES = 10
74096c
+        while not gf_mount_ready():
74096c
+            if RETRIES < 0:
74096c
+                logging.error('Subvols are not up')
74096c
+                break
74096c
+            RETRIES -= 1
74096c
+            time.sleep(0.2)
74096c
+
74096c
         logging.debug('auxiliary glusterfs mount prepared')
74096c
 
74096c
 
74096c
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
74096c
index b08098e..7560fa1 100644
74096c
--- a/geo-replication/syncdaemon/syncdutils.py
74096c
+++ b/geo-replication/syncdaemon/syncdutils.py
74096c
@@ -21,8 +21,8 @@ import subprocess
74096c
 import socket
74096c
 from subprocess import PIPE
74096c
 from threading import Lock, Thread as baseThread
74096c
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED
74096c
-from errno import EINTR, ENOENT, ESTALE, EBUSY, errorcode
74096c
+from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED
74096c
+from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode
74096c
 from signal import signal, SIGTERM
74096c
 import select as oselect
74096c
 from os import waitpid as owaitpid
74096c
@@ -55,6 +55,8 @@ from rconf import rconf
74096c
 
74096c
 from hashlib import sha256 as sha256
74096c
 
74096c
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
74096c
+
74096c
 # auxiliary gfid based access prefix
74096c
 _CL_AUX_GFID_PFX = ".gfid/"
74096c
 ROOT_GFID = "00000000-0000-0000-0000-000000000001"
74096c
@@ -100,6 +102,19 @@ def unescape_space_newline(s):
74096c
             .replace(NEWLINE_ESCAPE_CHAR, "\n")\
74096c
             .replace(PERCENTAGE_ESCAPE_CHAR, "%")
74096c
 
74096c
+# gf_mount_ready() returns 1 if all subvols are up or the xattr is unreadable, else 0
74096c
+def gf_mount_ready():
74096c
+    ret = errno_wrap(Xattr.lgetxattr,
74096c
+                     ['.', 'dht.subvol.status', 16],
74096c
+                     [ENOENT, ENOTSUP, ENODATA], [ENOMEM])
74096c
+
74096c
+    if isinstance(ret, int):
74096c
+       logging.error("failed to get the xattr value")
74096c
+       return 1
74096c
+    ret = ret.rstrip('\x00')
74096c
+    if ret == "1":
74096c
+       return 1
74096c
+    return 0
74096c
 
74096c
 def norm(s):
74096c
     if s:
74096c
@@ -564,7 +579,6 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]):
74096c
 def lstat(e):
74096c
     return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
74096c
 
74096c
-
74096c
 def get_gfid_from_mnt(gfidpath):
74096c
     return errno_wrap(Xattr.lgetxattr,
74096c
                       [gfidpath, 'glusterfs.gfid.string',
74096c
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
74096c
index 6aa18f3..23cc80c 100644
74096c
--- a/xlators/cluster/dht/src/dht-common.c
74096c
+++ b/xlators/cluster/dht/src/dht-common.c
74096c
@@ -4858,6 +4858,60 @@ out:
74096c
     return 0;
74096c
 }
74096c
 
74096c
+/* Virtual Xattr which returns 1 if all subvols are up,
74096c
+   else returns 0. Geo-rep then uses this virtual xattr
74096c
+   after a fresh mount and starts the I/O.
74096c
+*/
74096c
+
74096c
+enum dht_vxattr_subvol {
74096c
+    DHT_VXATTR_SUBVOLS_UP = 1,
74096c
+    DHT_VXATTR_SUBVOLS_DOWN = 0,
74096c
+};
74096c
+
74096c
+int
74096c
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
74096c
+                            const char *key)
74096c
+{
74096c
+    dht_local_t *local = NULL;
74096c
+    int ret = -1;
74096c
+    int op_errno = ENODATA;
74096c
+    int value = DHT_VXATTR_SUBVOLS_UP;
74096c
+    int i = 0;
74096c
+    dht_conf_t *conf = NULL;
74096c
+
74096c
+    conf = this->private;
74096c
+    local = frame->local;
74096c
+
74096c
+    if (!key) {
74096c
+        op_errno = EINVAL;
74096c
+        goto out;
74096c
+    }
74096c
+    local->xattr = dict_new();
74096c
+    if (!local->xattr) {
74096c
+        op_errno = ENOMEM;
74096c
+        goto out;
74096c
+    }
74096c
+    for (i = 0; i < conf->subvolume_cnt; i++) {
74096c
+        if (!conf->subvolume_status[i]) {
74096c
+            value = DHT_VXATTR_SUBVOLS_DOWN;
74096c
+            gf_msg_debug(this->name, 0, "subvol %s is down ",
74096c
+                         conf->subvolumes[i]->name);
74096c
+            break;
74096c
+        }
74096c
+    }
74096c
+    ret = dict_set_int8(local->xattr, (char *)key, value);
74096c
+    if (ret < 0) {
74096c
+        op_errno = -ret;
74096c
+        ret = -1;
74096c
+        goto out;
74096c
+    }
74096c
+    ret = 0;
74096c
+
74096c
+out:
74096c
+    DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
74096c
+    return 0;
74096c
+}
74096c
+
74096c
 int
74096c
 dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
74096c
              dict_t *xdata)
74096c
@@ -4915,6 +4969,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
74096c
         goto err;
74096c
     }
74096c
 
74096c
+    if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
74096c
+        dht_vgetxattr_subvol_status(frame, this, key);
74096c
+        return 0;
74096c
+    }
74096c
+
74096c
     /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
74096c
     if (!DHT_IS_DIR(layout))
74096c
         goto no_dht_is_dir;
74096c
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
74096c
index 1b3e826..9ec5b51 100644
74096c
--- a/xlators/cluster/dht/src/dht-common.h
74096c
+++ b/xlators/cluster/dht/src/dht-common.h
74096c
@@ -45,6 +45,10 @@
74096c
 #define DHT_DIR_STAT_BLOCKS 8
74096c
 #define DHT_DIR_STAT_SIZE 4096
74096c
 
74096c
+/* Virtual xattr for subvols status */
74096c
+
74096c
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
74096c
+
74096c
 /* Virtual xattrs for debugging */
74096c
 
74096c
 #define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
74096c
-- 
74096c
1.8.3.1
74096c