Blob Blame History Raw
From 062e8379096aa228767518869f213661dca5f275 Mon Sep 17 00:00:00 2001
From: vmallika <vmallika@redhat.com>
Date: Tue, 14 Apr 2015 10:44:13 +0530
Subject: [PATCH 05/18] quota: retry connecting to quotad on ENOTCONN error

This is a backport of http://review.gluster.org/#/c/10230/

> Suppose there are two volumes, vol1 and vol2,
> and quota is enabled and a limit is set on vol1.
> Now if IO is happening on vol1 and quota is enabled/disabled
> on vol2, quotad gets restarted and client will receive
> ENOTCONN in the IO path of vol1.
>
> This patch retries connecting to quotad for up to 60 seconds,
> at intervals of 5 seconds (12 retries).
> If the connection still fails after 12 retries, ENOTCONN is returned.
>
> Change-Id: Ie7f5d108633ec68ba9cc3a6a61d79680485193e8
> BUG: 1211220
> Signed-off-by: vmallika <vmallika@redhat.com>
> Reviewed-on: http://review.gluster.org/10230
> Tested-by: Gluster Build System <jenkins@build.gluster.com>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
> Tested-by: Raghavendra G <rgowdapp@redhat.com>

Change-Id: I94d8d4a814a73d69e934f3e77e989e5f3bf2e65a
BUG: 1039674
Signed-off-by: vmallika <vmallika@redhat.com>
Reviewed-on: http://review.gluster.org/11024
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Tested-by: Raghavendra G <rgowdapp@redhat.com>
Signed-off-by: Sachin Pandit <spandit@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/49897
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
Tested-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
---
 xlators/features/quota/src/quota-enforcer-client.c | 118 +++++++++++++++++----
 xlators/features/quota/src/quota.c                 |   6 +-
 xlators/features/quota/src/quota.h                 |  12 ++-
 3 files changed, 111 insertions(+), 25 deletions(-)

diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
index 01cc447..55e700c 100644
--- a/xlators/features/quota/src/quota-enforcer-client.c
+++ b/xlators/features/quota/src/quota-enforcer-client.c
@@ -120,22 +120,26 @@ int
 quota_enforcer_lookup_cbk (struct rpc_req *req, struct iovec *iov,
                            int count, void *myframe)
 {
-        quota_local_t    *local      = NULL;
-        call_frame_t     *frame      = NULL;
-        int               ret        = 0;
-        gfs3_lookup_rsp   rsp        = {0,};
-        struct iatt       stbuf      = {0,};
-        struct iatt       postparent = {0,};
-        int               op_errno   = EINVAL;
-        dict_t           *xdata      = NULL;
-        inode_t          *inode      = NULL;
-        xlator_t         *this       = NULL;
+        quota_local_t    *local       = NULL;
+        call_frame_t     *frame       = NULL;
+        int               ret         = 0;
+        gfs3_lookup_rsp   rsp         = {0,};
+        struct iatt       stbuf       = {0,};
+        struct iatt       postparent  = {0,};
+        int               op_errno    = EINVAL;
+        dict_t           *xdata       = NULL;
+        inode_t          *inode       = NULL;
+        xlator_t         *this        = NULL;
+        quota_priv_t     *priv        = NULL;
+        struct timespec   retry_delay = {0,};
+        gf_timer_t       *timer       = NULL;
 
         this = THIS;
 
         frame = myframe;
         local = frame->local;
         inode = local->validate_loc.inode;
+        priv  = this->private;
 
         if (-1 == req->rpc_status) {
                 rsp.op_ret   = -1;
@@ -177,6 +181,48 @@ quota_enforcer_lookup_cbk (struct rpc_req *req, struct iovec *iov,
 
 out:
         rsp.op_errno = op_errno;
+
+        /* We need to retry connecting to quotad on ENOTCONN error.
+         * Suppose if there are two volumes vol1 and vol2,
+         * and quota is enabled and limit is set on vol1.
+         * Now if IO is happening on vol1 and quota is enabled/disabled
+         * on vol2, quotad gets restarted and client will receive
+         * ENOTCONN in the IO path of vol1
+         */
+        if (rsp.op_ret == -1 && rsp.op_errno == ENOTCONN) {
+                if (local->quotad_conn_retry >= 12) {
+                        priv->quotad_conn_status = 1;
+                        gf_log (this->name, GF_LOG_WARNING, "failed to connect "
+                                "to quotad after retry count %d)",
+                                local->quotad_conn_retry);
+                } else {
+                        local->quotad_conn_retry++;
+                }
+
+                if (priv->quotad_conn_status == 0) {
+                        /* retry connecting after 5secs for 12 retries
+                         * (upto 60sec).
+                         */
+                        gf_log (this->name, GF_LOG_DEBUG, "retry connecting to "
+                                "quotad (retry count %d)",
+                                local->quotad_conn_retry);
+
+                        retry_delay.tv_sec = 5;
+                        retry_delay.tv_nsec = 0;
+                        timer = gf_timer_call_after (this->ctx, retry_delay,
+                                                     _quota_enforcer_lookup,
+                                                     (void *) frame);
+                        if (timer == NULL) {
+                                gf_log (this->name, GF_LOG_WARNING, "failed to "
+                                        "set quota_enforcer_lookup with timer");
+                        } else {
+                                goto clean;
+                        }
+                }
+        } else {
+                priv->quotad_conn_status = 0;
+        }
+
         if (rsp.op_ret == -1) {
                 /* any error other than ENOENT */
                 if (rsp.op_errno != ENOENT)
@@ -189,11 +235,15 @@ out:
                         gf_log (this->name, GF_LOG_TRACE,
                                 "not found on remote node");
 
+        } else if (local->quotad_conn_retry) {
+                gf_log (this->name, GF_LOG_DEBUG, "connected to quotad after "
+                        "retry count %d", local->quotad_conn_retry);
         }
 
         local->validate_cbk (frame, NULL, this, rsp.op_ret, rsp.op_errno, inode,
                              &stbuf, xdata, &postparent);
 
+clean:
         if (xdata)
                 dict_unref (xdata);
 
@@ -202,21 +252,22 @@ out:
         return 0;
 }
 
-int
-quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
-                       dict_t *xdata, fop_lookup_cbk_t validate_cbk)
+void
+_quota_enforcer_lookup (void *data)
 {
         quota_local_t          *local      = NULL;
         gfs3_lookup_req         req        = {{0,},};
         int                     ret        = 0;
         int                     op_errno   = ESTALE;
         quota_priv_t           *priv       = NULL;
+        call_frame_t           *frame      = NULL;
+        loc_t                  *loc        = NULL;
+        xlator_t               *this       = NULL;
 
-        if (!frame || !this || !loc)
-                goto unwind;
-
+        frame = data;
         local = frame->local;
-        local->validate_cbk = validate_cbk;
+        this  = local->this;
+        loc   = &local->validate_loc;
 
         priv = this->private;
 
@@ -228,8 +279,8 @@ quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
         else
                 memcpy (req.gfid, loc->gfid, 16);
 
-        if (xdata) {
-                GF_PROTOCOL_DICT_SERIALIZE (this, xdata,
+        if (local->validate_xdata) {
+                GF_PROTOCOL_DICT_SERIALIZE (this, local->validate_xdata,
                                             (&req.xdata.xdata_val),
                                             req.xdata.xdata_len,
                                             op_errno, unwind);
@@ -253,13 +304,38 @@ quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
 
         GF_FREE (req.xdata.xdata_val);
 
-        return 0;
+        return;
 
 unwind:
-        validate_cbk (frame, NULL, this, -1, op_errno, NULL, NULL, NULL, NULL);
+        local->validate_cbk (frame, NULL, this, -1, op_errno, NULL, NULL, NULL,
+                             NULL);
 
         GF_FREE (req.xdata.xdata_val);
 
+        return;
+}
+
+int
+quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, dict_t *xdata,
+                       fop_lookup_cbk_t validate_cbk)
+{
+        quota_local_t          *local      = NULL;
+
+        if (!frame || !this)
+                goto unwind;
+
+        local = frame->local;
+        local->this = this;
+        local->validate_cbk = validate_cbk;
+        local->validate_xdata = dict_ref (xdata);
+
+        _quota_enforcer_lookup (frame);
+
+        return 0;
+
+unwind:
+        validate_cbk (frame, NULL, this, -1, ESTALE, NULL, NULL, NULL, NULL);
+
         return 0;
 }
 
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 621e849..6d05273 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -171,6 +171,9 @@ quota_local_cleanup (xlator_t *this, quota_local_t *local)
         if (local->xdata)
                 dict_unref (local->xdata);
 
+        if (local->validate_xdata)
+                dict_unref (local->validate_xdata);
+
         if (local->stub)
                 call_stub_destroy (local->stub);
 
@@ -884,8 +887,7 @@ quota_validate (call_frame_t *frame, inode_t *inode, xlator_t *this,
                 goto err;
         }
 
-        ret = quota_enforcer_lookup (frame, this, &local->validate_loc, xdata,
-                                     cbk_fn);
+        ret = quota_enforcer_lookup (frame, this, xdata, cbk_fn);
         if (ret < 0) {
                 ret = -ENOTCONN;
                 goto err;
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
index 183f8c1..566302c 100644
--- a/xlators/features/quota/src/quota.h
+++ b/xlators/features/quota/src/quota.h
@@ -214,6 +214,9 @@ struct quota_local {
         quota_ancestry_built_t  ancestry_cbk;
         void                   *ancestry_data;
         dict_t                 *xdata;
+        dict_t                 *validate_xdata;
+        int32_t                 quotad_conn_retry;
+        xlator_t               *this;
 };
 typedef struct quota_local      quota_local_t;
 
@@ -232,12 +235,17 @@ struct quota_priv {
         inode_table_t         *itable;
         char                  *volume_uuid;
         uint64_t               validation_count;
+        int32_t                quotad_conn_status;
 };
 typedef struct quota_priv      quota_priv_t;
 
 int
-quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
-                       dict_t *xdata, fop_lookup_cbk_t cbk);
+quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, dict_t *xdata,
+                       fop_lookup_cbk_t cbk);
+
+void
+_quota_enforcer_lookup (void *data);
+
 struct rpc_clnt *
 quota_enforcer_init (xlator_t *this, dict_t *options);
 
-- 
1.9.3