21ab4e
From 04ba7d1e81b9dda9fcb19e2821cf1e3588166e6d Mon Sep 17 00:00:00 2001
21ab4e
From: Niels de Vos <ndevos@redhat.com>
21ab4e
Date: Wed, 21 Jun 2017 16:25:33 +0200
21ab4e
Subject: [PATCH 548/557] nfs/nlm: handle reconnect for non-NLM4_LOCK requests
21ab4e
21ab4e
When a reply on an NLM-procedure gets stuck, the NFS-client will resend
21ab4e
the request. This can happen through a re-connect in case the connection
21ab4e
was terminated (long delay in the reply on the initial request). Once
21ab4e
that happens, not all NLM-procedures are handled correctly.
21ab4e
21ab4e
Testing this is difficult and time-consuming. There still may be
21ab4e
problems with certain operations, but this definitely makes it behave
21ab4e
much better than before.
21ab4e
21ab4e
The problem occurred due to a problem in EC, change-id I18a782903ba
21ab4e
addressed the root cause.
21ab4e
21ab4e
Cherry picked from commit fafe1491ead527ba1024c521013aa90d2ee2b355:
21ab4e
> Change-Id: I23b385568e27232951fa3fbd7198a0e5d775a8c2
21ab4e
> BUG: 1467313
21ab4e
> Signed-off-by: Niels de Vos <ndevos@redhat.com>
21ab4e
> Reviewed-on: https://review.gluster.org/17698
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
21ab4e
Change-Id: I23b385568e27232951fa3fbd7198a0e5d775a8c2
21ab4e
BUG: 1411344
21ab4e
Signed-off-by: Niels de Vos <ndevos@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/111767
21ab4e
Tested-by: Niels de Vos <ndevos@redhat.com>
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 xlators/nfs/server/src/nlm4.c | 101 +++++++++++++++++++++++++++++++++---------
21ab4e
 1 file changed, 79 insertions(+), 22 deletions(-)
21ab4e
21ab4e
diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c
21ab4e
index f57df4e..ea4e628 100644
21ab4e
--- a/xlators/nfs/server/src/nlm4.c
21ab4e
+++ b/xlators/nfs/server/src/nlm4.c
21ab4e
@@ -923,38 +923,20 @@ nlm4svc_send_granted_cbk (struct rpc_req *req, struct iovec *iov, int count,
21ab4e
         return 0;
21ab4e
 }
21ab4e
 
21ab4e
-void
21ab4e
-nlm4svc_send_granted (nfs3_call_state_t *cs);
21ab4e
+static int
21ab4e
+nlm_handle_connect (struct rpc_clnt *rpc_clnt, nfs3_call_state_t *cs);
21ab4e
 
21ab4e
 int
21ab4e
 nlm_rpcclnt_notify (struct rpc_clnt *rpc_clnt, void *mydata,
21ab4e
                     rpc_clnt_event_t fn, void *data)
21ab4e
 {
21ab4e
-        int                ret         = 0;
21ab4e
-        char              *caller_name = NULL;
21ab4e
         nfs3_call_state_t *cs          = NULL;
21ab4e
 
21ab4e
         cs = mydata;
21ab4e
 
21ab4e
         switch (fn) {
21ab4e
         case RPC_CLNT_CONNECT:
21ab4e
-                if (!cs->req) {
21ab4e
-                        gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL,
21ab4e
-                                NFS_MSG_RPC_CLNT_ERROR, "Spurious notify?!");
21ab4e
-                        goto err;
21ab4e
-                }
21ab4e
-
21ab4e
-                caller_name = cs->args.nlm4_lockargs.alock.caller_name;
21ab4e
-                ret = nlm_set_rpc_clnt (rpc_clnt, caller_name);
21ab4e
-                if (ret == -1) {
21ab4e
-                        gf_msg (GF_NLM, GF_LOG_ERROR, 0,
21ab4e
-                                NFS_MSG_RPC_CLNT_ERROR, "Failed to set "
21ab4e
-                                "rpc clnt");
21ab4e
-                        goto err;
21ab4e
-                }
21ab4e
-                nlm4svc_send_granted (cs);
21ab4e
-                rpc_clnt_unref (rpc_clnt);
21ab4e
-
21ab4e
+                nlm_handle_connect (rpc_clnt, cs);
21ab4e
                 break;
21ab4e
 
21ab4e
         case RPC_CLNT_MSG:
21ab4e
@@ -967,7 +949,6 @@ nlm_rpcclnt_notify (struct rpc_clnt *rpc_clnt, void *mydata,
21ab4e
                 break;
21ab4e
         }
21ab4e
 
21ab4e
- err:
21ab4e
         return 0;
21ab4e
 }
21ab4e
 
21ab4e
@@ -2372,6 +2353,82 @@ nlm4svc_sm_notify (struct nlm_sm_status *status)
21ab4e
         nlm_cleanup_fds (status->mon_name);
21ab4e
 }
21ab4e
 
21ab4e
+
21ab4e
+/* RPC_CLNT_CONNECT gets called on (re)connects and should be able to handle
21ab4e
+ * different NLM requests. */
21ab4e
+static int
21ab4e
+nlm_handle_connect (struct rpc_clnt *rpc_clnt, nfs3_call_state_t *cs)
21ab4e
+{
21ab4e
+        int                 ret         = -1;
21ab4e
+        int                 nlm_proc    = NLM4_NULL;
21ab4e
+        struct nlm4_lock   *alock       = NULL;
21ab4e
+        char               *caller_name = NULL;
21ab4e
+
21ab4e
+        if (!cs || !cs->req) {
21ab4e
+                gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL, NFS_MSG_RPC_CLNT_ERROR,
21ab4e
+                        "Spurious notify?!");
21ab4e
+                goto out;
21ab4e
+        }
21ab4e
+
21ab4e
+        /* NLM4_* actions from nlm4.h */
21ab4e
+        if (cs->req->prognum == NLM_PROGRAM) {
21ab4e
+                nlm_proc = cs->req->procnum;
21ab4e
+        } else {
21ab4e
+                /* hmm, cs->req has not been filled completely */
21ab4e
+                if (cs->resume_fn == nlm4_lock_fd_resume)
21ab4e
+                        nlm_proc = NLM4_LOCK;
21ab4e
+                else if (cs->resume_fn == nlm4_cancel_fd_resume)
21ab4e
+                        nlm_proc = NLM4_CANCEL;
21ab4e
+                else if (cs->resume_fn == nlm4_unlock_fd_resume)
21ab4e
+                        nlm_proc = NLM4_UNLOCK;
21ab4e
+                else {
21ab4e
+                        gf_msg (GF_NLM, GF_LOG_ERROR, 0,
21ab4e
+                                NFS_MSG_RPC_CLNT_ERROR, "(re)connect with an "
21ab4e
+                                "unexpected NLM4 procedure (%d)", nlm_proc);
21ab4e
+                        goto out;
21ab4e
+                }
21ab4e
+        }
21ab4e
+
21ab4e
+        switch (nlm_proc) {
21ab4e
+        case NLM4_LOCK:
21ab4e
+                alock = &cs->args.nlm4_lockargs.alock;
21ab4e
+                caller_name = alock->caller_name;
21ab4e
+
21ab4e
+                ret = nlm_set_rpc_clnt (rpc_clnt, caller_name);
21ab4e
+                if (ret == -1) {
21ab4e
+                        gf_msg (GF_NLM, GF_LOG_ERROR, 0,
21ab4e
+                                NFS_MSG_RPC_CLNT_ERROR, "Failed to set "
21ab4e
+                                "rpc clnt");
21ab4e
+                        goto out;
21ab4e
+                }
21ab4e
+
21ab4e
+                /* extra ref taken with nlm_set_rpc_clnt() */
21ab4e
+                rpc_clnt_unref (rpc_clnt);
21ab4e
+
21ab4e
+                nlm4svc_send_granted (cs);
21ab4e
+                break;
21ab4e
+
21ab4e
+        case NLM4_CANCEL:
21ab4e
+                /* alock = &cs->args.nlm4_cancargs.alock; */
21ab4e
+                ret = nlm4svc_cancel (cs->req);
21ab4e
+                break;
21ab4e
+
21ab4e
+        case NLM4_UNLOCK:
21ab4e
+                /* alock = &cs->args.nlm4_unlockargs.alock; */
21ab4e
+                ret = nlm4svc_unlock (cs->req);
21ab4e
+                break;
21ab4e
+
21ab4e
+        default:
21ab4e
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_RPC_CLNT_ERROR,
21ab4e
+                        "(re)connect with an unexpected NLM4 procedure "
21ab4e
+                        "(%d)", nlm_proc);
21ab4e
+        }
21ab4e
+
21ab4e
+out:
21ab4e
+        return ret;
21ab4e
+}
21ab4e
+
21ab4e
+
21ab4e
 rpcsvc_actor_t  nlm4svc_actors[NLM4_PROC_COUNT] = {
21ab4e
         /* 0 */
21ab4e
         {"NULL",       NLM4_NULL,         nlm4svc_null,      NULL, 0, DRC_IDEMPOTENT},
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e