From d7665cf3249310c5faf87368f395b4e25cb86b48 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Thu, 15 Apr 2021 10:29:06 +0530
Subject: [PATCH 577/584] protocol/client: don't reopen fds on which POSIX
 locks are held after a reconnect

XXXXXXXXXXXXXXXXXXX
    IMPORTANT:
XXXXXXXXXXXXXXXXXXX
As a best practice, this patch bumps the op-version from
GD_OP_VERSION_7_1 to GD_OP_VERSION_7_2 since it introduces a new
volume option. Enabling the new option will take effect only after
all the servers and clients are upgraded to this version.
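
To confirm the cluster is ready before enabling the option, the
cluster op-version can be checked and, if required, bumped with the
gluster CLI (a sketch; 70200 corresponds to GD_OP_VERSION_7_2
introduced by this patch):

  gluster volume get all cluster.op-version
  gluster volume set all cluster.op-version 70200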
----------------------------------------------------------------------

Bricks clean up any granted locks after a client disconnects, and
currently these locks are not healed after a reconnect. This means
that, post reconnect, a competing process could be granted a lock even
though the first process that was granted the locks has not unlocked
them. By not re-opening such fds, subsequent operations on them will
fail, forcing the application to close the current fd and open a new
one. This way we prevent any silent corruption.
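
A minimal sketch of the effect, mirroring the test added below
(assumes a FUSE mount at /mnt/test with client.strict-locks enabled,
an existing file "a", and flock(1) from util-linux):

  exec 5>/mnt/test/a     # open an fd on the file
  flock -x 5             # take an exclusive lock on that fd
  # ... brick goes down and comes back; the client reconnects but
  # does not reopen this fd because a lock was held on it ...
  echo "data" >&5        # this write now fails instead of silently
                         # succeeding without the lock
  exec 5>&-              # the application must close and reopen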

A new option "client.strict-locks" is introduced to control this
behaviour. This option is set to "off" by default.
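
The option can be toggled per volume with the gluster CLI, for
example (VOLNAME is a placeholder):

  gluster volume set VOLNAME client.strict-locks on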

> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22712/
> Change-Id: Ieed545efea466cb5e8f5a36199aa26380c301b9e
> Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
> updates: bz#1694920

BUG: 1689375
Change-Id: Ieed545efea466cb5e8f5a36199aa26380c301b9e
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244909
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
---
 libglusterfs/src/glusterfs/globals.h             |  4 +-
 tests/bugs/bug-1694920.t                         | 63 ++++++++++++++++++++++++
 xlators/mgmt/glusterd/src/glusterd-volume-set.c  | 14 ++++++
 xlators/protocol/client/src/client-handshake.c   |  3 +-
 xlators/protocol/client/src/client-helpers.c     |  5 +-
 xlators/protocol/client/src/client-lk.c          |  2 +-
 xlators/protocol/client/src/client-rpc-fops.c    | 45 ++++++++++++++++-
 xlators/protocol/client/src/client-rpc-fops_v2.c | 32 +++++++++++-
 xlators/protocol/client/src/client.c             | 13 +++++
 xlators/protocol/client/src/client.h             | 16 ++++++
 10 files changed, 190 insertions(+), 7 deletions(-)
 create mode 100644 tests/bugs/bug-1694920.t

diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
index 33fb023..ce2d110 100644
--- a/libglusterfs/src/glusterfs/globals.h
+++ b/libglusterfs/src/glusterfs/globals.h
@@ -50,7 +50,7 @@
     1 /* MIN is the fresh start op-version, mostly                             \
          should not change */
 #define GD_OP_VERSION_MAX                                                      \
-    GD_OP_VERSION_7_1 /* MAX VERSION is the maximum                            \
+    GD_OP_VERSION_7_2 /* MAX VERSION is the maximum                            \
                          count in VME table, should                            \
                          keep changing with                                    \
                          introduction of newer                                 \
@@ -140,6 +140,8 @@
 
 #define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */
 
+#define GD_OP_VERSION_7_2 70200 /* Op-version for GlusterFS 7.2 */
+
 #include "glusterfs/xlator.h"
 #include "glusterfs/options.h"
 
diff --git a/tests/bugs/bug-1694920.t b/tests/bugs/bug-1694920.t
new file mode 100644
index 0000000..5bf93c9
--- /dev/null
+++ b/tests/bugs/bug-1694920.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0  $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should succeed by default
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_write $fd1 "data"
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should fail when client.strict-locks is on
+TEST $CLI volume set $V0 client.strict-locks on
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST ! fd_write $fd1 "data"
+TEST fd_close $fd1
+
+cleanup
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index c1ca190..01f3912 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2022,6 +2022,20 @@ struct volopt_map_entry glusterd_volopt_map[] = {
      .value = "9",
      .flags = VOLOPT_FLAG_CLIENT_OPT},
 
+    {.key = "client.strict-locks",
+     .voltype = "protocol/client",
+     .option = "strict-locks",
+     .value = "off",
+     .op_version = GD_OP_VERSION_7_2,
+     .validate_fn = validate_boolean,
+     .type = GLOBAL_DOC,
+     .description = "When set, doesn't reopen saved fds after reconnect "
+                    "if POSIX locks are held on them. Hence subsequent "
+                    "operations on these fds will fail. This is "
+                    "necessary for stricter lock compliance as bricks "
+                    "cleanup any granted locks when a client "
+                    "disconnects."},
+
     /* Server xlator options */
     {.key = "network.tcp-window-size",
      .voltype = "protocol/server",
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index 6b20d92..a12472b 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -910,7 +910,8 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
     {
         list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
         {
-            if (fdctx->remote_fd != -1)
+            if (fdctx->remote_fd != -1 ||
+                (!list_empty(&fdctx->lock_list) && conf->strict_locks))
                 continue;
 
             fdctx->reopen_done = client_child_up_reopen_done;
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index 53b4484..6543100 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -410,6 +410,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
 {
     clnt_fd_ctx_t *fdctx = NULL;
     clnt_conf_t *conf = NULL;
+    gf_boolean_t locks_held = _gf_false;
 
     GF_VALIDATE_OR_GOTO(this->name, fd, out);
     GF_VALIDATE_OR_GOTO(this->name, remote_fd, out);
@@ -431,11 +432,13 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
                 *remote_fd = -1;
             else
                 *remote_fd = fdctx->remote_fd;
+
+            locks_held = !list_empty(&fdctx->lock_list);
         }
     }
     pthread_spin_unlock(&conf->fd_lock);
 
-    if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1))
+    if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && (!locks_held))
         *remote_fd = GF_ANON_FD_NO;
 
     return 0;
diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
index 679e198..c1fb055 100644
--- a/xlators/protocol/client/src/client-lk.c
+++ b/xlators/protocol/client/src/client-lk.c
@@ -351,7 +351,7 @@ delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner)
 
     list_for_each_entry_safe(lock, tmp, &fdctx->lock_list, list)
     {
-        if (!is_same_lkowner(&lock->owner, owner)) {
+        if (is_same_lkowner(&lock->owner, owner)) {
             list_del_init(&lock->list);
             list_add_tail(&lock->list, &delete_list);
             count++;
diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
index 1c8b31b..3110c78 100644
--- a/xlators/protocol/client/src/client-rpc-fops.c
+++ b/xlators/protocol/client/src/client-rpc-fops.c
@@ -22,8 +22,18 @@ int32_t
 client3_getspec(call_frame_t *frame, xlator_t *this, void *data);
 rpc_clnt_prog_t clnt3_3_fop_prog;
 
-/* CBK */
+int
+client_is_setlk(int32_t cmd)
+{
+    if ((cmd == F_SETLK) || (cmd == F_SETLK64) || (cmd == F_SETLKW) ||
+        (cmd == F_SETLKW64)) {
+        return 1;
+    }
 
+    return 0;
+}
+
+/* CBK */
 int
 client3_3_symlink_cbk(struct rpc_req *req, struct iovec *iov, int count,
                       void *myframe)
@@ -816,7 +826,8 @@ client3_3_flush_cbk(struct rpc_req *req, struct iovec *iov, int count,
         goto out;
     }
 
-    if (rsp.op_ret >= 0 && !fd_is_anonymous(local->fd)) {
+    if ((rsp.op_ret >= 0 || (rsp.op_errno == ENOTCONN)) &&
+        !fd_is_anonymous(local->fd)) {
         /* Delete all saved locks of the owner issuing flush */
         ret = delete_granted_locks_owner(local->fd, &local->owner);
         gf_msg_trace(this->name, 0, "deleting locks of owner (%s) returned %d",
@@ -2388,10 +2399,12 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
     int ret = 0;
     xlator_t *this = NULL;
     dict_t *xdata = NULL;
+    clnt_local_t *local = NULL;
 
     this = THIS;
 
     frame = myframe;
+    local = frame->local;
 
     if (-1 == req->rpc_status) {
         rsp.op_ret = -1;
@@ -2412,6 +2425,18 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
         ret = client_post_lk(this, &rsp, &lock, &xdata);
         if (ret < 0)
             goto out;
+
+        /* Save the lock to the client lock cache to be able
+           to recover in the case of server reboot.*/
+
+        if (client_is_setlk(local->cmd)) {
+            ret = client_add_lock_for_recovery(local->fd, &lock, &local->owner,
+                                               local->cmd);
+            if (ret < 0) {
+                rsp.op_ret = -1;
+                rsp.op_errno = -ret;
+            }
+        }
     }
 
 out:
@@ -4263,8 +4288,16 @@ client3_3_flush(call_frame_t *frame, xlator_t *this, void *data)
     ret = client_pre_flush(this, &req, args->fd, args->xdata);
     if (ret) {
         op_errno = -ret;
+        if (op_errno == EBADF) {
+            ret = delete_granted_locks_owner(local->fd, &local->owner);
+            gf_msg_trace(this->name, 0,
+                         "deleting locks of owner (%s) returned %d",
+                         lkowner_utoa(&local->owner), ret);
+        }
+
         goto unwind;
     }
+
     ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH,
                                 client3_3_flush_cbk, NULL,
                                 (xdrproc_t)xdr_gfs3_flush_req);
@@ -5199,8 +5232,16 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
                         args->xdata);
     if (ret) {
         op_errno = -ret;
+
+        if ((op_errno == EBADF) && (args->flock->l_type == F_UNLCK) &&
+            client_is_setlk(local->cmd)) {
+            client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
+                                         local->cmd);
+        }
+
         goto unwind;
     }
+
     ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK,
                                 client3_3_lk_cbk, NULL,
                                 (xdrproc_t)xdr_gfs3_lk_req);
diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c
index 613dda8..954fc58 100644
--- a/xlators/protocol/client/src/client-rpc-fops_v2.c
+++ b/xlators/protocol/client/src/client-rpc-fops_v2.c
@@ -723,7 +723,8 @@ client4_0_flush_cbk(struct rpc_req *req, struct iovec *iov, int count,
         goto out;
     }
 
-    if (rsp.op_ret >= 0 && !fd_is_anonymous(local->fd)) {
+    if ((rsp.op_ret >= 0 || (rsp.op_errno == ENOTCONN)) &&
+        !fd_is_anonymous(local->fd)) {
         /* Delete all saved locks of the owner issuing flush */
         ret = delete_granted_locks_owner(local->fd, &local->owner);
         gf_msg_trace(this->name, 0, "deleting locks of owner (%s) returned %d",
@@ -2193,10 +2194,12 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
     int ret = 0;
     xlator_t *this = NULL;
     dict_t *xdata = NULL;
+    clnt_local_t *local = NULL;
 
     this = THIS;
 
     frame = myframe;
+    local = frame->local;
 
     if (-1 == req->rpc_status) {
         rsp.op_ret = -1;
@@ -2217,6 +2220,18 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
         ret = client_post_lk_v2(this, &rsp, &lock, &xdata);
         if (ret < 0)
             goto out;
+
+        /* Save the lock to the client lock cache to be able
+           to recover in the case of server reboot.*/
+
+        if (client_is_setlk(local->cmd)) {
+            ret = client_add_lock_for_recovery(local->fd, &lock, &local->owner,
+                                               local->cmd);
+            if (ret < 0) {
+                rsp.op_ret = -1;
+                rsp.op_errno = -ret;
+            }
+        }
     }
 
 out:
@@ -3998,6 +4013,13 @@ client4_0_flush(call_frame_t *frame, xlator_t *this, void *data)
     ret = client_pre_flush_v2(this, &req, args->fd, args->xdata);
     if (ret) {
         op_errno = -ret;
+        if (op_errno == EBADF) {
+            ret = delete_granted_locks_owner(local->fd, &local->owner);
+            gf_msg_trace(this->name, 0,
+                         "deleting locks of owner (%s) returned %d",
+                         lkowner_utoa(&local->owner), ret);
+        }
+
         goto unwind;
     }
     ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH,
@@ -4771,8 +4793,16 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
                            args->xdata);
     if (ret) {
         op_errno = -ret;
+
+        if ((op_errno == EBADF) && (args->flock->l_type == F_UNLCK) &&
+            client_is_setlk(local->cmd)) {
+            client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
+                                         local->cmd);
+        }
+
         goto unwind;
     }
+
     ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK,
                                 client4_0_lk_cbk, NULL,
                                 (xdrproc_t)xdr_gfx_lk_req);
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index ed855ca..63c90ea 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -2491,6 +2491,7 @@ build_client_config(xlator_t *this, clnt_conf_t *conf)
     GF_OPTION_INIT("filter-O_DIRECT", conf->filter_o_direct, bool, out);
 
     GF_OPTION_INIT("send-gids", conf->send_gids, bool, out);
+    GF_OPTION_INIT("strict-locks", conf->strict_locks, bool, out);
 
     conf->client_id = glusterfs_leaf_position(this);
 
@@ -2676,6 +2677,7 @@ reconfigure(xlator_t *this, dict_t *options)
                      out);
 
     GF_OPTION_RECONF("send-gids", conf->send_gids, options, bool, out);
+    GF_OPTION_RECONF("strict-locks", conf->strict_locks, options, bool, out);
 
     ret = 0;
 out:
@@ -3032,6 +3034,17 @@ struct volume_options options[] = {
                     " power. Range 1-32 threads.",
      .op_version = {GD_OP_VERSION_RHS_3_0},
      .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"strict-locks"},
+     .type = GF_OPTION_TYPE_BOOL,
+     .default_value = "off",
+     .op_version = {GD_OP_VERSION_7_2},
+     .flags = OPT_FLAG_SETTABLE,
+     .description = "When set, doesn't reopen saved fds after reconnect "
+                    "if POSIX locks are held on them. Hence subsequent "
+                    "operations on these fds will fail. This is "
+                    "necessary for stricter lock compliance as bricks "
+                    "cleanup any granted locks when a client "
+                    "disconnects."},
     {.key = {NULL}},
 };
 
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index f12fa61..bde3d1a 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -235,6 +235,15 @@ typedef struct clnt_conf {
                                       * up, disconnects can be
                                       * logged
                                       */
+
+    gf_boolean_t strict_locks; /* When set, doesn't reopen saved fds after
+                                  reconnect if POSIX locks are held on them.
+                                  Hence subsequent operations on these fds will
+                                  fail. This is necessary for stricter lock
+                                  compliance as bricks cleanup any granted
+                                  locks when a client disconnects.
+                               */
+
 } clnt_conf_t;
 
 typedef struct _client_fd_ctx {
@@ -513,4 +522,11 @@ compound_request_cleanup_v2(gfx_compound_req *req);
 void
 client_compound_rsp_cleanup_v2(gfx_compound_rsp *rsp, int len);
 
+int
+client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
+                             gf_lkowner_t *owner, int32_t cmd);
+
+int
+client_is_setlk(int32_t cmd);
+
 #endif /* !_CLIENT_H */
-- 
1.8.3.1