17b94a
From 7b7ec67680415c22773ebb2a5daacf298b6b1e06 Mon Sep 17 00:00:00 2001
17b94a
From: Xavi Hernandez <xhernandez@redhat.com>
17b94a
Date: Sat, 13 Feb 2021 18:37:32 +0100
17b94a
Subject: [PATCH 537/538] cluster/afr: Fix race in lockinfo (f)getxattr
17b94a
17b94a
A shared dictionary was updated outside the lock after having updated
17b94a
the number of remaining answers. This means that one thread may be
17b94a
processing the last answer and unwinding the request before another
17b94a
thread completes updating the dict.
17b94a
17b94a
    Thread 1                           Thread 2
17b94a
17b94a
    LOCK()
17b94a
    call_cnt-- (=1)
17b94a
    UNLOCK()
17b94a
                                       LOCK()
17b94a
                                       call_cnt-- (=0)
17b94a
                                       UNLOCK()
17b94a
                                       update_dict(dict)
17b94a
                                       if (call_cnt == 0) {
17b94a
                                           STACK_UNWIND(dict);
17b94a
                                       }
17b94a
    update_dict(dict)
17b94a
    if (call_cnt == 0) {
17b94a
        STACK_UNWIND(dict);
17b94a
    }
17b94a
17b94a
The updates from thread 1 are lost.
17b94a
17b94a
This patch also reduces the work done inside the locked region and
17b94a
reduces code duplication.
17b94a
17b94a
Upstream-patch:
17b94a
> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2162
17b94a
> Fixes: #2161
17b94a
> Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa
17b94a
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
17b94a
17b94a
BUG: 1911292
17b94a
Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa
17b94a
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/228924
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 xlators/cluster/afr/src/afr-inode-read.c | 254 ++++++++++++++-----------------
17b94a
 1 file changed, 112 insertions(+), 142 deletions(-)
17b94a
17b94a
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
17b94a
index cf305af..98e195a 100644
17b94a
--- a/xlators/cluster/afr/src/afr-inode-read.c
17b94a
+++ b/xlators/cluster/afr/src/afr-inode-read.c
17b94a
@@ -15,6 +15,8 @@
17b94a
 #include <stdlib.h>
17b94a
 #include <signal.h>
17b94a
 
17b94a
+#include <urcu/uatomic.h>
17b94a
+
17b94a
 #include <glusterfs/glusterfs.h>
17b94a
 #include "afr.h"
17b94a
 #include <glusterfs/dict.h>
17b94a
@@ -868,188 +870,121 @@ afr_getxattr_quota_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
17b94a
     return 0;
17b94a
 }
17b94a
 
17b94a
-int32_t
17b94a
-afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
17b94a
-                          int32_t op_ret, int32_t op_errno, dict_t *dict,
17b94a
-                          dict_t *xdata)
17b94a
+static int32_t
17b94a
+afr_update_local_dicts(call_frame_t *frame, dict_t *dict, dict_t *xdata)
17b94a
 {
17b94a
-    int call_cnt = 0, len = 0;
17b94a
-    char *lockinfo_buf = NULL;
17b94a
-    dict_t *lockinfo = NULL, *newdict = NULL;
17b94a
-    afr_local_t *local = NULL;
17b94a
+    afr_local_t *local;
17b94a
+    dict_t *local_dict;
17b94a
+    dict_t *local_xdata;
17b94a
+    int32_t ret;
17b94a
 
17b94a
-    LOCK(&frame->lock);
17b94a
-    {
17b94a
-        local = frame->local;
17b94a
+    local = frame->local;
17b94a
+    local_dict = NULL;
17b94a
+    local_xdata = NULL;
17b94a
 
17b94a
-        call_cnt = --local->call_count;
17b94a
+    ret = -ENOMEM;
17b94a
 
17b94a
-        if ((op_ret < 0) || (!dict && !xdata)) {
17b94a
-            goto unlock;
17b94a
-        }
17b94a
-
17b94a
-        if (xdata) {
17b94a
-            if (!local->xdata_rsp) {
17b94a
-                local->xdata_rsp = dict_new();
17b94a
-                if (!local->xdata_rsp) {
17b94a
-                    local->op_ret = -1;
17b94a
-                    local->op_errno = ENOMEM;
17b94a
-                    goto unlock;
17b94a
-                }
17b94a
-            }
17b94a
+    if ((dict != NULL) && (local->dict == NULL)) {
17b94a
+        local_dict = dict_new();
17b94a
+        if (local_dict == NULL) {
17b94a
+            goto done;
17b94a
         }
17b94a
+    }
17b94a
 
17b94a
-        if (!dict) {
17b94a
-            goto unlock;
17b94a
+    if ((xdata != NULL) && (local->xdata_rsp == NULL)) {
17b94a
+        local_xdata = dict_new();
17b94a
+        if (local_xdata == NULL) {
17b94a
+            goto done;
17b94a
         }
17b94a
+    }
17b94a
 
17b94a
-        op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
17b94a
-                                      (void **)&lockinfo_buf, &len;;
17b94a
+    if ((local_dict != NULL) || (local_xdata != NULL)) {
17b94a
+        /* TODO: Maybe it would be better to preallocate both dicts before
17b94a
+         *       sending the requests. This way we don't need to use a LOCK()
17b94a
+         *       here. */
17b94a
+        LOCK(&frame->lock);
17b94a
 
17b94a
-        if (!lockinfo_buf) {
17b94a
-            goto unlock;
17b94a
+        if ((local_dict != NULL) && (local->dict == NULL)) {
17b94a
+            local->dict = local_dict;
17b94a
+            local_dict = NULL;
17b94a
         }
17b94a
 
17b94a
-        if (!local->dict) {
17b94a
-            local->dict = dict_new();
17b94a
-            if (!local->dict) {
17b94a
-                local->op_ret = -1;
17b94a
-                local->op_errno = ENOMEM;
17b94a
-                goto unlock;
17b94a
-            }
17b94a
+        if ((local_xdata != NULL) && (local->xdata_rsp == NULL)) {
17b94a
+            local->xdata_rsp = local_xdata;
17b94a
+            local_xdata = NULL;
17b94a
         }
17b94a
-    }
17b94a
-unlock:
17b94a
-    UNLOCK(&frame->lock);
17b94a
 
17b94a
-    if (lockinfo_buf != NULL) {
17b94a
-        lockinfo = dict_new();
17b94a
-        if (lockinfo == NULL) {
17b94a
-            local->op_ret = -1;
17b94a
-            local->op_errno = ENOMEM;
17b94a
-        } else {
17b94a
-            op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
17b94a
-
17b94a
-            if (lockinfo && local->dict) {
17b94a
-                dict_copy(lockinfo, local->dict);
17b94a
-            }
17b94a
-        }
17b94a
-    }
17b94a
-
17b94a
-    if (xdata && local->xdata_rsp) {
17b94a
-        dict_copy(xdata, local->xdata_rsp);
17b94a
+        UNLOCK(&frame->lock);
17b94a
     }
17b94a
 
17b94a
-    if (!call_cnt) {
17b94a
-        newdict = dict_new();
17b94a
-        if (!newdict) {
17b94a
-            local->op_ret = -1;
17b94a
-            local->op_errno = ENOMEM;
17b94a
-            goto unwind;
17b94a
+    if (dict != NULL) {
17b94a
+        if (dict_copy(dict, local->dict) < 0) {
17b94a
+            goto done;
17b94a
         }
17b94a
+    }
17b94a
 
17b94a
-        op_ret = dict_allocate_and_serialize(
17b94a
-            local->dict, (char **)&lockinfo_buf, (unsigned int *)&len;;
17b94a
-        if (op_ret != 0) {
17b94a
-            local->op_ret = -1;
17b94a
-            goto unwind;
17b94a
+    if (xdata != NULL) {
17b94a
+        if (dict_copy(xdata, local->xdata_rsp) < 0) {
17b94a
+            goto done;
17b94a
         }
17b94a
+    }
17b94a
 
17b94a
-        op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
17b94a
-                                 (void *)lockinfo_buf, len);
17b94a
-        if (op_ret < 0) {
17b94a
-            local->op_ret = -1;
17b94a
-            local->op_errno = -op_ret;
17b94a
-            goto unwind;
17b94a
-        }
17b94a
+    ret = 0;
17b94a
 
17b94a
-    unwind:
17b94a
-        AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict,
17b94a
-                         local->xdata_rsp);
17b94a
+done:
17b94a
+    if (local_dict != NULL) {
17b94a
+        dict_unref(local_dict);
17b94a
     }
17b94a
 
17b94a
-    dict_unref(lockinfo);
17b94a
+    if (local_xdata != NULL) {
17b94a
+        dict_unref(local_xdata);
17b94a
+    }
17b94a
 
17b94a
-    return 0;
17b94a
+    return ret;
17b94a
 }
17b94a
 
17b94a
-int32_t
17b94a
-afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
17b94a
-                           int32_t op_ret, int32_t op_errno, dict_t *dict,
17b94a
-                           dict_t *xdata)
17b94a
+static void
17b94a
+afr_getxattr_lockinfo_cbk_common(call_frame_t *frame, int32_t op_ret,
17b94a
+                                 int32_t op_errno, dict_t *dict, dict_t *xdata,
17b94a
+                                 bool is_fgetxattr)
17b94a
 {
17b94a
-    int call_cnt = 0, len = 0;
17b94a
+    int len = 0;
17b94a
     char *lockinfo_buf = NULL;
17b94a
     dict_t *lockinfo = NULL, *newdict = NULL;
17b94a
     afr_local_t *local = NULL;
17b94a
 
17b94a
-    LOCK(&frame->lock);
17b94a
-    {
17b94a
-        local = frame->local;
17b94a
-
17b94a
-        call_cnt = --local->call_count;
17b94a
-
17b94a
-        if ((op_ret < 0) || (!dict && !xdata)) {
17b94a
-            goto unlock;
17b94a
-        }
17b94a
-
17b94a
-        if (xdata) {
17b94a
-            if (!local->xdata_rsp) {
17b94a
-                local->xdata_rsp = dict_new();
17b94a
-                if (!local->xdata_rsp) {
17b94a
-                    local->op_ret = -1;
17b94a
-                    local->op_errno = ENOMEM;
17b94a
-                    goto unlock;
17b94a
-                }
17b94a
-            }
17b94a
-        }
17b94a
-
17b94a
-        if (!dict) {
17b94a
-            goto unlock;
17b94a
-        }
17b94a
+    local = frame->local;
17b94a
 
17b94a
+    if ((op_ret >= 0) && (dict != NULL)) {
17b94a
         op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
17b94a
                                       (void **)&lockinfo_buf, &len;;
17b94a
-
17b94a
-        if (!lockinfo_buf) {
17b94a
-            goto unlock;
17b94a
-        }
17b94a
-
17b94a
-        if (!local->dict) {
17b94a
-            local->dict = dict_new();
17b94a
-            if (!local->dict) {
17b94a
-                local->op_ret = -1;
17b94a
-                local->op_errno = ENOMEM;
17b94a
-                goto unlock;
17b94a
+        if (lockinfo_buf != NULL) {
17b94a
+            lockinfo = dict_new();
17b94a
+            if (lockinfo == NULL) {
17b94a
+                op_ret = -1;
17b94a
+            } else {
17b94a
+                op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
17b94a
             }
17b94a
         }
17b94a
     }
17b94a
-unlock:
17b94a
-    UNLOCK(&frame->lock);
17b94a
 
17b94a
-    if (lockinfo_buf != NULL) {
17b94a
-        lockinfo = dict_new();
17b94a
-        if (lockinfo == NULL) {
17b94a
-            local->op_ret = -1;
17b94a
-            local->op_errno = ENOMEM;
17b94a
-        } else {
17b94a
-            op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
17b94a
-
17b94a
-            if (lockinfo && local->dict) {
17b94a
-                dict_copy(lockinfo, local->dict);
17b94a
-            }
17b94a
+    if ((op_ret >= 0) && ((lockinfo != NULL) || (xdata != NULL))) {
17b94a
+        op_ret = afr_update_local_dicts(frame, lockinfo, xdata);
17b94a
+        if (lockinfo != NULL) {
17b94a
+            dict_unref(lockinfo);
17b94a
         }
17b94a
     }
17b94a
 
17b94a
-    if (xdata && local->xdata_rsp) {
17b94a
-        dict_copy(xdata, local->xdata_rsp);
17b94a
+    if (op_ret < 0) {
17b94a
+        local->op_ret = -1;
17b94a
+        local->op_errno = ENOMEM;
17b94a
     }
17b94a
 
17b94a
-    if (!call_cnt) {
17b94a
+    if (uatomic_sub_return(&local->call_count, 1) == 0) {
17b94a
         newdict = dict_new();
17b94a
         if (!newdict) {
17b94a
             local->op_ret = -1;
17b94a
-            local->op_errno = ENOMEM;
17b94a
+            local->op_errno = op_errno = ENOMEM;
17b94a
             goto unwind;
17b94a
         }
17b94a
 
17b94a
@@ -1057,23 +992,58 @@ unlock:
17b94a
             local->dict, (char **)&lockinfo_buf, (unsigned int *)&len;;
17b94a
         if (op_ret != 0) {
17b94a
             local->op_ret = -1;
17b94a
+            local->op_errno = op_errno = ENOMEM;
17b94a
             goto unwind;
17b94a
         }
17b94a
 
17b94a
         op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
17b94a
                                  (void *)lockinfo_buf, len);
17b94a
         if (op_ret < 0) {
17b94a
-            local->op_ret = -1;
17b94a
-            local->op_errno = -op_ret;
17b94a
+            GF_FREE(lockinfo_buf);
17b94a
+            local->op_ret = op_ret = -1;
17b94a
+            local->op_errno = op_errno = -op_ret;
17b94a
             goto unwind;
17b94a
         }
17b94a
 
17b94a
     unwind:
17b94a
-        AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict,
17b94a
-                         local->xdata_rsp);
17b94a
+        /* TODO: These unwinds use op_ret and op_errno instead of local->op_ret
17b94a
+         *       and local->op_errno. This doesn't seem right because any
17b94a
+         *       failure during processing of each answer could be silently
17b94a
+         *       ignored. This is kept this was the old behavior and because
17b94a
+         *       local->op_ret is initialized as -1 and local->op_errno is
17b94a
+         *       initialized as EUCLEAN, which makes these values useless. */
17b94a
+        if (is_fgetxattr) {
17b94a
+            AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict,
17b94a
+                             local->xdata_rsp);
17b94a
+        } else {
17b94a
+            AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict,
17b94a
+                             local->xdata_rsp);
17b94a
+        }
17b94a
+
17b94a
+        if (newdict != NULL) {
17b94a
+            dict_unref(newdict);
17b94a
+        }
17b94a
     }
17b94a
+}
17b94a
+
17b94a
+static int32_t
17b94a
+afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
17b94a
+                          int32_t op_ret, int32_t op_errno, dict_t *dict,
17b94a
+                          dict_t *xdata)
17b94a
+{
17b94a
+    afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata,
17b94a
+                                     false);
17b94a
 
17b94a
-    dict_unref(lockinfo);
17b94a
+    return 0;
17b94a
+}
17b94a
+
17b94a
+static int32_t
17b94a
+afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
17b94a
+                           int32_t op_ret, int32_t op_errno, dict_t *dict,
17b94a
+                           dict_t *xdata)
17b94a
+{
17b94a
+    afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata,
17b94a
+                                     true);
17b94a
 
17b94a
     return 0;
17b94a
 }
17b94a
-- 
17b94a
1.8.3.1
17b94a