74096c
From 7b7ec67680415c22773ebb2a5daacf298b6b1e06 Mon Sep 17 00:00:00 2001
74096c
From: Xavi Hernandez <xhernandez@redhat.com>
74096c
Date: Sat, 13 Feb 2021 18:37:32 +0100
74096c
Subject: [PATCH 537/538] cluster/afr: Fix race in lockinfo (f)getxattr
74096c
74096c
A shared dictionary was updated outside the lock after having updated
74096c
the number of remaining answers. This means that one thread may be
74096c
processing the last answer and unwinding the request before another
74096c
thread completes updating the dict.
74096c
74096c
    Thread 1                           Thread 2
74096c
74096c
    LOCK()
74096c
    call_cnt-- (=1)
74096c
    UNLOCK()
74096c
                                       LOCK()
74096c
                                       call_cnt-- (=0)
74096c
                                       UNLOCK()
74096c
                                       update_dict(dict)
74096c
                                       if (call_cnt == 0) {
74096c
                                           STACK_UNWIND(dict);
74096c
                                       }
74096c
    update_dict(dict)
74096c
    if (call_cnt == 0) {
74096c
        STACK_UNWIND(dict);
74096c
    }
74096c
74096c
The updates from thread 1 are lost.
74096c
74096c
This patch also reduces the work done inside the locked region and
74096c
reduces code duplication.
74096c
74096c
Upstream-patch:
74096c
> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2162
74096c
> Fixes: #2161
74096c
> Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa
74096c
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
74096c
74096c
BUG: 1911292
74096c
Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa
74096c
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/228924
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
---
74096c
 xlators/cluster/afr/src/afr-inode-read.c | 254 ++++++++++++++-----------------
74096c
 1 file changed, 112 insertions(+), 142 deletions(-)
74096c
74096c
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
74096c
index cf305af..98e195a 100644
74096c
--- a/xlators/cluster/afr/src/afr-inode-read.c
74096c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
74096c
@@ -15,6 +15,8 @@
74096c
 #include <stdlib.h>
74096c
 #include <signal.h>
74096c
 
74096c
+#include <urcu/uatomic.h>
74096c
+
74096c
 #include <glusterfs/glusterfs.h>
74096c
 #include "afr.h"
74096c
 #include <glusterfs/dict.h>
74096c
@@ -868,188 +870,121 @@ afr_getxattr_quota_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74096c
     return 0;
74096c
 }
74096c
 
74096c
-int32_t
74096c
-afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74096c
-                          int32_t op_ret, int32_t op_errno, dict_t *dict,
74096c
-                          dict_t *xdata)
74096c
+static int32_t
74096c
+afr_update_local_dicts(call_frame_t *frame, dict_t *dict, dict_t *xdata)
74096c
 {
74096c
-    int call_cnt = 0, len = 0;
74096c
-    char *lockinfo_buf = NULL;
74096c
-    dict_t *lockinfo = NULL, *newdict = NULL;
74096c
-    afr_local_t *local = NULL;
74096c
+    afr_local_t *local;
74096c
+    dict_t *local_dict;
74096c
+    dict_t *local_xdata;
74096c
+    int32_t ret;
74096c
 
74096c
-    LOCK(&frame->lock);
74096c
-    {
74096c
-        local = frame->local;
74096c
+    local = frame->local;
74096c
+    local_dict = NULL;
74096c
+    local_xdata = NULL;
74096c
 
74096c
-        call_cnt = --local->call_count;
74096c
+    ret = -ENOMEM;
74096c
 
74096c
-        if ((op_ret < 0) || (!dict && !xdata)) {
74096c
-            goto unlock;
74096c
-        }
74096c
-
74096c
-        if (xdata) {
74096c
-            if (!local->xdata_rsp) {
74096c
-                local->xdata_rsp = dict_new();
74096c
-                if (!local->xdata_rsp) {
74096c
-                    local->op_ret = -1;
74096c
-                    local->op_errno = ENOMEM;
74096c
-                    goto unlock;
74096c
-                }
74096c
-            }
74096c
+    if ((dict != NULL) && (local->dict == NULL)) {
74096c
+        local_dict = dict_new();
74096c
+        if (local_dict == NULL) {
74096c
+            goto done;
74096c
         }
74096c
+    }
74096c
 
74096c
-        if (!dict) {
74096c
-            goto unlock;
74096c
+    if ((xdata != NULL) && (local->xdata_rsp == NULL)) {
74096c
+        local_xdata = dict_new();
74096c
+        if (local_xdata == NULL) {
74096c
+            goto done;
74096c
         }
74096c
+    }
74096c
 
74096c
-        op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
74096c
-                                      (void **)&lockinfo_buf, &len;;
74096c
+    if ((local_dict != NULL) || (local_xdata != NULL)) {
74096c
+        /* TODO: Maybe it would be better to preallocate both dicts before
74096c
+         *       sending the requests. This way we don't need to use a LOCK()
74096c
+         *       here. */
74096c
+        LOCK(&frame->lock);
74096c
 
74096c
-        if (!lockinfo_buf) {
74096c
-            goto unlock;
74096c
+        if ((local_dict != NULL) && (local->dict == NULL)) {
74096c
+            local->dict = local_dict;
74096c
+            local_dict = NULL;
74096c
         }
74096c
 
74096c
-        if (!local->dict) {
74096c
-            local->dict = dict_new();
74096c
-            if (!local->dict) {
74096c
-                local->op_ret = -1;
74096c
-                local->op_errno = ENOMEM;
74096c
-                goto unlock;
74096c
-            }
74096c
+        if ((local_xdata != NULL) && (local->xdata_rsp == NULL)) {
74096c
+            local->xdata_rsp = local_xdata;
74096c
+            local_xdata = NULL;
74096c
         }
74096c
-    }
74096c
-unlock:
74096c
-    UNLOCK(&frame->lock);
74096c
 
74096c
-    if (lockinfo_buf != NULL) {
74096c
-        lockinfo = dict_new();
74096c
-        if (lockinfo == NULL) {
74096c
-            local->op_ret = -1;
74096c
-            local->op_errno = ENOMEM;
74096c
-        } else {
74096c
-            op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
74096c
-
74096c
-            if (lockinfo && local->dict) {
74096c
-                dict_copy(lockinfo, local->dict);
74096c
-            }
74096c
-        }
74096c
-    }
74096c
-
74096c
-    if (xdata && local->xdata_rsp) {
74096c
-        dict_copy(xdata, local->xdata_rsp);
74096c
+        UNLOCK(&frame->lock);
74096c
     }
74096c
 
74096c
-    if (!call_cnt) {
74096c
-        newdict = dict_new();
74096c
-        if (!newdict) {
74096c
-            local->op_ret = -1;
74096c
-            local->op_errno = ENOMEM;
74096c
-            goto unwind;
74096c
+    if (dict != NULL) {
74096c
+        if (dict_copy(dict, local->dict) < 0) {
74096c
+            goto done;
74096c
         }
74096c
+    }
74096c
 
74096c
-        op_ret = dict_allocate_and_serialize(
74096c
-            local->dict, (char **)&lockinfo_buf, (unsigned int *)&len;;
74096c
-        if (op_ret != 0) {
74096c
-            local->op_ret = -1;
74096c
-            goto unwind;
74096c
+    if (xdata != NULL) {
74096c
+        if (dict_copy(xdata, local->xdata_rsp) < 0) {
74096c
+            goto done;
74096c
         }
74096c
+    }
74096c
 
74096c
-        op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
74096c
-                                 (void *)lockinfo_buf, len);
74096c
-        if (op_ret < 0) {
74096c
-            local->op_ret = -1;
74096c
-            local->op_errno = -op_ret;
74096c
-            goto unwind;
74096c
-        }
74096c
+    ret = 0;
74096c
 
74096c
-    unwind:
74096c
-        AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict,
74096c
-                         local->xdata_rsp);
74096c
+done:
74096c
+    if (local_dict != NULL) {
74096c
+        dict_unref(local_dict);
74096c
     }
74096c
 
74096c
-    dict_unref(lockinfo);
74096c
+    if (local_xdata != NULL) {
74096c
+        dict_unref(local_xdata);
74096c
+    }
74096c
 
74096c
-    return 0;
74096c
+    return ret;
74096c
 }
74096c
 
74096c
-int32_t
74096c
-afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74096c
-                           int32_t op_ret, int32_t op_errno, dict_t *dict,
74096c
-                           dict_t *xdata)
74096c
+static void
74096c
+afr_getxattr_lockinfo_cbk_common(call_frame_t *frame, int32_t op_ret,
74096c
+                                 int32_t op_errno, dict_t *dict, dict_t *xdata,
74096c
+                                 bool is_fgetxattr)
74096c
 {
74096c
-    int call_cnt = 0, len = 0;
74096c
+    int len = 0;
74096c
     char *lockinfo_buf = NULL;
74096c
     dict_t *lockinfo = NULL, *newdict = NULL;
74096c
     afr_local_t *local = NULL;
74096c
 
74096c
-    LOCK(&frame->lock);
74096c
-    {
74096c
-        local = frame->local;
74096c
-
74096c
-        call_cnt = --local->call_count;
74096c
-
74096c
-        if ((op_ret < 0) || (!dict && !xdata)) {
74096c
-            goto unlock;
74096c
-        }
74096c
-
74096c
-        if (xdata) {
74096c
-            if (!local->xdata_rsp) {
74096c
-                local->xdata_rsp = dict_new();
74096c
-                if (!local->xdata_rsp) {
74096c
-                    local->op_ret = -1;
74096c
-                    local->op_errno = ENOMEM;
74096c
-                    goto unlock;
74096c
-                }
74096c
-            }
74096c
-        }
74096c
-
74096c
-        if (!dict) {
74096c
-            goto unlock;
74096c
-        }
74096c
+    local = frame->local;
74096c
 
74096c
+    if ((op_ret >= 0) && (dict != NULL)) {
74096c
         op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
74096c
                                       (void **)&lockinfo_buf, &len;;
74096c
-
74096c
-        if (!lockinfo_buf) {
74096c
-            goto unlock;
74096c
-        }
74096c
-
74096c
-        if (!local->dict) {
74096c
-            local->dict = dict_new();
74096c
-            if (!local->dict) {
74096c
-                local->op_ret = -1;
74096c
-                local->op_errno = ENOMEM;
74096c
-                goto unlock;
74096c
+        if (lockinfo_buf != NULL) {
74096c
+            lockinfo = dict_new();
74096c
+            if (lockinfo == NULL) {
74096c
+                op_ret = -1;
74096c
+            } else {
74096c
+                op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
74096c
             }
74096c
         }
74096c
     }
74096c
-unlock:
74096c
-    UNLOCK(&frame->lock);
74096c
 
74096c
-    if (lockinfo_buf != NULL) {
74096c
-        lockinfo = dict_new();
74096c
-        if (lockinfo == NULL) {
74096c
-            local->op_ret = -1;
74096c
-            local->op_errno = ENOMEM;
74096c
-        } else {
74096c
-            op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
74096c
-
74096c
-            if (lockinfo && local->dict) {
74096c
-                dict_copy(lockinfo, local->dict);
74096c
-            }
74096c
+    if ((op_ret >= 0) && ((lockinfo != NULL) || (xdata != NULL))) {
74096c
+        op_ret = afr_update_local_dicts(frame, lockinfo, xdata);
74096c
+        if (lockinfo != NULL) {
74096c
+            dict_unref(lockinfo);
74096c
         }
74096c
     }
74096c
 
74096c
-    if (xdata && local->xdata_rsp) {
74096c
-        dict_copy(xdata, local->xdata_rsp);
74096c
+    if (op_ret < 0) {
74096c
+        local->op_ret = -1;
74096c
+        local->op_errno = ENOMEM;
74096c
     }
74096c
 
74096c
-    if (!call_cnt) {
74096c
+    if (uatomic_sub_return(&local->call_count, 1) == 0) {
74096c
         newdict = dict_new();
74096c
         if (!newdict) {
74096c
             local->op_ret = -1;
74096c
-            local->op_errno = ENOMEM;
74096c
+            local->op_errno = op_errno = ENOMEM;
74096c
             goto unwind;
74096c
         }
74096c
 
74096c
@@ -1057,23 +992,58 @@ unlock:
74096c
             local->dict, (char **)&lockinfo_buf, (unsigned int *)&len;;
74096c
         if (op_ret != 0) {
74096c
             local->op_ret = -1;
74096c
+            local->op_errno = op_errno = ENOMEM;
74096c
             goto unwind;
74096c
         }
74096c
 
74096c
         op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
74096c
                                  (void *)lockinfo_buf, len);
74096c
         if (op_ret < 0) {
74096c
-            local->op_ret = -1;
74096c
-            local->op_errno = -op_ret;
74096c
+            GF_FREE(lockinfo_buf);
74096c
+            local->op_ret = op_ret = -1;
74096c
+            local->op_errno = op_errno = -op_ret;
74096c
             goto unwind;
74096c
         }
74096c
 
74096c
     unwind:
74096c
-        AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict,
74096c
-                         local->xdata_rsp);
74096c
+        /* TODO: These unwinds use op_ret and op_errno instead of local->op_ret
74096c
+         *       and local->op_errno. This doesn't seem right because any
74096c
+         *       failure during processing of each answer could be silently
74096c
+         *       ignored. This is kept this was the old behavior and because
74096c
+         *       local->op_ret is initialized as -1 and local->op_errno is
74096c
+         *       initialized as EUCLEAN, which makes these values useless. */
74096c
+        if (is_fgetxattr) {
74096c
+            AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict,
74096c
+                             local->xdata_rsp);
74096c
+        } else {
74096c
+            AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict,
74096c
+                             local->xdata_rsp);
74096c
+        }
74096c
+
74096c
+        if (newdict != NULL) {
74096c
+            dict_unref(newdict);
74096c
+        }
74096c
     }
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74096c
+                          int32_t op_ret, int32_t op_errno, dict_t *dict,
74096c
+                          dict_t *xdata)
74096c
+{
74096c
+    afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata,
74096c
+                                     false);
74096c
 
74096c
-    dict_unref(lockinfo);
74096c
+    return 0;
74096c
+}
74096c
+
74096c
+static int32_t
74096c
+afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74096c
+                           int32_t op_ret, int32_t op_errno, dict_t *dict,
74096c
+                           dict_t *xdata)
74096c
+{
74096c
+    afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata,
74096c
+                                     true);
74096c
 
74096c
     return 0;
74096c
 }
74096c
-- 
74096c
1.8.3.1
74096c