From ad233c1b3abdfe2bdfd1eacc83b5f84b7afa6b46 Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Tue, 1 Oct 2019 17:37:15 +0530
Subject: [PATCH 304/304] cluster/dht: Correct fd processing loop

The fd processing loops in the
dht_migration_complete_check_task and the
dht_rebalance_inprogress_task functions were unsafe
and could cause an open to be sent on an already freed
fd. This has been fixed.
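
The rewritten loops replace list_for_each_entry_safe() with a manual
walk of inode->fd_list: each fd is pinned with fd_ref() while
inode->lock is still held, the lock is dropped for the syncop_open(),
and that reference is released (outside the lock) only after the next
element has been read from the list and pinned in its turn, so the
loop never follows a next pointer belonging to a freed fd. The sketch
below is a minimal, self-contained model of this pattern; the names
(node, list_lock, node_ref, node_unref, blocking_call) are
illustrative stand-ins rather than GlusterFS APIs, and the
anonymous-fd/skip_open handling of the real patch is omitted.

  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>

  /* Intrusive circular list node, modelled on fd->inode_list. */
  struct node {
      struct node *next, *prev;
      int refcount;
  };

  static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
  static struct node head = {&head, &head, 0}; /* models inode->fd_list */

  /* Caller must hold list_lock. */
  static void node_ref(struct node *n)
  {
      n->refcount++;
  }

  /* Like fd_unref(), the final release unlinks the node from the list,
   * so it takes list_lock itself and must not be called with it held. */
  static void node_unref(struct node *n)
  {
      pthread_mutex_lock(&list_lock);
      if (--n->refcount > 0) {
          pthread_mutex_unlock(&list_lock);
          return;
      }
      n->prev->next = n->next;
      n->next->prev = n->prev;
      pthread_mutex_unlock(&list_lock);
      free(n);
  }

  /* Stands in for syncop_open(): may block, must run unlocked. */
  static void blocking_call(struct node *n)
  {
      printf("processing %p outside the lock\n", (void *)n);
  }

  int main(void)
  {
      struct node *iter = NULL, *tmp = NULL;

      for (int i = 0; i < 3; i++) { /* build a small list, one ref each */
          struct node *n = calloc(1, sizeof(*n));
          n->refcount = 1;
          n->next = &head;
          n->prev = head.prev;
          head.prev->next = n;
          head.prev = n;
      }

      pthread_mutex_lock(&list_lock);
      iter = head.next;
      while (iter != &head) {
          node_ref(iter); /* pin iter so iter->next stays valid */
          pthread_mutex_unlock(&list_lock);

          if (tmp) { /* only now is it safe to drop the previous ref */
              node_unref(tmp);
              tmp = NULL;
          }
          blocking_call(iter);

          pthread_mutex_lock(&list_lock);
          tmp = iter;        /* keep the ref until the next node is read */
          iter = iter->next;
      }
      pthread_mutex_unlock(&list_lock);

      if (tmp)
          node_unref(tmp);
      return 0; /* the initial per-node refs are deliberately not dropped */
  }

Note that, as in the patch, the reference is only ever dropped after
the lock has been released.
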
> Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540
> Fixes: bz#1757399
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> (Cherry picked from commit 9b15867070b0cc241ab165886292ecffc3bc0aed)
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23506/)

Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540
BUG: 1756325
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/182826
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 xlators/cluster/dht/src/dht-helper.c | 84 ++++++++++++++++++++++++++----------
 1 file changed, 62 insertions(+), 22 deletions(-)

diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 4c57e0d..1e9fee0 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -1261,6 +1261,7 @@ dht_migration_complete_check_task(void *data)
     fd_t *tmp = NULL;
     uint64_t tmp_miginfo = 0;
     dht_migrate_info_t *miginfo = NULL;
+    gf_boolean_t skip_open = _gf_false;
     int open_failed = 0;
 
     this = THIS;
@@ -1399,24 +1400,34 @@ dht_migration_complete_check_task(void *data)
      * the loop will cause the destruction of the fd. So we need to
      * iterate the list safely because iter_fd cannot be trusted.
      */
-    list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
-    {
-        if (fd_is_anonymous(iter_fd))
-            continue;
-
-        if (dht_fd_open_on_dst(this, iter_fd, dst_node))
-            continue;
-
+    iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+    while (&iter_fd->inode_list != (&inode->fd_list)) {
+        if (fd_is_anonymous(iter_fd) ||
+            (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+            if (!tmp) {
+                iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+                                     inode_list);
+                continue;
+            }
+            skip_open = _gf_true;
+        }
         /* We need to release the inode->lock before calling
          * syncop_open() to avoid possible deadlocks. However this
          * can cause the iter_fd to be released by other threads.
          * To avoid this, we take a reference before releasing the
          * lock.
          */
-        __fd_ref(iter_fd);
+        fd_ref(iter_fd);
 
         UNLOCK(&inode->lock);
 
+        if (tmp) {
+            fd_unref(tmp);
+            tmp = NULL;
+        }
+        if (skip_open)
+            goto next;
+
         /* flags for open are stripped down to allow following the
          * new location of the file, otherwise we can get EEXIST or
          * truncate the file again as rebalance is moving the data */
@@ -1438,9 +1449,11 @@ dht_migration_complete_check_task(void *data)
             dht_fd_ctx_set(this, iter_fd, dst_node);
         }
 
-        fd_unref(iter_fd);
-
+    next:
         LOCK(&inode->lock);
+        skip_open = _gf_false;
+        tmp = iter_fd;
+        iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
     }
 
     SYNCTASK_SETID(frame->root->uid, frame->root->gid);
@@ -1453,6 +1466,10 @@ dht_migration_complete_check_task(void *data)
 
 unlock:
     UNLOCK(&inode->lock);
+    if (tmp) {
+        fd_unref(tmp);
+        tmp = NULL;
+    }
 
 out:
     if (dict) {
@@ -1534,6 +1551,7 @@ dht_rebalance_inprogress_task(void *data)
     int open_failed = 0;
     uint64_t tmp_miginfo = 0;
     dht_migrate_info_t *miginfo = NULL;
+    gf_boolean_t skip_open = _gf_false;
 
     this = THIS;
     frame = data;
@@ -1654,24 +1672,40 @@ dht_rebalance_inprogress_task(void *data)
      * the loop will cause the destruction of the fd. So we need to
      * iterate the list safely because iter_fd cannot be trusted.
      */
-    list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
-    {
-        if (fd_is_anonymous(iter_fd))
-            continue;
-
-        if (dht_fd_open_on_dst(this, iter_fd, dst_node))
-            continue;
-
+    iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+    while (&iter_fd->inode_list != (&inode->fd_list)) {
         /* We need to release the inode->lock before calling
          * syncop_open() to avoid possible deadlocks. However this
          * can cause the iter_fd to be released by other threads.
          * To avoid this, we take a reference before releasing the
          * lock.
          */
-        __fd_ref(iter_fd);
 
+        if (fd_is_anonymous(iter_fd) ||
+            (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+            if (!tmp) {
+                iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+                                     inode_list);
+                continue;
+            }
+            skip_open = _gf_true;
+        }
+
+        /* Yes, this is ugly but there isn't a cleaner way to do this
+         * the fd_ref is an atomic increment so not too bad. We want to
+         * reduce the number of inode locks and unlocks.
+         */
+
+        fd_ref(iter_fd);
         UNLOCK(&inode->lock);
 
+        if (tmp) {
+            fd_unref(tmp);
+            tmp = NULL;
+        }
+        if (skip_open)
+            goto next;
+
         /* flags for open are stripped down to allow following the
          * new location of the file, otherwise we can get EEXIST or
          * truncate the file again as rebalance is moving the data */
@@ -1692,9 +1726,11 @@ dht_rebalance_inprogress_task(void *data)
             dht_fd_ctx_set(this, iter_fd, dst_node);
         }
 
-        fd_unref(iter_fd);
-
+    next:
         LOCK(&inode->lock);
+        skip_open = _gf_false;
+        tmp = iter_fd;
+        iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
     }
 
     SYNCTASK_SETID(frame->root->uid, frame->root->gid);
@@ -1702,6 +1738,10 @@ dht_rebalance_inprogress_task(void *data)
 unlock:
     UNLOCK(&inode->lock);
 
+    if (tmp) {
+        fd_unref(tmp);
+        tmp = NULL;
+    }
     if (open_failed) {
         ret = -1;
         goto out;
-- 
1.8.3.1