From 7b60141d7ed373425e5ffad6359aedb52b2712d2 Mon Sep 17 00:00:00 2001
From: Susant Palai <spalai@redhat.com>
Date: Mon, 7 Nov 2016 12:00:13 +0530
Subject: [PATCH 455/473] dht/rebalance: Crawler performance improvement

The job of the crawler in rebalance is to fetch files from each local
subvolume and push them to the migration queue if they are eligible for
migration. To determine eligibility, we do a lookup on each entry
received. Since the lookup is done on a local subvolume, we receive
linkto files as well as regular files, which forces us to do two lookups
per entry:

first: a lookup on the file to figure out whether it is a linkto file
second: a lookup on the file to figure out whether it should be migrated

Note: The migrator thread also does one lookup for the file before
migration.
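
For reference, the old per-entry crawler flow looked roughly like the
condensed sketch below. It is illustrative only: the names are taken
from the code removed in this patch, and error handling, loc setup and
queueing details are omitted.

    /* Sketch of the old crawler flow, per directory entry */
    dht_build_child_loc (this, &entry_loc, loc, df_entry->d_name);

    /* lookup #1: on the local subvolume, to fetch the linkto xattr */
    ret = syncop_lookup (conf->local_subvols[i], &entry_loc,
                         &iatt, NULL, xattr_req, &xattr_rsp);
    is_linkfile = check_is_linkfile (NULL, &iatt, xattr_rsp,
                                     conf->link_xattr_name);
    if (is_linkfile)
            continue;          /* linkto files are never queued */

    /* lookup #2: through dht itself, to resolve hashed/cached subvols */
    ret = syncop_lookup (this, &entry_loc, NULL, NULL, NULL, NULL);
    if (dht_subvol_get_hashed (this, &entry_loc) ==
        dht_subvol_get_cached (this, entry_loc.inode))
            continue;          /* file already sits on its hashed subvol */

    /* lookup #3 happens later, in the migrator thread, just before
       migration -- three lookups per migratable entry in total */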

Optimization: Remove the lookups done by the crawler and offload that
work to the migrator threads. For linkto-file detection, use the stat
and xattr information already returned by readdirp.

So in total we have one lookup instead of three for each entry.
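
The new per-entry flow, condensed from the change below, is roughly as
follows (illustrative only; names come from the added code, and error
handling and queueing details are omitted):

    /* readdirp now carries xattr_req, so every entry already comes
       back with its stat and the linkto xattr (if any) attached */
    ret = syncop_readdirp (conf->local_subvols[i], fd, 131072,
                           dir_dfmeta->offset_var[i].offset,
                           &(dir_dfmeta->equeue[i]),
                           xattr_req, NULL);

    /* per entry: classify straight from the readdirp data, no lookup */
    is_linkfile = check_is_linkfile (NULL, &df_entry->d_stat,
                                     df_entry->dict,
                                     conf->link_xattr_name);
    if (is_linkfile)
            continue;          /* skip linkto files as before */

    /* eligible entries go straight to the migration queue; the single
       remaining lookup is done by the migrator thread before migration */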

Performance numbers:
Created a two-node, two-brick setup with 100000 files and started
rebalance. Since there is no add-brick, no files are migrated, so the
numbers below reflect pure crawler performance.

Without patch:
[root@gprfs039 ~]# grs
                Node  Rebalanced-files    size  scanned  failures  skipped     status  run time in h:m:s
           ---------       -----------  ------  -------  --------  -------  ---------  -----------------
           localhost                 0  0Bytes    50070         0        0  completed             0:0:48
             server2                 0  0Bytes    49930         0        0  completed             0:0:44
volume rebalance: test1: success

Total: 48 seconds

With the patch:
[root@gprfs039 mnt]# gluster v rebalance test1 status
                Node  Rebalanced-files    size  scanned  failures  skipped     status  run time in h:m:s
           ---------       -----------  ------  -------  --------  -------  ---------  -----------------
           localhost                 0  0Bytes    50070         0        0  completed             0:0:12
             server2                 0  0Bytes    49930         0        0  completed             0:0:12
volume rebalance: test1: success

Total: 12 seconds

That's a 4x speed gain. :)

> Updates glusterfs#155
> Change-Id: Idc8e5b366e76c54aa40d698876ae62fe1630b6cc
> BUG: 1439571
> Signed-off-by: Susant Palai <spalai@redhat.com>
> Reviewed-on: https://review.gluster.org/15781
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>

Change-Id: Idc8e5b366e76c54aa40d698876ae62fe1630b6cc
BUG: 1381142
Signed-off-by: Susant Palai <spalai@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/106411
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
---
 xlators/cluster/dht/src/dht-rebalance.c | 132 ++------------------------------
 1 file changed, 6 insertions(+), 126 deletions(-)

diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 570843a..f1189e9 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2840,12 +2840,8 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
         int                     ret             = -1;
         char                    is_linkfile     = 0;
         gf_dirent_t            *df_entry        = NULL;
-        loc_t                   entry_loc       = {0,};
         dict_t                 *xattr_rsp       = NULL;
-        struct iatt             iatt            = {0,};
         struct dht_container   *tmp_container   = NULL;
-        xlator_t               *hashed_subvol   = NULL;
-        xlator_t               *cached_subvol   = NULL;
 
         if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
                 ret = -1;
@@ -2861,7 +2857,7 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
                 ret = syncop_readdirp (conf->local_subvols[i], fd, 131072,
                                        dir_dfmeta->offset_var[i].offset,
                                        &(dir_dfmeta->equeue[i]),
-                                       NULL, NULL);
+                                       xattr_req, NULL);
                 if (ret == 0) {
                         dir_dfmeta->offset_var[i].readdir_done = 1;
                         ret = 0;
@@ -2925,136 +2921,22 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
                         continue;
                 }
 
-                loc_wipe (&entry_loc);
-                ret = dht_build_child_loc (this, &entry_loc, loc,
-                                          df_entry->d_name);
-                if (ret) {
-                        gf_log (this->name, GF_LOG_ERROR, "Child loc"
-                                " build failed");
-                        ret = -1;
-                        goto out;
-                }
-
-                if (gf_uuid_is_null (df_entry->d_stat.ia_gfid)) {
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
-                                DHT_MSG_GFID_NULL,
-                                "%s/%s gfid not present", loc->path,
-                                df_entry->d_name);
-                        continue;
-                }
-
-                gf_uuid_copy (entry_loc.gfid, df_entry->d_stat.ia_gfid);
-
-                if (gf_uuid_is_null (loc->gfid)) {
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
-                                DHT_MSG_GFID_NULL,
-                                "%s/%s gfid not present", loc->path,
-                                df_entry->d_name);
-                        continue;
-                }
-
-                gf_uuid_copy (entry_loc.pargfid, loc->gfid);
-
-                entry_loc.inode->ia_type = df_entry->d_stat.ia_type;
-
-                if (xattr_rsp) {
-                        dict_unref (xattr_rsp);
-                        xattr_rsp = NULL;
-                }
-
-                ret = syncop_lookup (conf->local_subvols[i], &entry_loc,
-                                        &iatt, NULL, xattr_req, &xattr_rsp);
-                if (ret) {
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
-                                DHT_MSG_MIGRATE_FILE_FAILED,
-                                "Migrate file failed:%s lookup failed",
-                                entry_loc.path);
-
-                        if (-ret != ENOENT && -ret != ESTALE) {
-
-                                defrag->total_failures++;
-
-                                if (conf->decommission_in_progress) {
-                                        ret = -1;
-                                        goto out;
-                                } else {
-                                       *should_commit_hash = 0;
-                                        continue;
-                                }
-                        }
-
-                        continue;
-                }
-
-
-                is_linkfile = check_is_linkfile (NULL, &iatt, xattr_rsp,
-                                                conf->link_xattr_name);
+                is_linkfile = check_is_linkfile (NULL, &df_entry->d_stat,
+                                                 df_entry->dict,
+                                                 conf->link_xattr_name);
 
                 if (is_linkfile) {
                         /* No need to add linkto file to the queue for
                            migration. Only the actual data file need to
                            be checked for migration criteria.
                         */
+
                         gf_msg_debug (this->name, 0, "Skipping linkfile"
-                                      " %s on subvol: %s", entry_loc.path,
+                                      " %s on subvol: %s", df_entry->d_name,
                                       conf->local_subvols[i]->name);
                         continue;
                 }
 
-                ret = syncop_lookup (this, &entry_loc, NULL, NULL,
-                                     NULL, NULL);
-                if (ret) {
-                        gf_msg (this->name, GF_LOG_WARNING, -ret,
-                                DHT_MSG_MIGRATE_FILE_FAILED,
-                                "lookup failed for file:%s",
-                                entry_loc.path);
-
-                        if (-ret != ENOENT && -ret != ESTALE) {
-
-                                defrag->total_failures++;
-
-                                if (conf->decommission_in_progress) {
-                                        ret = -1;
-                                        goto out;
-                                } else {
-                                        *should_commit_hash = 0;
-                                        continue;
-                                }
-                        }
-
-                        continue;
-                }
-
-               /* if distribute is present, it will honor this key.
-                * -1, ENODATA is returned if distribute is not present
-                * or file doesn't have a link-file. If file has
-                * link-file, the path of link-file will be the value,
-                * and also that guarantees that file has to be mostly
-                * migrated */
-
-                hashed_subvol = dht_subvol_get_hashed (this, &entry_loc);
-                if (!hashed_subvol) {
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
-                                DHT_MSG_HASHED_SUBVOL_GET_FAILED,
-                                "Failed to get hashed subvol for %s",
-                                loc->path);
-                        continue;
-                }
-
-                cached_subvol = dht_subvol_get_cached (this, entry_loc.inode);
-                if (!cached_subvol) {
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
-                                DHT_MSG_CACHED_SUBVOL_GET_FAILED,
-                                "Failed to get cached subvol for %s",
-                                loc->path);
-
-                        continue;
-                }
-
-                if (hashed_subvol == cached_subvol) {
-                        continue;
-                }
-
                /*Build Container Structure */
 
                 tmp_container =  GF_CALLOC (1, sizeof(struct dht_container),
@@ -3115,8 +2997,6 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
         }
 
 out:
-        loc_wipe (&entry_loc);
-
         if (ret == 0) {
                 *container = tmp_container;
         } else {
-- 
1.8.3.1