diff --git a/.glusterfs.metadata b/.glusterfs.metadata index bd41365..98d5fc3 100644 --- a/.glusterfs.metadata +++ b/.glusterfs.metadata @@ -1 +1 @@ -bf1d8624cb45d10cf4ebf43bf7d3dc53dd55485a SOURCES/glusterfs-6.0.tar.gz +c9d75f37e00502a10f64cd4ba9aafb17552e0800 SOURCES/glusterfs-6.0.tar.gz diff --git a/README.debrand b/README.debrand deleted file mode 100644 index 01c46d2..0000000 --- a/README.debrand +++ /dev/null @@ -1,2 +0,0 @@ -Warning: This package was configured for automatic debranding, but the changes -failed to apply. diff --git a/SOURCES/0277-geo-rep-Fix-Config-Get-Race.patch b/SOURCES/0277-geo-rep-Fix-Config-Get-Race.patch new file mode 100644 index 0000000..45dada1 --- /dev/null +++ b/SOURCES/0277-geo-rep-Fix-Config-Get-Race.patch @@ -0,0 +1,109 @@ +From f40570f2f784dc61edb061a4931dcfc16bf51e7e Mon Sep 17 00:00:00 2001 +From: Aravinda VK +Date: Mon, 5 Aug 2019 19:00:21 +0530 +Subject: [PATCH 277/284] geo-rep: Fix Config Get Race + +When two threads(sync jobs) in Geo-rep worker calls `gconf.get` and +`gconf.getr`(realtime) at the sametime, `getr` resets the conf object +and other one gets None. Thread Lock is introduced to fix the issue. 
+ +``` + File "/usr/libexec/glusterfs/python/syncdaemon/syncdutils.py", + line 368, in twrap + tf(*aargs) + File "/usr/libexec/glusterfs/python/syncdaemon/master.py", line 1987, + in syncjob + po = self.sync_engine(pb, self.log_err) + File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", + line 1444, in rsync + rconf.ssh_ctl_args + \ +AttributeError: 'NoneType' object has no attribute 'split' +``` + +Backport of: + > Patch: https://review.gluster.org/#/c/glusterfs/+/23158/ + > Change-Id: I9c245e5c36338265354e158f5baa32b119eb2da5 + > Updates: bz#1737484 + > Signed-off-by: Aravinda VK + +Change-Id: I9c245e5c36338265354e158f5baa32b119eb2da5 +BUG: 1729915 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/178960 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/gsyncdconfig.py | 27 +++++++++++++++++++++------ + 1 file changed, 21 insertions(+), 6 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py +index 1fc451f..38f3594 100644 +--- a/geo-replication/syncdaemon/gsyncdconfig.py ++++ b/geo-replication/syncdaemon/gsyncdconfig.py +@@ -17,6 +17,7 @@ import os + import shutil + from string import Template + from datetime import datetime ++from threading import Lock + + + # Global object which can be used in other modules +@@ -35,6 +36,7 @@ class GconfInvalidValue(Exception): + class Gconf(object): + def __init__(self, default_conf_file, custom_conf_file=None, + args={}, extra_tmpl_args={}, override_from_args=False): ++ self.lock = Lock() + self.default_conf_file = default_conf_file + self.custom_conf_file = custom_conf_file + self.tmp_conf_file = None +@@ -163,6 +165,11 @@ class Gconf(object): + if value is not None and not self._is_valid_value(name, value): + raise GconfInvalidValue() + ++ ++ def _load_with_lock(self): ++ with self.lock: ++ self._load() ++ + def _load(self): + self.gconf = {} + 
self.template_conf = [] +@@ -230,12 +237,19 @@ class Gconf(object): + self._tmpl_substitute() + self._do_typecast() + +- def reload(self): ++ def reload(self, with_lock=True): + if self._is_config_changed(): +- self._load() ++ if with_lock: ++ self._load_with_lock() ++ else: ++ self._load() + +- def get(self, name, default_value=None): +- return self.gconf.get(name, default_value) ++ def get(self, name, default_value=None, with_lock=True): ++ if with_lock: ++ with self.lock: ++ return self.gconf.get(name, default_value) ++ else: ++ return self.gconf.get(name, default_value) + + def getall(self, show_defaults=False, show_non_configurable=False): + cnf = {} +@@ -276,8 +290,9 @@ class Gconf(object): + return cnf + + def getr(self, name, default_value=None): +- self.reload() +- return self.get(name, default_value) ++ with self.lock: ++ self.reload(with_lock=False) ++ return self.get(name, default_value, with_lock=False) + + def get_help(self, name=None): + pass +-- +1.8.3.1 + diff --git a/SOURCES/0278-geo-rep-Fix-worker-connection-issue.patch b/SOURCES/0278-geo-rep-Fix-worker-connection-issue.patch new file mode 100644 index 0000000..00cb48f --- /dev/null +++ b/SOURCES/0278-geo-rep-Fix-worker-connection-issue.patch @@ -0,0 +1,45 @@ +From 924a25990948c9d76001cf4134fc5a2fcbf5c02c Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Fri, 16 Aug 2019 15:38:49 +0530 +Subject: [PATCH 278/284] geo-rep: Fix worker connection issue + +All the workers connects to primary slave node. It should +connect to available slave nodes in round robin fashion +and choose different slave node if the corresponding slave +node is down. This patch fixes the same. + +Thanks Aravinda for the help in root causing this. 
+ +Backport of: + > Patch: https://review.gluster.org/23247/ + > Change-Id: I9f8e7744f4adb8a24833cf173681d109710f98cb + > Signed-off-by: Kotresh HR + > Updates: bz#1737484 + +Change-Id: I9f8e7744f4adb8a24833cf173681d109710f98cb +Signed-off-by: Kotresh HR +BUG: 1729915 +Reviewed-on: https://code.engineering.redhat.com/gerrit/178961 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/subcmds.py | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py +index 4ece7e0..8de7db2 100644 +--- a/geo-replication/syncdaemon/subcmds.py ++++ b/geo-replication/syncdaemon/subcmds.py +@@ -73,7 +73,8 @@ def subcmd_worker(args): + Popen.init_errhandler() + fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + local = GLUSTER("localhost", args.master) +- slavehost, slavevol = args.slave.split("::") ++ slavevol = args.slave.split("::")[-1] ++ slavehost = args.resource_remote + remote = SSH(slavehost, slavevol) + remote.connect_remote() + local.connect() +-- +1.8.3.1 + diff --git a/SOURCES/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch b/SOURCES/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch new file mode 100644 index 0000000..3bbd56c --- /dev/null +++ b/SOURCES/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch @@ -0,0 +1,253 @@ +From bf24623765817ede84ea47f3265f5e6c2ae17ee7 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 16 Jul 2019 20:36:57 +0530 +Subject: [PATCH 279/284] posix: In brick_mux brick is crashed while start/stop + volume in loop + +Problem: In brick_mux environment sometime brick is crashed while + volume stop/start in a loop.Brick is crashed in janitor task + at the time of accessing priv.If posix priv is cleaned up before + call janitor task then janitor task is crashed. 
+ +Solution: To avoid the crash in brick_mux environment introduce a new + flag janitor_task_stop in posix_private and before send CHILD_DOWN event + wait for update the flag by janitor_task_done + +> Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4 +> fixes: bz#1730409 +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit f138d3fa2237e7fa940ecf17153fd700350c4138) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23060/) + +Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4 +fixex: bz#1729971 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/178934 +Tested-by: Mohit Agrawal +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/xlator.h | 3 +++ + xlators/mgmt/glusterd/src/glusterd-utils.c | 5 ++-- + xlators/protocol/server/src/server.c | 6 ++++- + xlators/storage/posix/src/posix-common.c | 40 +++++++++++++++++++++++++++++- + xlators/storage/posix/src/posix-helpers.c | 16 ++++++++++++ + xlators/storage/posix/src/posix.h | 3 +++ + 6 files changed, 69 insertions(+), 4 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index b78daad..da551e9 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -861,6 +861,9 @@ struct _xlator { + + /* Flag to notify got CHILD_DOWN event for detach brick */ + uint32_t notify_down; ++ ++ /* Flag to avoid throw duplicate PARENT_DOWN event */ ++ uint32_t parent_down; + }; + + /* This would be the only structure which needs to be exported by +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 2aa975b..812c698 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -4082,8 +4082,9 @@ out: + if (msg[0]) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s", + msg); +- 
gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s", +- new_brickinfo->hostname, new_brickinfo->path); ++ if (new_brickinfo) ++ gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s", ++ new_brickinfo->hostname, new_brickinfo->path); + } + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index 6ae63ba..a5f09fe 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -580,6 +580,7 @@ server_graph_janitor_threads(void *data) + gf_boolean_t victim_found = _gf_false; + xlator_list_t **trav_p = NULL; + xlator_t *top = NULL; ++ uint32_t parent_down = 0; + + GF_ASSERT(data); + +@@ -598,7 +599,10 @@ server_graph_janitor_threads(void *data) + victim = (*trav_p)->xlator; + if (victim->cleanup_starting && + strcmp(victim->name, victim_name) == 0) { +- victim_found = _gf_true; ++ parent_down = victim->parent_down; ++ victim->parent_down = 1; ++ if (!parent_down) ++ victim_found = _gf_true; + break; + } + } +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index d738692..69857d9 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -146,10 +146,15 @@ int32_t + posix_notify(xlator_t *this, int32_t event, void *data, ...) + { + xlator_t *victim = data; ++ struct posix_private *priv = this->private; ++ int ret = 0; ++ struct timespec sleep_till = { ++ 0, ++ }; + + switch (event) { + case GF_EVENT_PARENT_UP: { +- /* Tell the parent that posix xlator is up */ ++ /* the parent that posix xlator is up */ + default_notify(this, GF_EVENT_CHILD_UP, data); + } break; + +@@ -158,6 +163,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) 
+ break; + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); ++ ++ if (priv->janitor) { ++ pthread_mutex_lock(&priv->janitor_mutex); ++ { ++ priv->janitor_task_stop = _gf_true; ++ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, ++ priv->janitor); ++ if (!ret) { ++ clock_gettime(CLOCK_REALTIME, &sleep_till); ++ sleep_till.tv_sec += 1; ++ /* Wait to set janitor_task flag to _gf_false by ++ * janitor_task_done */ ++ while (priv->janitor_task_stop) { ++ (void)pthread_cond_timedwait(&priv->janitor_cond, ++ &priv->janitor_mutex, ++ &sleep_till); ++ clock_gettime(CLOCK_REALTIME, &sleep_till); ++ sleep_till.tv_sec += 1; ++ } ++ } ++ } ++ pthread_mutex_unlock(&priv->janitor_mutex); ++ GF_FREE(priv->janitor); ++ } ++ priv->janitor = NULL; + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); + } break; + default: +@@ -1008,6 +1038,8 @@ posix_init(xlator_t *this) + + pthread_mutex_init(&_private->fsync_mutex, NULL); + pthread_cond_init(&_private->fsync_cond, NULL); ++ pthread_mutex_init(&_private->janitor_mutex, NULL); ++ pthread_cond_init(&_private->janitor_cond, NULL); + INIT_LIST_HEAD(&_private->fsyncs); + ret = posix_spawn_ctx_janitor_thread(this); + if (ret) +@@ -1128,6 +1160,7 @@ posix_fini(xlator_t *this) + (void)gf_thread_cleanup_xint(priv->disk_space_check); + priv->disk_space_check = 0; + } ++ + if (priv->janitor) { + /*TODO: Make sure the synctask is also complete */ + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); +@@ -1135,8 +1168,10 @@ posix_fini(xlator_t *this) + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED, + "Failed to delete janitor timer"); + } ++ GF_FREE(priv->janitor); + priv->janitor = NULL; + } ++ + if (priv->fsyncer) { + (void)gf_thread_cleanup_xint(priv->fsyncer); + priv->fsyncer = 0; +@@ -1148,6 +1183,9 @@ posix_fini(xlator_t *this) + GF_FREE(priv->base_path); + LOCK_DESTROY(&priv->lock); + pthread_mutex_destroy(&priv->fsync_mutex); ++ 
pthread_cond_destroy(&priv->fsync_cond); ++ pthread_mutex_destroy(&priv->janitor_mutex); ++ pthread_cond_destroy(&priv->janitor_cond); + GF_FREE(priv->hostname); + GF_FREE(priv->trash_path); + GF_FREE(priv); +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 07169b5..ef5bfd5 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1432,12 +1432,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data) + this = data; + priv = this->private; + ++ pthread_mutex_lock(&priv->janitor_mutex); ++ { ++ if (priv->janitor_task_stop) { ++ priv->janitor_task_stop = _gf_false; ++ pthread_cond_signal(&priv->janitor_cond); ++ pthread_mutex_unlock(&priv->janitor_mutex); ++ goto out; ++ } ++ } ++ pthread_mutex_unlock(&priv->janitor_mutex); ++ + LOCK(&priv->lock); + { + __posix_janitor_timer_start(this); + } + UNLOCK(&priv->lock); + ++out: + return 0; + } + +@@ -1456,6 +1468,9 @@ posix_janitor_task(void *data) + old_this = THIS; + THIS = this; + ++ if (!priv) ++ goto out; ++ + time(&now); + if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { + if (priv->disable_landfill_purge) { +@@ -1475,6 +1490,7 @@ posix_janitor_task(void *data) + + THIS = old_this; + ++out: + return 0; + } + +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index b0935a7..64288a7 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -203,6 +203,8 @@ struct posix_private { + struct list_head fsyncs; + pthread_mutex_t fsync_mutex; + pthread_cond_t fsync_cond; ++ pthread_mutex_t janitor_mutex; ++ pthread_cond_t janitor_cond; + int fsync_queue_count; + + enum { +@@ -257,6 +259,7 @@ struct posix_private { + + gf_boolean_t fips_mode_rchecksum; + gf_boolean_t ctime; ++ gf_boolean_t janitor_task_stop; + }; + + typedef struct { +-- +1.8.3.1 + diff --git 
a/SOURCES/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch b/SOURCES/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch new file mode 100644 index 0000000..38b4d48 --- /dev/null +++ b/SOURCES/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch @@ -0,0 +1,153 @@ +From 2d7d9165c6a8619eef553859b4b7136b8e9ccb55 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Sat, 10 Aug 2019 10:30:26 +0530 +Subject: [PATCH 280/284] performance/md-cache: Do not skip caching of null + character xattr values + +Null character string is a valid xattr value in file system. But for +those xattrs processed by md-cache, it does not update its entries if +value is null('\0'). This results in ENODATA when those xattrs are +queried afterwards via getxattr() causing failures in basic operations +like create, copy etc in a specially configured Samba setup for Mac OS +clients. + +On the other side snapview-server is internally setting empty string("") +as value for xattrs received as part of listxattr() and are not intended +to be cached. Therefore we try to maintain that behaviour using an +additional dictionary key to prevent updation of entries in getxattr() +and fgetxattr() callbacks in md-cache. 
+ +Credits: Poornima G + +Backport of https://review.gluster.org/c/glusterfs/+/23206 + +Change-Id: I7859cbad0a06ca6d788420c2a495e658699c6ff7 +Fixes: bz#1732376 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/179048 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/md-cache/bug-1726205.t | 22 +++++++++++++++ + .../features/snapview-server/src/snapview-server.c | 12 ++++++++- + xlators/performance/md-cache/src/md-cache.c | 31 +++++++++------------- + 3 files changed, 45 insertions(+), 20 deletions(-) + create mode 100644 tests/bugs/md-cache/bug-1726205.t + +diff --git a/tests/bugs/md-cache/bug-1726205.t b/tests/bugs/md-cache/bug-1726205.t +new file mode 100644 +index 0000000..795130e +--- /dev/null ++++ b/tests/bugs/md-cache/bug-1726205.t +@@ -0,0 +1,22 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd; ++ ++TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3}; ++ ++TEST $CLI volume start $V0 ++ ++TEST $CLI volume set $V0 group samba ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST touch $M0/file ++TEST "setfattr -n "user.DosStream.Zone.Identifier:\$DATA" -v '\0' $M0/file" ++TEST "getfattr -n "user.DosStream.Zone.Identifier:\$DATA" -e hex $M0/file | grep -q 0x00" ++ ++cleanup; +diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c +index b4998b8..1d6a5e5 100644 +--- a/xlators/features/snapview-server/src/snapview-server.c ++++ b/xlators/features/snapview-server/src/snapview-server.c +@@ -828,7 +828,8 @@ out: + * back into the dict. But to get the values for those xattrs it has to do the + * getxattr operation on each xattr which might turn out to be a costly + * operation. So for each of the xattrs present in the list, a 0 byte value +- * ("") is set into the dict before unwinding. 
This can be treated as an ++ * ("") is set into the dict before unwinding. Since ("") is also a valid xattr ++ * value(in a file system) we use an extra key in the same dictionary as an + * indicator to other xlators which want to cache the xattrs (as of now, + * md-cache which caches acl and selinux related xattrs) to not to cache the + * values of the xattrs present in the dict. +@@ -871,6 +872,15 @@ svs_add_xattrs_to_dict(xlator_t *this, dict_t *dict, char *list, ssize_t size) + list_offset += strlen(keybuffer) + 1; + } /* while (remaining_size > 0) */ + ++ /* Add an additional key to indicate that we don't need to cache these ++ * xattrs(with value "") */ ++ ret = dict_set_str(dict, "glusterfs.skip-cache", ""); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED, ++ "dict set operation for the key glusterfs.skip-cache failed."); ++ goto out; ++ } ++ + ret = 0; + + out: +diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c +index 6e0468f..a6b363f 100644 +--- a/xlators/performance/md-cache/src/md-cache.c ++++ b/xlators/performance/md-cache/src/md-cache.c +@@ -698,25 +698,6 @@ updatefn(dict_t *dict, char *key, data_t *value, void *data) + } + } + +- /* posix xlator as part of listxattr will send both names +- * and values of the xattrs in the dict. But as per man page +- * listxattr is mainly supposed to send names of the all the +- * xattrs. gfapi, as of now will put all the keys it obtained +- * in the dict (sent by posix) into a buffer provided by the +- * caller (thus the values of those xattrs are lost). If some +- * xlator makes gfapi based calls (ex: snapview-server), then +- * it has to unwind the calls by putting those names it got +- * in the buffer again into the dict. But now it would not be +- * having the values for those xattrs. So it might just put +- * a 0 byte value ("") into the dict for each xattr and unwind +- * the call. 
So the xlators which cache the xattrs (as of now +- * md-cache caches the acl and selinux related xattrs), should +- * not update their cache if the value of a xattr is a 0 byte +- * data (i.e. ""). +- */ +- if (value->len == 1 && value->data[0] == '\0') +- return 0; +- + if (dict_set(u->dict, key, value) < 0) { + u->ret = -1; + return -1; +@@ -2406,6 +2387,12 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto out; + } + ++ if (dict_get(xattr, "glusterfs.skip-cache")) { ++ gf_msg(this->name, GF_LOG_DEBUG, 0, 0, ++ "Skipping xattr update due to empty value"); ++ goto out; ++ } ++ + mdc_inode_xatt_set(this, local->loc.inode, xdata); + + out: +@@ -2488,6 +2475,12 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto out; + } + ++ if (dict_get(xattr, "glusterfs.skip-cache")) { ++ gf_msg(this->name, GF_LOG_DEBUG, 0, 0, ++ "Skipping xattr update due to empty value"); ++ goto out; ++ } ++ + mdc_inode_xatt_set(this, local->fd->inode, xdata); + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch b/SOURCES/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch new file mode 100644 index 0000000..5af12d1 --- /dev/null +++ b/SOURCES/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch @@ -0,0 +1,105 @@ +From fa3cc9971bf1bf4ea52edfedc0cea67a0d6990d1 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 20 Aug 2019 15:49:40 +0530 +Subject: [PATCH 281/284] ctime: Fix incorrect realtime passed to + frame->root->ctime + +On systems that don't support "timespec_get"(e.g., centos6), it +was using "clock_gettime" with "CLOCK_MONOTONIC" to get unix epoch +time which is incorrect. This patch introduces "timespec_now_realtime" +which uses "clock_gettime" with "CLOCK_REALTIME" which fixes +the issue. 
+ +Backport of: + > Patch: https://review.gluster.org/23274/ + > Change-Id: I57be35ce442d7e05319e82112b687eb4f28d7612 + > Signed-off-by: Kotresh HR + > fixes: bz#1743652 + +Change-Id: I57be35ce442d7e05319e82112b687eb4f28d7612 +Signed-off-by: Kotresh HR +BUG: 1743611 +Reviewed-on: https://code.engineering.redhat.com/gerrit/179185 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/timespec.h | 2 ++ + libglusterfs/src/libglusterfs.sym | 1 + + libglusterfs/src/timespec.c | 22 ++++++++++++++++++++++ + xlators/features/utime/src/utime-helpers.c | 2 +- + 4 files changed, 26 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs/timespec.h b/libglusterfs/src/glusterfs/timespec.h +index 871871d..bb9ab44 100644 +--- a/libglusterfs/src/glusterfs/timespec.h ++++ b/libglusterfs/src/glusterfs/timespec.h +@@ -21,6 +21,8 @@ + void + timespec_now(struct timespec *ts); + void ++timespec_now_realtime(struct timespec *ts); ++void + timespec_adjust_delta(struct timespec *ts, struct timespec delta); + void + timespec_sub(const struct timespec *begin, const struct timespec *end, +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index b161380..467a1b7 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1073,6 +1073,7 @@ sys_accept + tbf_init + tbf_throttle + timespec_now ++timespec_now_realtime + timespec_sub + timespec_adjust_delta + timespec_cmp +diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c +index c01527f..d0d5005 100644 +--- a/libglusterfs/src/timespec.c ++++ b/libglusterfs/src/timespec.c +@@ -71,6 +71,28 @@ timespec_now(struct timespec *ts) + } + + void ++timespec_now_realtime(struct timespec *ts) ++{ ++#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || \ ++ defined GF_BSD_HOST_OS ++ if (0 == clock_gettime(CLOCK_REALTIME, ts)) { ++ return; ++ } ++#endif ++ ++ /* Fall back to gettimeofday()*/ ++ 
struct timeval tv = { ++ 0, ++ }; ++ if (0 == gettimeofday(&tv, NULL)) { ++ TIMEVAL_TO_TIMESPEC(&tv, ts); ++ return; ++ } ++ ++ return; ++} ++ ++void + timespec_adjust_delta(struct timespec *ts, struct timespec delta) + { + ts->tv_nsec = ((ts->tv_nsec + delta.tv_nsec) % 1000000000); +diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c +index 79cc014..29d9ad9 100644 +--- a/xlators/features/utime/src/utime-helpers.c ++++ b/xlators/features/utime/src/utime-helpers.c +@@ -17,7 +17,7 @@ gl_timespec_get(struct timespec *ts) + #ifdef TIME_UTC + timespec_get(ts, TIME_UTC); + #else +- timespec_now(ts); ++ timespec_now_realtime(ts); + #endif + } + +-- +1.8.3.1 + diff --git a/SOURCES/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch b/SOURCES/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch new file mode 100644 index 0000000..37a0f12 --- /dev/null +++ b/SOURCES/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch @@ -0,0 +1,116 @@ +From 98c9fc8d774ae153ca6b44d3337cf5d9f7a030e2 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Fri, 16 Aug 2019 16:07:03 +0530 +Subject: [PATCH 282/284] geo-rep: Fix the name of changelog archive file + +Background: +The processed changelogs are archived each month in a single tar file. +The default format is "archive_YYYYMM.tar" which is specified as "%%Y%%m" +in configuration file. + +Problem: +The created changelog archive file didn't have corresponding year +and month. It created as "archive_%Y%m.tar" on python2 only systems. + +Cause and Fix: +Geo-rep expects "%Y%m" after the ConfigParser reads it from config file. +Since it was "%%Y%%m" in config file, geo-rep used to get correct value +"%Y%m" in python3 and "%%Y%%m" in python2 which is incorrect. +The fix can be to use "%Y%m" in config file but that fails in python3. +So the fix is to use "RawConfigParser" in geo-rep and use "%Y%m". This +works both in python2 and python3. 
+ +Backport of: + > Patch: https://review.gluster.org/23248 + > Change-Id: Ie5b7d2bc04d0d53cd1769e064c2d67aaf95d557c + > fixes: bz#1741890 + > Signed-off-by: Kotresh HR + +Change-Id: Ie5b7d2bc04d0d53cd1769e064c2d67aaf95d557c +BUG: 1743634 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/179188 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/gsyncd.conf.in | 2 +- + geo-replication/syncdaemon/gsyncdconfig.py | 14 +++++++------- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in +index c2e4f0d..5ebd57a 100644 +--- a/geo-replication/gsyncd.conf.in ++++ b/geo-replication/gsyncd.conf.in +@@ -109,7 +109,7 @@ type=int + help=Minimum time interval in seconds for passive worker to become Active + + [changelog-archive-format] +-value=%%Y%%m ++value=%Y%m + help=Processed changelogs will be archived in working directory. Pattern for archive file + + [use-meta-volume] +diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py +index 38f3594..f823311 100644 +--- a/geo-replication/syncdaemon/gsyncdconfig.py ++++ b/geo-replication/syncdaemon/gsyncdconfig.py +@@ -10,9 +10,9 @@ + # + + try: +- from ConfigParser import ConfigParser, NoSectionError ++ from ConfigParser import RawConfigParser, NoSectionError + except ImportError: +- from configparser import ConfigParser, NoSectionError ++ from configparser import RawConfigParser, NoSectionError + import os + import shutil + from string import Template +@@ -94,7 +94,7 @@ class Gconf(object): + if name != "all" and not self._is_configurable(name): + raise GconfNotConfigurable() + +- cnf = ConfigParser() ++ cnf = RawConfigParser() + with open(self.custom_conf_file) as f: + cnf.readfp(f) + +@@ -138,7 +138,7 @@ class Gconf(object): + if curr_val == value: + return True + +- cnf = ConfigParser() ++ cnf = RawConfigParser() + with 
open(self.custom_conf_file) as f: + cnf.readfp(f) + +@@ -178,7 +178,7 @@ class Gconf(object): + self.session_conf_items = [] + self.default_values = {} + +- conf = ConfigParser() ++ conf = RawConfigParser() + # Default Template config file + with open(self.default_conf_file) as f: + conf.readfp(f) +@@ -342,7 +342,7 @@ class Gconf(object): + return False + + def is_config_file_old(config_file, mastervol, slavevol): +- cnf = ConfigParser() ++ cnf = RawConfigParser() + cnf.read(config_file) + session_section = "peers %s %s" % (mastervol, slavevol) + try: +@@ -357,7 +357,7 @@ def config_upgrade(config_file, ret): + shutil.copyfile(config_file, config_file_backup) + + #write a new config file +- config = ConfigParser() ++ config = RawConfigParser() + config.add_section('vars') + + for key, value in ret.items(): +-- +1.8.3.1 + diff --git a/SOURCES/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch b/SOURCES/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch new file mode 100644 index 0000000..eb9d8f8 --- /dev/null +++ b/SOURCES/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch @@ -0,0 +1,285 @@ +From 55eb2e7642e3428eaa1b2d833c0daa1d34b98324 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Thu, 8 Aug 2019 10:05:12 +0530 +Subject: [PATCH 283/284] ctime: Fix ctime issue with utime family of syscalls + +When atime|mtime is updated via utime family of syscalls, +ctime is not updated. This patch fixes the same. 
+ +Backport of: + > Patch: https://review.gluster.org/23177 + > Change-Id: I7f86d8f8a1e06a332c3449b5bbdbf128c9690f25 + > fixes: bz#1738786 + > Signed-off-by: Kotresh HR + +Change-Id: I7f86d8f8a1e06a332c3449b5bbdbf128c9690f25 +BUG: 1743627 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/179184 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/utime/src/utime-gen-fops-c.py | 13 +++- + xlators/storage/posix/src/posix-inode-fd-ops.c | 8 +-- + xlators/storage/posix/src/posix-metadata.c | 96 ++++++++++++++------------ + xlators/storage/posix/src/posix-metadata.h | 3 +- + 4 files changed, 68 insertions(+), 52 deletions(-) + +diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py +index a8637ff..8730a51 100755 +--- a/xlators/features/utime/src/utime-gen-fops-c.py ++++ b/xlators/features/utime/src/utime-gen-fops-c.py +@@ -82,7 +82,18 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) + { + gl_timespec_get(&frame->root->ctime); +- frame->root->flags |= MDATA_CTIME; ++ ++ if (!valid) { ++ frame->root->flags |= MDATA_CTIME; ++ } ++ ++ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { ++ frame->root->flags |= MDATA_CTIME; ++ } ++ ++ if (valid & GF_SET_ATTR_MODE) { ++ frame->root->flags |= MDATA_CTIME; ++ } + + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index d22bbc2..e0ea85b 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -425,8 +425,8 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + real_path); + goto out; + } +- posix_update_utime_in_mdata(this, real_path, -1, loc->inode, stbuf, +- valid); ++ posix_update_utime_in_mdata(this, real_path, -1, loc->inode, ++ 
&frame->root->ctime, stbuf, valid); + } + + if (valid & GF_SET_ATTR_CTIME && !priv->ctime) { +@@ -652,8 +652,8 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + fd); + goto out; + } +- posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, stbuf, +- valid); ++ posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, ++ &frame->root->ctime, stbuf, valid); + } + + if (!valid) { +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index 5cbdc98..532daa2 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -432,8 +432,10 @@ out: + */ + static int + posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, +- inode_t *inode, struct timespec *time, struct iatt *stbuf, +- posix_mdata_flag_t *flag, gf_boolean_t update_utime) ++ inode_t *inode, struct timespec *time, ++ struct timespec *u_atime, struct timespec *u_mtime, ++ struct iatt *stbuf, posix_mdata_flag_t *flag, ++ gf_boolean_t update_utime) + { + posix_mdata_t *mdata = NULL; + int ret = -1; +@@ -443,6 +445,10 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, time, out); + ++ if (update_utime && (!u_atime || !u_mtime)) { ++ goto out; ++ } ++ + LOCK(&inode->lock); + { + ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); +@@ -506,32 +512,30 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + } + } + +- /* Earlier, mdata was updated only if the existing time is less +- * than the time to be updated. This would fail the scenarios +- * where mtime can be set to any time using the syscall. Hence +- * just updating without comparison. But the ctime is not +- * allowed to changed to older date. 
+- */ +- +- if (flag->ctime && posix_compare_timespec(time, &mdata->ctime) > 0) { +- mdata->ctime = *time; +- } +- + /* In distributed systems, there could be races with fops + * updating mtime/atime which could result in different + * mtime/atime for same file. So this makes sure, only the + * highest time is retained. If the mtime/atime update comes + * from the explicit utime syscall, it is allowed to set to +- * previous time ++ * previous or future time but the ctime is always set to ++ * current time. + */ + if (update_utime) { ++ if (flag->ctime && ++ posix_compare_timespec(time, &mdata->ctime) > 0) { ++ mdata->ctime = *time; ++ } + if (flag->mtime) { +- mdata->mtime = *time; ++ mdata->mtime = *u_mtime; + } + if (flag->atime) { +- mdata->atime = *time; ++ mdata->atime = *u_atime; + } + } else { ++ if (flag->ctime && ++ posix_compare_timespec(time, &mdata->ctime) > 0) { ++ mdata->ctime = *time; ++ } + if (flag->mtime && + posix_compare_timespec(time, &mdata->mtime) > 0) { + mdata->mtime = *time; +@@ -584,15 +588,22 @@ out: + */ + void + posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, +- inode_t *inode, struct iatt *stbuf, int valid) ++ inode_t *inode, struct timespec *ctime, ++ struct iatt *stbuf, int valid) + { + int32_t ret = 0; + #if defined(HAVE_UTIMENSAT) +- struct timespec tv = { ++ struct timespec tv_atime = { ++ 0, ++ }; ++ struct timespec tv_mtime = { + 0, + }; + #else +- struct timeval tv = { ++ struct timeval tv_atime = { ++ 0, ++ }; ++ struct timeval tv_mtime = { + 0, + }; + #endif +@@ -611,35 +622,28 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, + */ + if (inode && priv->ctime) { + if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { +- tv.tv_sec = stbuf->ia_atime; +- SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_atime_nsec); ++ tv_atime.tv_sec = stbuf->ia_atime; ++ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_atime, stbuf->ia_atime_nsec); + +- flag.ctime = 0; +- flag.mtime = 0; ++ 
flag.ctime = 1; + flag.atime = 1; +- ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL, +- &flag, _gf_true); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, +- "posix set mdata atime failed on file:" +- " %s gfid:%s", +- real_path, uuid_utoa(inode->gfid)); +- } + } + + if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { +- tv.tv_sec = stbuf->ia_mtime; +- SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec); ++ tv_mtime.tv_sec = stbuf->ia_mtime; ++ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_mtime, stbuf->ia_mtime_nsec); + +- flag.ctime = 0; ++ flag.ctime = 1; + flag.mtime = 1; +- flag.atime = 0; ++ } + +- ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL, +- &flag, _gf_true); ++ if (flag.mtime || flag.atime) { ++ ret = posix_set_mdata_xattr(this, real_path, -1, inode, ctime, ++ &tv_atime, &tv_mtime, NULL, &flag, ++ _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, +- "posix set mdata mtime failed on file:" ++ "posix set mdata atime failed on file:" + " %s gfid:%s", + real_path, uuid_utoa(inode->gfid)); + } +@@ -702,8 +706,8 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + goto out; + } + ret = posix_set_mdata_xattr(this, real_path, fd, inode, +- &frame->root->ctime, stbuf, &flag, +- _gf_false); ++ &frame->root->ctime, NULL, NULL, stbuf, ++ &flag, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path, +@@ -733,8 +737,8 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + goto out; + } + ret = posix_set_mdata_xattr(this, real_path, fd, inode, +- &frame->root->ctime, stbuf, &flag, +- _gf_false); ++ &frame->root->ctime, NULL, NULL, stbuf, ++ &flag, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path, +@@ -792,8 +796,8 @@ 
posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + flag_dup.atime = 0; + + ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out, +- &frame->root->ctime, stbuf_out, &flag_dup, +- _gf_false); ++ &frame->root->ctime, NULL, NULL, stbuf_out, ++ &flag_dup, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_out, +@@ -811,8 +815,8 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + flag_dup.ctime = 0; + + ret = posix_set_mdata_xattr(this, real_path_in, fd_out, inode_out, +- &frame->root->ctime, stbuf_out, &flag_dup, +- _gf_false); ++ &frame->root->ctime, NULL, NULL, stbuf_out, ++ &flag_dup, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_in, +diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h +index dc25e59..c176699 100644 +--- a/xlators/storage/posix/src/posix-metadata.h ++++ b/xlators/storage/posix/src/posix-metadata.h +@@ -40,7 +40,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + inode_t *inode, struct iatt *stbuf); + void + posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, +- inode_t *inode, struct iatt *stbuf, int valid); ++ inode_t *inode, struct timespec *ctime, ++ struct iatt *stbuf, int valid); + void + posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + int fd, inode_t *inode, struct iatt *stbuf); +-- +1.8.3.1 + diff --git a/SOURCES/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch b/SOURCES/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch new file mode 100644 index 0000000..4078bfc --- /dev/null +++ b/SOURCES/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch @@ -0,0 +1,61 @@ +From 243075b593c6fccbffb3e82ffcfdb58acfd68269 Mon Sep 17 00:00:00 2001 +From: Mohit 
Agrawal +Date: Thu, 22 Aug 2019 15:51:43 +0530 +Subject: [PATCH 284/284] posix: log aio_error return codes in + posix_fs_health_check + +Problem: Sometime brick is going down to health check thread is + failed without logging error codes return by aio system calls. + As per aio_error man page it returns a positive error number + if the asynchronous I/O operation failed. + +Solution: log aio_error return codes in error message + +> Change-Id: I2496b1bc16e602b0fd3ad53e211de11ec8c641ef +> Fixes: bz#1744519 +> Signed-off-by: Mohit Agrawal +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23284/ + +Change-Id: I2496b1bc16e602b0fd3ad53e211de11ec8c641ef +BUG: 1744518 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/179211 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix-helpers.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index ef5bfd5..d143d4c 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -2025,7 +2025,6 @@ posix_fs_health_check(xlator_t *this) + if (ret != 0) { + op_errno = errno; + op = "aio_write_error"; +- ret = -1; + goto out; + } + +@@ -2064,7 +2063,6 @@ posix_fs_health_check(xlator_t *this) + if (ret != 0) { + op_errno = errno; + op = "aio_read_error"; +- ret = -1; + goto out; + } + +@@ -2089,7 +2087,8 @@ out: + } + if (ret && file_path[0]) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED, +- "%s() on %s returned", op, file_path); ++ "%s() on %s returned ret is %d error is %s", op, file_path, ret, ++ ret != -1 ? 
strerror(ret) : strerror(op_errno)); + gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED, + "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op, + file_path, strerror(op_errno), priv->hostname, priv->base_path, +-- +1.8.3.1 + diff --git a/SOURCES/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch b/SOURCES/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch new file mode 100644 index 0000000..12549e7 --- /dev/null +++ b/SOURCES/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch @@ -0,0 +1,43 @@ +From 49cd9ef7487ba88796315b897823837a9cbd535e Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Wed, 28 Aug 2019 09:05:20 +0530 +Subject: [PATCH 285/297] glusterd: glusterd service is getting timed out on + scaled setup + +Problem: On a three node cluster with 2000 replica volumes systemctl is getting + timed out for glusted service. + +Solution: Configure TimeoutSec 300 to wait for glusterd startup. + +> Change-Id: Idb3f3f3e56e6216a0ebd754cbb9e8e37ce9e636d +> Fixes: bz#1746228 +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit c90dc63ec9eee0f43ba8e489876fdf8b8810bbdc) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23316/) + +Change-Id: Idb3f3f3e56e6216a0ebd754cbb9e8e37ce9e636d +BUG: 1746027 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/179806 +Tested-by: Mohit Agrawal +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/systemd/glusterd.service.in | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in +index c33351c..f604160 100644 +--- a/extras/systemd/glusterd.service.in ++++ b/extras/systemd/glusterd.service.in +@@ -13,6 +13,7 @@ Environment="LOG_LEVEL=INFO" + EnvironmentFile=-@sysconfdir@/sysconfig/glusterd + ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-level $LOG_LEVEL $GLUSTERD_OPTIONS + KillMode=process 
++TimeoutSec=300 + SuccessExitStatus=15 + + [Install] +-- +1.8.3.1 + diff --git a/SOURCES/0286-glusterfs.spec.in-added-script-files-for-machine-com.patch b/SOURCES/0286-glusterfs.spec.in-added-script-files-for-machine-com.patch new file mode 100644 index 0000000..415a07b --- /dev/null +++ b/SOURCES/0286-glusterfs.spec.in-added-script-files-for-machine-com.patch @@ -0,0 +1,162 @@ +From 2a905a8ae6b4737e84543ad76b55f3346fa0f32c Mon Sep 17 00:00:00 2001 +From: Hari Gowtham +Date: Tue, 27 Aug 2019 14:12:31 +0530 +Subject: [PATCH 286/297] glusterfs.spec.in: added script files for machine / + component stats + +Have added the file (extras/identify-hangs.sh) to the code base. +And included the following to be packaged: + +Quota Accounting issue: +extras/quota/xattr_analysis.py (made available only on server) +extras/quota/quota_fsck.py (made available only on server) +extras/quota/log_accounting.sh + +Debugging Statedumps: +extras/identify-hangs.sh + +Performance: +extras/collect-system-stats.sh + +Note: rest of the files were already included. + +Label: DOWNSTREAM ONLY. 
+ +Change-Id: I2efb959865c3f381166c6a25c6eef613d13dd5ee +fixes: bz#1719171 +Signed-off-by: Hari Gowtham +Reviewed-on: https://code.engineering.redhat.com/gerrit/179515 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + extras/Makefile.am | 9 +++++++- + extras/identify-hangs.sh | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ + glusterfs.spec.in | 8 ++++++++ + 3 files changed, 69 insertions(+), 1 deletion(-) + create mode 100644 extras/identify-hangs.sh + +diff --git a/extras/Makefile.am b/extras/Makefile.am +index 983f014..8cbfda1 100644 +--- a/extras/Makefile.am ++++ b/extras/Makefile.am +@@ -30,9 +30,14 @@ endif + + scriptsdir = $(datadir)/glusterfs/scripts + scripts_SCRIPTS = thin-arbiter/setup-thin-arbiter.sh ++scripts_SCRIPTS += quota/log_accounting.sh ++scripts_SCRIPTS += collect-system-stats.sh ++scripts_SCRIPTS += identify-hangs.sh + if WITH_SERVER + scripts_SCRIPTS += post-upgrade-script-for-quota.sh \ + pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh ++scripts_SCRIPTS += quota/quota_fsck.py ++scripts_SCRIPTS += quota/xattr_analysis.py + if USE_SYSTEMD + scripts_SCRIPTS += control-cpu-load.sh + scripts_SCRIPTS += control-mem.sh +@@ -50,7 +55,9 @@ EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.co + command-completion/Makefile command-completion/README \ + stop-all-gluster-processes.sh clang-checker.sh mount-shared-storage.sh \ + control-cpu-load.sh control-mem.sh group-distributed-virt \ +- thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh ++ thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh \ ++ quota/xattr_analysis.py quota/quota_fsck.py quota/log_accounting.sh \ ++ collect-system-stats.sh identify-hangs.sh + + if WITH_SERVER + install-data-local: +diff --git a/extras/identify-hangs.sh b/extras/identify-hangs.sh +new file mode 100644 +index 0000000..ebc6bf1 +--- /dev/null ++++ b/extras/identify-hangs.sh +@@ -0,0 +1,53 @@ ++#!/bin/bash ++function 
get_statedump_fnames_without_timestamps ++{ ++ ls | grep -E "[.]dump[.][0-9][0-9]*" | cut -f1-3 -d'.' | sort -u ++} ++ ++function get_non_uniq_fields ++{ ++ local statedump_fname_prefix=$1 ++ print_stack_lkowner_unique_in_one_line "$statedump_fname_prefix" | sort | uniq -c | grep -vE "^\s*1 " | awk '{$1="repeats="$1; print $0}' ++} ++ ++function print_stack_lkowner_unique_in_one_line ++{ ++ local statedump_fname_prefix=$1 ++ sed -e '/./{H;$!d;}' -e 'x;/unique=/!d;/stack=/!d;/lk-owner=/!d;/pid=/!d;' "${statedump_fname_prefix}"* | grep -E "(stack|lk-owner|unique|pid)=" | paste -d " " - - - - ++} ++ ++function get_stacks_that_appear_in_multiple_statedumps ++{ ++ #If a stack with same 'unique/lk-owner/stack' appears in multiple statedumps ++ #print the stack ++ local statedump_fname_prefix=$1 ++ while read -r non_uniq_stack; ++ do ++ if [ -z "$printed" ]; ++ then ++ printed="1" ++ fi ++ echo "$statedump_fname_prefix" "$non_uniq_stack" ++ done < <(get_non_uniq_fields "$statedump_fname_prefix") ++} ++ ++statedumpdir=${1} ++if [ -z "$statedumpdir" ]; ++then ++ echo "Usage: $0 " ++ exit 1 ++fi ++ ++if [ ! 
-d "$statedumpdir" ]; ++then ++ echo "$statedumpdir: Is not a directory" ++ echo "Usage: $0 " ++ exit 1 ++fi ++ ++cd "$statedumpdir" || exit 1 ++for statedump_fname_prefix in $(get_statedump_fnames_without_timestamps); ++do ++ get_stacks_that_appear_in_multiple_statedumps "$statedump_fname_prefix" ++done | column -t ++echo "NOTE: stacks with lk-owner=\"\"/lk-owner=0000000000000000/unique=0 may not be hung frames and need further inspection" >&2 +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 00603ec..3c2e2dc 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1107,6 +1107,9 @@ exit 0 + %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh + %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh + %endif ++%{_datadir}/glusterfs/scripts/identify-hangs.sh ++%{_datadir}/glusterfs/scripts/collect-system-stats.sh ++%{_datadir}/glusterfs/scripts/log_accounting.sh + # xlators that are needed on the client- and on the server-side + %dir %{_libdir}/glusterfs + %dir %{_libdir}/glusterfs/%{version}%{?prereltag} +@@ -1352,6 +1355,8 @@ exit 0 + %if ( 0%{!?_without_server:1} ) + %files server + %doc extras/clear_xattrs.sh ++%{_datadir}/glusterfs/scripts/xattr_analysis.py* ++%{_datadir}/glusterfs/scripts/quota_fsck.py* + # sysconf + %config(noreplace) %{_sysconfdir}/glusterfs + %exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol +@@ -1942,6 +1947,9 @@ fi + %endif + + %changelog ++* Tue Aug 27 2019 Hari Gowtham ++- Added scripts to collect machine stats and component stats (#1719171) ++ + * Tue Jun 18 2019 Jiffin Tony Thottan + - build glusterfs-ganesha for rhel 7 and above (#1720551) + +-- +1.8.3.1 + diff --git a/SOURCES/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch b/SOURCES/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch new file mode 100644 index 0000000..93bd3c9 --- /dev/null +++ b/SOURCES/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch @@ -0,0 +1,372 @@ +From 
546f412c155dd5aca2b3cd4202f80c9977b215dc Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Wed, 4 Sep 2019 12:06:34 +0530 +Subject: [PATCH 287/297] cluster/ec: Fail fsync/flush for files on update + size/version failure + +Problem: +If update size/version is not successful on the file, updates on the +same stripe could lead to data corruptions if the earlier un-aligned +write is not successful on all the bricks. Application won't have +any knowledge of this because update size/version happens in the +background. + +Fix: +Fail fsync/flush on fds that are opened before update-size-version +went bad. + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23355 +fixes: bz#1745107 +Change-Id: I9d323eddcda703bd27d55f340c4079d76e06e492 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/180672 +Tested-by: RHGS Build Bot +Reviewed-by: Ashish Pandey +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/ec/ec-badfd.c | 124 +++++++++++++++++++++++++++++++++++ + tests/basic/ec/ec-badfd.t | 26 ++++++++ + xlators/cluster/ec/src/ec-common.c | 23 +++++++ + xlators/cluster/ec/src/ec-generic.c | 47 +++++++++++++ + xlators/cluster/ec/src/ec-helpers.c | 7 ++ + xlators/cluster/ec/src/ec-messages.h | 2 +- + xlators/cluster/ec/src/ec-types.h | 2 + + 7 files changed, 230 insertions(+), 1 deletion(-) + create mode 100644 tests/basic/ec/ec-badfd.c + create mode 100755 tests/basic/ec/ec-badfd.t + +diff --git a/tests/basic/ec/ec-badfd.c b/tests/basic/ec/ec-badfd.c +new file mode 100644 +index 0000000..8be23c1 +--- /dev/null ++++ b/tests/basic/ec/ec-badfd.c +@@ -0,0 +1,124 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int ++fill_iov(struct iovec *iov, char fillchar, int count) ++{ ++ int ret = -1; ++ ++ iov->iov_base = malloc(count + 1); ++ if (iov->iov_base == NULL) { ++ return ret; ++ } else { ++ iov->iov_len = count; ++ ret = 0; ++ } ++ memset(iov->iov_base, 
fillchar, count); ++ memset(iov->iov_base + count, '\0', 1); ++ ++ return ret; ++} ++ ++int ++write_sync(glfs_t *fs, glfs_fd_t *glfd, int char_count) ++{ ++ ssize_t ret = -1; ++ int flags = O_RDWR; ++ struct iovec iov = {0}; ++ ++ ret = fill_iov(&iov, 'a', char_count); ++ if (ret) { ++ fprintf(stderr, "failed to create iov"); ++ goto out; ++ } ++ ++ ret = glfs_pwritev(glfd, &iov, 1, 0, flags); ++out: ++ if (ret < 0) { ++ fprintf(stderr, "glfs_pwritev failed, %d", errno); ++ } ++ return ret; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ glfs_t *fs = NULL; ++ glfs_fd_t *fd = NULL; ++ int ret = 1; ++ char volume_cmd[4096] = {0}; ++ ++ if (argc != 4) { ++ fprintf(stderr, "Syntax: %s \n", argv[0]); ++ return 1; ++ } ++ ++ fs = glfs_new(argv[2]); ++ if (!fs) { ++ fprintf(stderr, "glfs_new: returned NULL\n"); ++ return 1; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret); ++ goto out; ++ } ++ ret = glfs_set_logging(fs, "/tmp/ec-badfd.log", 7); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_set_logging: returned %d\n", ret); ++ goto out; ++ } ++ ret = glfs_init(fs); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_init: returned %d\n", ret); ++ goto out; ++ } ++ ++ fd = glfs_open(fs, argv[3], O_RDWR); ++ if (fd == NULL) { ++ fprintf(stderr, "glfs_open: returned NULL\n"); ++ goto out; ++ } ++ ++ ret = write_sync(fs, fd, 16); ++ if (ret < 0) { ++ fprintf(stderr, "write_sync failed\n"); ++ } ++ ++ snprintf(volume_cmd, sizeof(volume_cmd), ++ "gluster --mode=script volume stop %s", argv[2]); ++ /*Stop the volume so that update-size-version fails*/ ++ system(volume_cmd); ++ sleep(8); /* 3 seconds more than eager-lock-timeout*/ ++ snprintf(volume_cmd, sizeof(volume_cmd), ++ "gluster --mode=script volume start %s", argv[2]); ++ system(volume_cmd); ++ sleep(8); /*wait for bricks to come up*/ ++ ret = glfs_fsync(fd, NULL, NULL); ++ if (ret == 0) { ++ fprintf(stderr, "fsync succeeded 
on a BADFD\n"); ++ exit(1); ++ } ++ ++ ret = glfs_close(fd); ++ if (ret == 0) { ++ fprintf(stderr, "flush succeeded on a BADFD\n"); ++ exit(1); ++ } ++ ret = 0; ++ ++out: ++ unlink("/tmp/ec-badfd.log"); ++ glfs_fini(fs); ++ ++ return ret; ++} +diff --git a/tests/basic/ec/ec-badfd.t b/tests/basic/ec/ec-badfd.t +new file mode 100755 +index 0000000..56feb47 +--- /dev/null ++++ b/tests/basic/ec/ec-badfd.t +@@ -0,0 +1,26 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6} ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 disperse.eager-lock-timeout 5 ++ ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++ ++TEST $GFS -s $H0 --volfile-id $V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++TEST touch $M0/file ++ ++TEST build_tester $(dirname $0)/ec-badfd.c -lgfapi -Wall -O2 ++TEST $(dirname $0)/ec-badfd $H0 $V0 /file ++cleanup_tester $(dirname ${0})/ec-badfd ++ ++cleanup; +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 5fb4610..92d4e5d 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2255,6 +2255,23 @@ ec_unlock_lock(ec_lock_link_t *link) + } + } + ++void ++ec_inode_bad_inc(inode_t *inode, xlator_t *xl) ++{ ++ ec_inode_t *ctx = NULL; ++ ++ LOCK(&inode->lock); ++ { ++ ctx = __ec_inode_get(inode, xl); ++ if (ctx == NULL) { ++ goto unlock; ++ } ++ ctx->bad_version++; ++ } ++unlock: ++ UNLOCK(&inode->lock); ++} ++ + int32_t + ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, +@@ -2270,6 +2287,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, + ctx = lock->ctx; + + if (op_ret < 0) { ++ if (link->lock->fd == NULL) { ++ 
ec_inode_bad_inc(link->lock->loc.inode, this); ++ } else { ++ ec_inode_bad_inc(link->lock->fd->inode, this); ++ } ++ + gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno, + EC_MSG_SIZE_VERS_UPDATE_FAIL, + "Failed to update version and size. %s", ec_msg_str(fop)); +diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c +index acc16b5..b019050 100644 +--- a/xlators/cluster/ec/src/ec-generic.c ++++ b/xlators/cluster/ec/src/ec-generic.c +@@ -150,6 +150,37 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state) + } + } + ++static int32_t ++ec_validate_fd(fd_t *fd, xlator_t *xl) ++{ ++ uint64_t iversion = 0; ++ uint64_t fversion = 0; ++ ec_inode_t *inode_ctx = NULL; ++ ec_fd_t *fd_ctx = NULL; ++ ++ LOCK(&fd->lock); ++ { ++ fd_ctx = __ec_fd_get(fd, xl); ++ if (fd_ctx) { ++ fversion = fd_ctx->bad_version; ++ } ++ } ++ UNLOCK(&fd->lock); ++ ++ LOCK(&fd->inode->lock); ++ { ++ inode_ctx = __ec_inode_get(fd->inode, xl); ++ if (inode_ctx) { ++ iversion = inode_ctx->bad_version; ++ } ++ } ++ UNLOCK(&fd->inode->lock); ++ if (fversion < iversion) { ++ return EBADF; ++ } ++ return 0; ++} ++ + void + ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, + uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd, +@@ -165,6 +196,14 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + ++ error = ec_validate_fd(fd, this); ++ if (error) { ++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, ++ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH], ++ fd->inode ? 
uuid_utoa(fd->inode->gfid) : ""); ++ goto out; ++ } ++ + fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags, + ec_wind_flush, ec_manager_flush, callback, data); + if (fop == NULL) { +@@ -381,6 +420,14 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + ++ error = ec_validate_fd(fd, this); ++ if (error) { ++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, ++ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC], ++ fd->inode ? uuid_utoa(fd->inode->gfid) : ""); ++ goto out; ++ } ++ + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags, + ec_wind_fsync, ec_manager_fsync, callback, data); + if (fop == NULL) { +diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c +index 43f6e3b..baac001 100644 +--- a/xlators/cluster/ec/src/ec-helpers.c ++++ b/xlators/cluster/ec/src/ec-helpers.c +@@ -753,6 +753,7 @@ __ec_fd_get(fd_t *fd, xlator_t *xl) + { + int i = 0; + ec_fd_t *ctx = NULL; ++ ec_inode_t *ictx = NULL; + uint64_t value = 0; + ec_t *ec = xl->private; + +@@ -775,6 +776,12 @@ __ec_fd_get(fd_t *fd, xlator_t *xl) + GF_FREE(ctx); + return NULL; + } ++ /* Only refering bad-version so no need for lock ++ * */ ++ ictx = __ec_inode_get(fd->inode, xl); ++ if (ictx) { ++ ctx->bad_version = ictx->bad_version; ++ } + } + } else { + ctx = (ec_fd_t *)(uintptr_t)value; +diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h +index 7c28808..be86b37 100644 +--- a/xlators/cluster/ec/src/ec-messages.h ++++ b/xlators/cluster/ec/src/ec-messages.h +@@ -55,6 +55,6 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL, + EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE, + EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED, + EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED, +- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED); 
++ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED, EC_MSG_FD_BAD); + + #endif /* !_EC_MESSAGES_H_ */ +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 1c295c0..f27f2ec 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -150,6 +150,7 @@ struct _ec_fd { + loc_t loc; + uintptr_t open; + int32_t flags; ++ uint64_t bad_version; + ec_fd_status_t fd_status[0]; + }; + +@@ -180,6 +181,7 @@ struct _ec_inode { + uint64_t dirty[2]; + struct list_head heal; + ec_stripe_list_t stripe_cache; ++ uint64_t bad_version; + }; + + typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, +-- +1.8.3.1 + diff --git a/SOURCES/0288-cluster-ec-Fix-coverity-issues.patch b/SOURCES/0288-cluster-ec-Fix-coverity-issues.patch new file mode 100644 index 0000000..8dd3fca --- /dev/null +++ b/SOURCES/0288-cluster-ec-Fix-coverity-issues.patch @@ -0,0 +1,77 @@ +From ccf7775760dd923e21341438725946737eb8d8af Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Sat, 7 Sep 2019 20:18:01 +0530 +Subject: [PATCH 288/297] cluster/ec: Fix coverity issues + +Fixed the following coverity issue in both flush/fsync +>>> CID 1404964: Null pointer dereferences (REVERSE_INULL) +>>> Null-checking "fd" suggests that it may be null, but it has already +been dereferenced on all paths leading to the check. 
+>>> if (fd != NULL) { +>>> fop->fd = fd_ref(fd); +>>> if (fop->fd == NULL) { +>>> gf_msg(this->name, GF_LOG_ERROR, 0, +>>> "Failed to reference a " +>>> "file descriptor."); + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23382 +fixes: bz#1745107 +Change-Id: I19c05d585e23f8fbfbc195d1f3775ec528eed671 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/180673 +Tested-by: RHGS Build Bot +Reviewed-by: Ashish Pandey +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-generic.c | 28 ++++++++++++++++------------ + 1 file changed, 16 insertions(+), 12 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c +index b019050..192bb02 100644 +--- a/xlators/cluster/ec/src/ec-generic.c ++++ b/xlators/cluster/ec/src/ec-generic.c +@@ -196,12 +196,14 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- error = ec_validate_fd(fd, this); +- if (error) { +- gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, +- "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH], +- fd->inode ? uuid_utoa(fd->inode->gfid) : ""); +- goto out; ++ if (fd) { ++ error = ec_validate_fd(fd, this); ++ if (error) { ++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, ++ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH], ++ fd->inode ? uuid_utoa(fd->inode->gfid) : ""); ++ goto out; ++ } + } + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags, +@@ -420,12 +422,14 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- error = ec_validate_fd(fd, this); +- if (error) { +- gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, +- "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC], +- fd->inode ? 
uuid_utoa(fd->inode->gfid) : ""); +- goto out; ++ if (fd) { ++ error = ec_validate_fd(fd, this); ++ if (error) { ++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, ++ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC], ++ fd->inode ? uuid_utoa(fd->inode->gfid) : ""); ++ goto out; ++ } + } + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags, +-- +1.8.3.1 + diff --git a/SOURCES/0289-cluster-ec-quorum-count-implementation.patch b/SOURCES/0289-cluster-ec-quorum-count-implementation.patch new file mode 100644 index 0000000..6d24813 --- /dev/null +++ b/SOURCES/0289-cluster-ec-quorum-count-implementation.patch @@ -0,0 +1,721 @@ +From 0d54bb417e982a100ceefb5eab2a61a17e840f39 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Thu, 5 Sep 2019 16:12:39 +0530 +Subject: [PATCH 289/297] cluster/ec: quorum-count implementation + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23366 +upstream-issue: #721 +fixes: bz#1748688 +Change-Id: I5333540e3c635ccf441cf1f4696e4c8986e38ea8 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/180674 +Tested-by: RHGS Build Bot +Reviewed-by: Ashish Pandey +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/globals.h | 4 +- + tests/basic/ec/ec-quorum-count-partial-failure.t | 50 +++++++ + tests/basic/ec/ec-quorum-count.t | 165 +++++++++++++++++++++++ + tests/ec.rc | 9 ++ + xlators/cluster/ec/src/ec-common.c | 13 ++ + xlators/cluster/ec/src/ec-common.h | 24 ++++ + xlators/cluster/ec/src/ec-dir-write.c | 57 ++++---- + xlators/cluster/ec/src/ec-inode-write.c | 61 ++++----- + xlators/cluster/ec/src/ec-types.h | 1 + + xlators/cluster/ec/src/ec.c | 13 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 46 +++++++ + 11 files changed, 383 insertions(+), 60 deletions(-) + create mode 100755 tests/basic/ec/ec-quorum-count-partial-failure.t + create mode 100644 tests/basic/ec/ec-quorum-count.t + +diff --git a/libglusterfs/src/glusterfs/globals.h 
b/libglusterfs/src/glusterfs/globals.h +index 55476f6..bdc8b3d 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -50,7 +50,7 @@ + 1 /* MIN is the fresh start op-version, mostly \ + should not change */ + #define GD_OP_VERSION_MAX \ +- GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \ ++ GD_OP_VERSION_8_0 /* MAX VERSION is the maximum \ + count in VME table, should \ + keep changing with \ + introduction of newer \ +@@ -136,6 +136,8 @@ + + #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */ + ++#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */ ++ + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/tests/basic/ec/ec-quorum-count-partial-failure.t b/tests/basic/ec/ec-quorum-count-partial-failure.t +new file mode 100755 +index 0000000..79f5825 +--- /dev/null ++++ b/tests/basic/ec/ec-quorum-count-partial-failure.t +@@ -0,0 +1,50 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++#This test checks that partial failure of fop results in main fop failure only ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} ++TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5} ++TEST $CLI volume set $V0 performance.flush-behind off ++TEST $CLI volume start $V0 ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=/$V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++ ++TEST dd if=/dev/urandom of=$M0/a bs=12347 count=1 ++TEST dd if=/dev/urandom of=$M0/b bs=12347 count=1 ++TEST cp $M0/b $M0/c ++TEST fallocate -p -l 101 $M0/c ++TEST $CLI volume stop $V0 ++TEST $CLI volume set $V0 debug.delay-gen posix; ++TEST $CLI volume set $V0 delay-gen.delay-duration 10000000; ++TEST $CLI volume set $V0 delay-gen.enable WRITE; ++TEST $CLI volume set $V0 delay-gen.delay-percentage 100 ++TEST $CLI volume set $V0 disperse.quorum-count 6 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++cksum=$(dd if=$M0/a bs=12345 count=1 | md5sum | awk '{print $1}') ++truncate -s 12345 $M0/a & #While write is waiting for 5 seconds, introduce failure ++fallocate -p -l 101 $M0/b & ++sleep 1 ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST wait ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0} ++EXPECT "12345" stat --format=%s $M0/a ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0; ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0 ++cksum_after_heal=$(dd if=$M0/a | md5sum | awk '{print $1}') ++TEST [[ $cksum == $cksum_after_heal ]] ++cksum=$(dd if=$M0/c | md5sum | awk '{print $1}') ++cksum_after_heal=$(dd if=$M0/b | md5sum | awk '{print $1}') ++TEST [[ $cksum == $cksum_after_heal ]] ++ 
++cleanup; +diff --git a/tests/basic/ec/ec-quorum-count.t b/tests/basic/ec/ec-quorum-count.t +new file mode 100644 +index 0000000..56b5329 +--- /dev/null ++++ b/tests/basic/ec/ec-quorum-count.t +@@ -0,0 +1,165 @@ ++ #!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../ec.rc ++ ++cleanup ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} ++TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5} ++TEST $CLI volume set $V0 disperse.eager-lock-timeout 5 ++TEST $CLI volume set $V0 performance.flush-behind off ++ ++#Should fail on non-disperse volume ++TEST ! $CLI volume set $V1 disperse.quorum-count 5 ++ ++#Should fail on invalid or out-of-range values and succeed only within the valid range ++TEST ! $CLI volume set $V0 disperse.quorum-count 0 ++TEST ! $CLI volume set $V0 disperse.quorum-count -0 ++TEST ! $CLI volume set $V0 disperse.quorum-count abc ++TEST ! $CLI volume set $V0 disperse.quorum-count 10abc ++TEST ! $CLI volume set $V0 disperse.quorum-count 1 ++TEST ! $CLI volume set $V0 disperse.quorum-count 2 ++TEST !
$CLI volume set $V0 disperse.quorum-count 3 ++TEST $CLI volume set $V0 disperse.quorum-count 4 ++TEST $CLI volume start $V0 ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++ ++#Test that the option is reflected in the mount ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^4$" ec_option_value $V0 $M0 0 quorum-count ++TEST $CLI volume reset $V0 disperse.quorum-count ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count ++TEST $CLI volume set $V0 disperse.quorum-count 6 ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^6$" ec_option_value $V0 $M0 0 quorum-count ++ ++TEST touch $M0/a ++TEST touch $M0/data ++TEST setfattr -n trusted.def -v def $M0/a ++TEST touch $M0/src ++TEST touch $M0/del-me ++TEST mkdir $M0/dir1 ++TEST dd if=/dev/zero of=$M0/read-file bs=1M count=1 oflag=direct ++TEST dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct ++TEST gf_rm_file_and_gfid_link $B0/${V0}0 del-file ++#modify operations should fail as the file is not in quorum ++TEST ! dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++#Read should succeed even when quorum-count is not met ++TEST dd if=$M0/read-file of=/dev/null iflag=direct ++TEST ! touch $M0/a2 ++TEST ! mkdir $M0/dir2 ++TEST ! mknod $M0/b2 b 4 5 ++TEST ! ln -s $M0/a $M0/symlink ++TEST ! ln $M0/a $M0/link ++TEST ! mv $M0/src $M0/dst ++TEST ! rm -f $M0/del-me ++TEST ! rmdir $M0/dir1 ++TEST ! dd if=/dev/zero of=$M0/a bs=1M count=1 conv=notrunc ++TEST ! dd if=/dev/zero of=$M0/data bs=1M count=1 conv=notrunc ++TEST ! truncate -s 0 $M0/a ++TEST ! setfattr -n trusted.abc -v abc $M0/a ++TEST ! setfattr -x trusted.def $M0/a ++TEST ! chmod +x $M0/a ++TEST ! fallocate -l 2m -n $M0/a ++TEST ! 
fallocate -p -l 512k $M0/a ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0} ++ ++# reset the option and check whether the default redundancy count is ++# accepted or not. ++TEST $CLI volume reset $V0 disperse.quorum-count ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count ++TEST touch $M0/a1 ++TEST touch $M0/data1 ++TEST setfattr -n trusted.def -v def $M0/a1 ++TEST touch $M0/src1 ++TEST touch $M0/del-me1 ++TEST mkdir $M0/dir11 ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST touch $M0/a21 ++TEST mkdir $M0/dir21 ++TEST mknod $M0/b21 b 4 5 ++TEST ln -s $M0/a1 $M0/symlink1 ++TEST ln $M0/a1 $M0/link1 ++TEST mv $M0/src1 $M0/dst1 ++TEST rm -f $M0/del-me1 ++TEST rmdir $M0/dir11 ++TEST dd if=/dev/zero of=$M0/a1 bs=1M count=1 conv=notrunc ++TEST dd if=/dev/zero of=$M0/data1 bs=1M count=1 conv=notrunc ++TEST truncate -s 0 $M0/a1 ++TEST setfattr -n trusted.abc -v abc $M0/a1 ++TEST setfattr -x trusted.def $M0/a1 ++TEST chmod +x $M0/a1 ++TEST fallocate -l 2m -n $M0/a1 ++TEST fallocate -p -l 512k $M0/a1 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++ ++TEST touch $M0/a2 ++TEST touch $M0/data2 ++TEST setfattr -n trusted.def -v def $M0/a1 ++TEST touch $M0/src2 ++TEST touch $M0/del-me2 ++TEST mkdir $M0/dir12 ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++TEST ! touch $M0/a22 ++TEST ! mkdir $M0/dir22 ++TEST ! mknod $M0/b22 b 4 5 ++TEST ! ln -s $M0/a2 $M0/symlink2 ++TEST ! ln $M0/a2 $M0/link2 ++TEST ! mv $M0/src2 $M0/dst2 ++TEST ! rm -f $M0/del-me2 ++TEST ! rmdir $M0/dir12 ++TEST ! dd if=/dev/zero of=$M0/a2 bs=1M count=1 conv=notrunc ++TEST ! dd if=/dev/zero of=$M0/data2 bs=1M count=1 conv=notrunc ++TEST ! truncate -s 0 $M0/a2 ++TEST ! setfattr -n trusted.abc -v abc $M0/a2 ++TEST ! setfattr -x trusted.def $M0/a2 ++TEST ! chmod +x $M0/a2 ++TEST ! 
fallocate -l 2m -n $M0/a2 ++TEST ! fallocate -p -l 512k $M0/a2 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0} ++ ++# Set quorum-count to 5 and kill 1 brick and the fops should pass ++TEST $CLI volume set $V0 disperse.quorum-count 5 ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^5$" ec_option_value $V0 $M0 0 quorum-count ++TEST touch $M0/a3 ++TEST touch $M0/data3 ++TEST setfattr -n trusted.def -v def $M0/a3 ++TEST touch $M0/src3 ++TEST touch $M0/del-me3 ++TEST mkdir $M0/dir13 ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST touch $M0/a31 ++TEST mkdir $M0/dir31 ++TEST mknod $M0/b31 b 4 5 ++TEST ln -s $M0/a3 $M0/symlink3 ++TEST ln $M0/a3 $M0/link3 ++TEST mv $M0/src3 $M0/dst3 ++TEST rm -f $M0/del-me3 ++TEST rmdir $M0/dir13 ++TEST dd if=/dev/zero of=$M0/a3 bs=1M count=1 conv=notrunc ++TEST dd if=/dev/zero of=$M0/data3 bs=1M count=1 conv=notrunc ++TEST truncate -s 0 $M0/a3 ++TEST setfattr -n trusted.abc -v abc $M0/a3 ++TEST setfattr -x trusted.def $M0/a3 ++TEST chmod +x $M0/a3 ++TEST fallocate -l 2m -n $M0/a3 ++TEST fallocate -p -l 512k $M0/a3 ++TEST dd if=/dev/urandom of=$M0/heal-file bs=1M count=1 oflag=direct ++cksum_before_heal="$(md5sum $M0/heal-file | awk '{print $1}')" ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0} ++TEST kill_brick $V0 $H0 $B0/${V0}4 ++TEST kill_brick $V0 $H0 $B0/${V0}5 ++cksum_after_heal=$(dd if=$M0/heal-file iflag=direct | md5sum | awk '{print $1}') ++TEST [[ $cksum_before_heal == $cksum_after_heal ]] ++cleanup; +diff --git a/tests/ec.rc b/tests/ec.rc +index 04405ec..f18752f 100644 +--- a/tests/ec.rc ++++ b/tests/ec.rc +@@ -7,3 +7,12 @@ function ec_up_status() + local ec_id=$3 + grep -E "^up =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'=' + } ++ ++function ec_option_value() ++{ ++ local v=$1 ++ local m=$2 ++ local ec_id=$3 ++ local opt=$4 ++ grep -E "^$opt =" 
$m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='| awk '{print $1}' ++} +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 92d4e5d..2e59180 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -707,6 +707,19 @@ ec_child_select(ec_fop_data_t *fop) + return 0; + } + ++ if (!fop->parent && fop->lock_count && ++ (fop->locks[0].update[EC_DATA_TXN] || ++ fop->locks[0].update[EC_METADATA_TXN])) { ++ if (ec->quorum_count && (num < ec->quorum_count)) { ++ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, ++ "Insufficient available children " ++ "for this request (have %d, need " ++ "%d). %s", ++ num, ec->quorum_count, ec_msg_str(fop)); ++ return 0; ++ } ++ } ++ + return 1; + } + +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index 3c69471..eab86ee 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -26,6 +26,30 @@ typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t; + + #define EC_FLAG_LOCK_SHARED 0x0001 + ++#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...) \ ++ do { \ ++ ec_t *__ec = fop->xl->private; \ ++ int32_t __op_ret = 0; \ ++ int32_t __op_errno = 0; \ ++ int32_t __success_count = gf_bits_count(fop->good); \ ++ \ ++ __op_ret = op_ret; \ ++ __op_errno = op_errno; \ ++ if (!fop->parent && frame && \ ++ (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) && \ ++ __ec->quorum_count && (__success_count < __ec->quorum_count) && \ ++ op_ret >= 0) { \ ++ __op_ret = -1; \ ++ __op_errno = EIO; \ ++ gf_msg(__ec->xl->name, GF_LOG_ERROR, 0, \ ++ EC_MSG_CHILDS_INSUFFICIENT, \ ++ "Insufficient available children for this request " \ ++ "(have %d, need %d). 
%s", \ ++ __success_count, __ec->quorum_count, ec_msg_str(fop)); \ ++ } \ ++ fn(frame, cookie, this, __op_ret, __op_errno, params); \ ++ } while (0) ++ + enum _ec_xattrop_flags { + EC_FLAG_XATTROP, + EC_FLAG_DATA_DIRTY, +diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c +index 0b8ee21..8192462 100644 +--- a/xlators/cluster/ec/src/ec-dir-write.c ++++ b/xlators/cluster/ec/src/ec-dir-write.c +@@ -218,10 +218,10 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.create != NULL) { +- fop->cbks.create(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, fop->fd, fop->loc[0].inode, +- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.create, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, fop->fd, ++ fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1], ++ &cbk->iatt[2], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -390,9 +390,10 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.link != NULL) { +- fop->cbks.link(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0], +- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata); ++ QUORUM_CBK(fop->cbks.link, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode, ++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], ++ cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -569,9 +570,10 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.mkdir != NULL) { +- fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0], +- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata); ++ QUORUM_CBK(fop->cbks.mkdir, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode, ++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], ++ cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -773,9 
+775,10 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.mknod != NULL) { +- fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0], +- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata); ++ QUORUM_CBK(fop->cbks.mknod, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode, ++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], ++ cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -931,10 +934,10 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.rename != NULL) { +- fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.rename, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], &cbk->iatt[2], &cbk->iatt[3], ++ &cbk->iatt[4], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -1083,9 +1086,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.rmdir != NULL) { +- fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.rmdir, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -1237,10 +1240,10 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.symlink != NULL) { +- fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, fop->loc[0].inode, +- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.symlink, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode, ++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], ++ cbk->xdata); + } + + return 
EC_STATE_LOCK_REUSE; +@@ -1392,9 +1395,9 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.unlink != NULL) { +- fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.unlink, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c +index 8bfa3b4..2dbb4db 100644 +--- a/xlators/cluster/ec/src/ec-inode-write.c ++++ b/xlators/cluster/ec/src/ec-inode-write.c +@@ -185,26 +185,26 @@ ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + switch (fop->id) { + case GF_FOP_SETXATTR: + if (fop->cbks.setxattr) { +- fop->cbks.setxattr(frame, cookie, this, op_ret, op_errno, +- xdata); ++ QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret, ++ op_errno, xdata); + } + break; + case GF_FOP_REMOVEXATTR: + if (fop->cbks.removexattr) { +- fop->cbks.removexattr(frame, cookie, this, op_ret, op_errno, +- xdata); ++ QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this, ++ op_ret, op_errno, xdata); + } + break; + case GF_FOP_FSETXATTR: + if (fop->cbks.fsetxattr) { +- fop->cbks.fsetxattr(frame, cookie, this, op_ret, op_errno, +- xdata); ++ QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this, ++ op_ret, op_errno, xdata); + } + break; + case GF_FOP_FREMOVEXATTR: + if (fop->cbks.fremovexattr) { +- fop->cbks.fremovexattr(frame, cookie, this, op_ret, op_errno, +- xdata); ++ QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this, ++ op_ret, op_errno, xdata); + } + break; + } +@@ -494,16 +494,15 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state) + + if (fop->id == GF_FOP_SETATTR) { + if (fop->cbks.setattr != NULL) { +- fop->cbks.setattr(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], +- 
&cbk->iatt[1], cbk->xdata); ++ QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop, ++ fop->xl, cbk->op_ret, cbk->op_errno, ++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); + } + } else { + if (fop->cbks.fsetattr != NULL) { +- fop->cbks.fsetattr(fop->req_frame, fop, fop->xl, +- cbk->op_ret, cbk->op_errno, +- &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop, ++ fop->xl, cbk->op_ret, cbk->op_errno, ++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); + } + } + +@@ -994,9 +993,9 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.fallocate != NULL) { +- fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop, ++ fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -1247,9 +1246,9 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.discard != NULL) { +- fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -1477,17 +1476,15 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state) + + if (fop->id == GF_FOP_TRUNCATE) { + if (fop->cbks.truncate != NULL) { +- fop->cbks.truncate(fop->req_frame, fop, fop->xl, +- cbk->op_ret, cbk->op_errno, +- &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop, ++ fop->xl, cbk->op_ret, cbk->op_errno, ++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); + } + } else { + if (fop->cbks.ftruncate != NULL) { +- fop->cbks.ftruncate(fop->req_frame, fop, fop->xl, +- cbk->op_ret, cbk->op_errno, +- 
&cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop, ++ fop->xl, cbk->op_ret, cbk->op_errno, ++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); + } + } + +@@ -2245,9 +2242,9 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.writev != NULL) { +- fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index f27f2ec..ea4f6ad 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -654,6 +654,7 @@ struct _ec { + gf_boolean_t optimistic_changelog; + gf_boolean_t parallel_writes; + uint32_t stripe_cache; ++ uint32_t quorum_count; + uint32_t background_heals; + uint32_t heal_wait_qlen; + uint32_t self_heal_window_size; /* max size of read/writes */ +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 3c8013e..19094c4 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -285,6 +285,7 @@ reconfigure(xlator_t *this, dict_t *options) + GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool, + failed); + GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed); ++ GF_OPTION_RECONF("quorum-count", ec->quorum_count, options, uint32, failed); + ret = 0; + if (ec_assign_read_policy(ec, read_policy)) { + ret = -1; +@@ -720,6 +721,7 @@ init(xlator_t *this) + failed); + GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed); + GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed); ++ GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed); + + this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, 
this); + if (!this->itable) +@@ -1402,6 +1404,7 @@ ec_dump_private(xlator_t *this) + gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters); + gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]); + gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes); ++ gf_proc_dump_write("quorum-count", "%u", ec->quorum_count); + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache", + this->type, this->name); +@@ -1672,6 +1675,16 @@ struct volume_options options[] = { + "lead to extra memory consumption, maximum " + "(cache size * stripe size) Bytes per open file."}, + { ++ .key = {"quorum-count"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "0", ++ .description = ++ "This option can be used to define how many successes on" ++ "the bricks constitute a success to the application. This" ++ " count should be in the range" ++ "[disperse-data-count, disperse-count] (inclusive)", ++ }, ++ { + .key = {NULL}, + }, + }; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 8ce338e..7ca47a6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1128,6 +1128,42 @@ out: + } + + static int ++validate_disperse_quorum_count(glusterd_volinfo_t *volinfo, dict_t *dict, ++ char *key, char *value, char **op_errstr) ++{ ++ int ret = -1; ++ int quorum_count = 0; ++ int data_count = 0; ++ ++ ret = gf_string2int(value, &quorum_count); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "%s is not an integer. 
%s expects a " ++ "valid integer value.", ++ value, key); ++ goto out; ++ } ++ ++ if (volinfo->type != GF_CLUSTER_TYPE_DISPERSE) { ++ gf_asprintf(op_errstr, "Cannot set %s for a non-disperse volume.", key); ++ ret = -1; ++ goto out; ++ } ++ ++ data_count = volinfo->disperse_count - volinfo->redundancy_count; ++ if (quorum_count < data_count || quorum_count > volinfo->disperse_count) { ++ gf_asprintf(op_errstr, "%d for %s is out of range [%d - %d]", ++ quorum_count, key, data_count, volinfo->disperse_count); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++static int + validate_parallel_readdir(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) + { +@@ -3663,6 +3699,16 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_13_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, ++ {.key = "disperse.quorum-count", ++ .voltype = "cluster/disperse", ++ .type = NO_DOC, ++ .op_version = GD_OP_VERSION_8_0, ++ .validate_fn = validate_disperse_quorum_count, ++ .description = "This option can be used to define how many successes on" ++ "the bricks constitute a success to the application. This" ++ " count should be in the range" ++ "[disperse-data-count, disperse-count] (inclusive)", ++ .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "features.sdfs", + .voltype = "features/sdfs", +-- +1.8.3.1 + diff --git a/SOURCES/0290-glusterd-tag-disperse.quorum-count-for-31306.patch b/SOURCES/0290-glusterd-tag-disperse.quorum-count-for-31306.patch new file mode 100644 index 0000000..01ea8c2 --- /dev/null +++ b/SOURCES/0290-glusterd-tag-disperse.quorum-count-for-31306.patch @@ -0,0 +1,84 @@ +From 312da653ac80b537af06139f8d83a63180c72461 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Tue, 10 Sep 2019 14:04:17 +0530 +Subject: [PATCH 290/297] glusterd: tag disperse.quorum-count for 31306 + +In upstream disperse.quorum-count is marked for release-8 +latest new op-version is 31306.
+ +Label: DOWNSTREAM ONLY + +fixes: bz#1748688 +Change-Id: I88fdbd56ce3b8475b5ec670659adaa9d11c01d97 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/180675 +Reviewed-by: Ashish Pandey +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/globals.h | 12 ++++++------ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index bdc8b3d..e218285 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -50,19 +50,19 @@ + 1 /* MIN is the fresh start op-version, mostly \ + should not change */ + #define GD_OP_VERSION_MAX \ +- GD_OP_VERSION_8_0 /* MAX VERSION is the maximum \ ++ GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \ + count in VME table, should \ + keep changing with \ + introduction of newer \ + versions */ + +-#define GD_OP_VERSION_RHS_3_0 30000 /* Op-Version of RHS 3.0 */ ++#define GD_OP_VERSION_RHS_3_0 30000 /* Op-Version of RHS 3.0 */ + + #define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_RHS_3_0 + +-#define GD_OP_VERSION_RHS_2_1_5 20105 /* RHS 2.1 update 5 */ ++#define GD_OP_VERSION_RHS_2_1_5 20105 /* RHS 2.1 update 5 */ + +-#define GD_OP_VERSION_RHS_3_0_4 30004 /* Op-Version of RHS 3.0.4 */ ++#define GD_OP_VERSION_RHS_3_0_4 30004 /* Op-Version of RHS 3.0.4 */ + + #define GD_OP_VERSION_3_7_0 30700 /* Op-version for GlusterFS 3.7.0 */ + +@@ -124,6 +124,8 @@ + + #define GD_OP_VERSION_3_13_5 31305 /* Op-version for GlusterFS 3.13.5 */ + ++#define GD_OP_VERSION_3_13_6 31306 /* Op-version for GlusterFS 3.13.6 */ ++ + #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ + + #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */ +@@ -136,8 +138,6 @@ + + #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */ + +-#define 
GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */ +- + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 7ca47a6..16601a2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3702,7 +3702,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "disperse.quorum-count", + .voltype = "cluster/disperse", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_8_0, ++ .op_version = GD_OP_VERSION_3_13_6, + .validate_fn = validate_disperse_quorum_count, + .description = "This option can be used to define how many successes on" + "the bricks constitute a success to the application. This" +-- +1.8.3.1 + diff --git a/SOURCES/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch b/SOURCES/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch new file mode 100644 index 0000000..efdbc23 --- /dev/null +++ b/SOURCES/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch @@ -0,0 +1,106 @@ +From 87d8070f80487322a1736846a78725fd88f8de34 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Tue, 20 Aug 2019 13:27:24 +0530 +Subject: [PATCH 291/297] cluster/ec: Mark release only when it is acquired + +Problem: +Mount-1 Mount-2 +1)Tries to acquire lock on 'dir1' 1)Tries to acquire lock on 'dir1' +2)Lock is granted on brick-0 2)Lock gets EAGAIN on brick-0 and + leads to blocking lock on brick-0 +3)Gets a lock-contention 3) Doesn't matter what happens on mount-2 + notification, marks lock->release from here on. + to true. +4)New fop comes on 'dir1' which will + be put in frozen list as lock->release + is set to true. +5) Lock acquisition from step-2 fails because +3 bricks went down in 4+2 setup. + +Fop on mount-1 which is put in frozen list will hang because no codepath will +move it from frozen list to any other list and the lock will not be retried. 
+ +Fix: +Don't set lock->release to true if lock is not acquired at the time of +lock-contention-notification + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23272 +fixes: bz#1731896 +Change-Id: Ie6630db8735ccf372cc54b873a3a3aed7a6082b7 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/180870 +Tested-by: RHGS Build Bot +Reviewed-by: Ashish Pandey +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-common.c | 20 ++++++++++++++++++-- + xlators/cluster/ec/src/ec-types.h | 1 + + 2 files changed, 19 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 2e59180..5cae37b 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -1867,6 +1867,10 @@ ec_lock_acquired(ec_lock_link_t *link) + LOCK(&lock->loc.inode->lock); + + lock->acquired = _gf_true; ++ if (lock->contention) { ++ lock->release = _gf_true; ++ lock->contention = _gf_false; ++ } + + ec_lock_update_fd(lock, fop); + ec_lock_wake_shared(lock, &list); +@@ -1892,15 +1896,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + ec_lock_link_t *link = NULL; + ec_lock_t *lock = NULL; + ++ link = fop->data; ++ lock = link->lock; + if (op_ret >= 0) { +- link = fop->data; +- lock = link->lock; + lock->mask = lock->good_mask = fop->good; + lock->healing = 0; + + ec_lock_acquired(link); + ec_lock(fop->parent); + } else { ++ LOCK(&lock->loc.inode->lock); ++ { ++ lock->contention = _gf_false; ++ } ++ UNLOCK(&lock->loc.inode->lock); + gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED, + "Failed to complete preop lock"); + } +@@ -2547,6 +2556,13 @@ ec_lock_release(ec_t *ec, inode_t *inode) + gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention", + inode); + ++ if (!lock->acquired) { ++ /* This happens if some bricks already got the lock while inodelk is in ++ * progress. 
Set release to true after lock is acquired*/ ++ lock->contention = _gf_true; ++ goto done; ++ } ++ + /* The lock is not marked to be released, so the frozen list should be + * empty. */ + GF_ASSERT(list_empty(&lock->frozen)); +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index ea4f6ad..34a9768 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -267,6 +267,7 @@ struct _ec_lock { + uint32_t refs_pending; /* Refs assigned to fops being prepared */ + uint32_t waiting_flags; /*Track xattrop/dirty marking*/ + gf_boolean_t acquired; ++ gf_boolean_t contention; + gf_boolean_t unlock_now; + gf_boolean_t release; + gf_boolean_t query; +-- +1.8.3.1 + diff --git a/SOURCES/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch b/SOURCES/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch new file mode 100644 index 0000000..07fc8f4 --- /dev/null +++ b/SOURCES/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch @@ -0,0 +1,72 @@ +From 769263ad422e3c1069de0994ff2274044982b242 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Sun, 1 Sep 2019 12:01:09 +0530 +Subject: [PATCH 292/297] rpc: Update address family if it is not provide in + cmd-line arguments + +Problem: After enabling transport-type to inet6 and passed ipv6 + transport.socket.bind-address in glusterd.vol clients are + not started. 
+ +Solution: Need to update address-family based on remote-address for + all gluster client processes + +> Change-Id: Iaa3588cd87cebc45231bfd675745c1a457dc9b31 +> Fixes: bz#1747746 +> Credits: Amgad Saleh +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit 80b8cfe3f1386606bada97a76a0cad7acdf6b877) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23340/) + +Change-Id: Iaa3588cd87cebc45231bfd675745c1a457dc9b31 +BUG: 1750241 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/181184 +Tested-by: Mohit Agrawal +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/name.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c +index 7f18cc4..b473f3b 100644 +--- a/rpc/rpc-transport/socket/src/name.c ++++ b/rpc/rpc-transport/socket/src/name.c +@@ -214,6 +214,7 @@ af_inet_client_get_remote_sockaddr(rpc_transport_t *this, + uint16_t remote_port = 0; + struct addrinfo *addr_info = NULL; + int32_t ret = 0; ++ struct in6_addr serveraddr; + + remote_host_data = dict_get(options, "remote-host"); + if (remote_host_data == NULL) { +@@ -249,6 +250,13 @@ af_inet_client_get_remote_sockaddr(rpc_transport_t *this, + goto err; + } + ++ /* Need to update transport-address family if address-family is not provided ++ in command-line arguments ++ */ ++ if (inet_pton(AF_INET6, remote_host, &serveraddr)) { ++ sockaddr->sa_family = AF_INET6; ++ } ++ + /* TODO: gf_resolve is a blocking call.
kick in some + non blocking dns techniques */ + ret = gf_resolve_ip6(remote_host, remote_port, sockaddr->sa_family, +@@ -522,7 +530,10 @@ socket_client_get_remote_sockaddr(rpc_transport_t *this, + ret = -1; + } + +- if (*sa_family == AF_UNSPEC) { ++ /* Address-family is updated based on remote_host in ++ af_inet_client_get_remote_sockaddr ++ */ ++ if (*sa_family != sockaddr->sa_family) { + *sa_family = sockaddr->sa_family; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch b/SOURCES/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch new file mode 100644 index 0000000..23120cb --- /dev/null +++ b/SOURCES/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch @@ -0,0 +1,69 @@ +From 8f89aef9691b0806d7487525c6a54a1a615c8bc1 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 2 Sep 2019 10:46:10 +0530 +Subject: [PATCH 293/297] glusterd: IPV6 hostname address is not parsed + correctly + +Problem: IPV6 hostname address is not parsed correctly in function + glusterd_check_brick_order + +Solution: Update the code to parse hostname address + +> Change-Id: Ifb2f83f9c6e987b2292070e048e97eeb51b728ab +> Fixes: bz#1747746 +> Credits: Amgad Saleh +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit 6563ffb04d7ba51a89726e7c5bbb85c7dbc685b5) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23341/) + +Change-Id: Ifb2f83f9c6e987b2292070e048e97eeb51b728ab +BUG: 1750241 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/181185 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 1ea8ba6..076bc80 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ 
b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -95,6 +95,10 @@ glusterd_check_brick_order(dict_t *dict, char *err_str) + int32_t type = GF_CLUSTER_TYPE_NONE; + int32_t sub_count = 0; + struct addrinfo *ai_info = NULL; ++ char brick_addr[128] = { ++ 0, ++ }; ++ int addrlen = 0; + + const char failed_string[2048] = + "Failed to perform brick order " +@@ -182,15 +186,17 @@ glusterd_check_brick_order(dict_t *dict, char *err_str) + brick_list_dup = tmpptr; + if (brick == NULL) + goto check_failed; +- brick = strtok_r(brick, ":", &tmpptr); +- if (brick == NULL) ++ tmpptr = strrchr(brick, ':'); ++ if (tmpptr == NULL) + goto check_failed; +- ret = getaddrinfo(brick, NULL, NULL, &ai_info); ++ addrlen = strlen(brick) - strlen(tmpptr); ++ strncpy(brick_addr, brick, addrlen); ++ brick_addr[addrlen] = '\0'; ++ ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); + if (ret != 0) { + ret = 0; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, +- "unable to resolve " +- "host name"); ++ "unable to resolve host name for addr %s", brick_addr); + goto out; + } + ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); +-- +1.8.3.1 + diff --git a/SOURCES/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch b/SOURCES/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch new file mode 100644 index 0000000..1665185 --- /dev/null +++ b/SOURCES/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch @@ -0,0 +1,59 @@ +From 2fa5476b95d4547bdde50f2281bf58b7db24e37a Mon Sep 17 00:00:00 2001 +From: Aravinda VK +Date: Mon, 16 Sep 2019 10:04:26 +0530 +Subject: [PATCH 294/297] eventsapi: Set IPv4/IPv6 family based on input IP + +server.sin_family was set to AF_INET while creating socket connection, +this was failing if the input address is IPv6(`::1`). + +With this patch, sin_family is set by reading the ai_family of +`getaddrinfo` result. 
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23423/ + +>Fixes: bz#1752330 +>Change-Id: I499f957b432842fa989c698f6e5b25b7016084eb +>Signed-off-by: Aravinda VK + +BUG: 1732443 +Change-Id: I499f957b432842fa989c698f6e5b25b7016084eb +Signed-off-by: Aravinda VK +Reviewed-on: https://code.engineering.redhat.com/gerrit/181197 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/events.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 2509767..9d33783 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -42,6 +42,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + struct addrinfo hints; + struct addrinfo *result = NULL; + xlator_t *this = THIS; ++ int sin_family = AF_INET; + + /* Global context */ + ctx = THIS->ctx; +@@ -75,13 +76,15 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + ret = EVENT_ERROR_RESOLVE; + goto out; + } ++ ++ sin_family = result->ai_family; + } else { + /* Localhost, Use the defined IP for localhost */ + host = gf_strdup(EVENT_HOST); + } + + /* Socket Configurations */ +- server.sin_family = AF_INET; ++ server.sin_family = sin_family; + server.sin_port = htons(EVENT_PORT); + ret = inet_pton(server.sin_family, host, &server.sin_addr); + if (ret <= 0) { +-- +1.8.3.1 + diff --git a/SOURCES/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch b/SOURCES/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch new file mode 100644 index 0000000..9d3820d --- /dev/null +++ b/SOURCES/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch @@ -0,0 +1,1164 @@ +From d5ce2300f77c25b38a076d4dd6a5521e82c56172 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Mon, 29 Jul 2019 18:30:42 +0530 +Subject: [PATCH 295/297] ctime/rebalance: Heal ctime xattr on directory during + rebalance + +After add-brick and rebalance, the ctime xattr is not present +on 
rebalanced directories on new brick. This patch fixes the +same. + +Note that ctime still doesn't support consistent time across +distribute sub-volume. + +This patch also fixes the in-memory inconsistency of time attributes +when metadata is self healed. + +Backport of: + > Patch: https://review.gluster.org/23127/ + > Change-Id: Ia20506f1839021bf61d4753191e7dc34b31bb2df + > fixes: bz#1734026 + > Signed-off-by: Kotresh HR + +Change-Id: Ia20506f1839021bf61d4753191e7dc34b31bb2df +BUG: 1728673 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/181105 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + tests/basic/afr/split-brain-healing-ctime.t | 253 +++++++++++++++++++++ + tests/basic/afr/split-brain-healing.t | 1 + + tests/basic/ctime/ctime-ec-heal.t | 71 ++++++ + tests/basic/ctime/ctime-ec-rebalance.t | 44 ++++ + tests/basic/ctime/ctime-rep-heal.t | 71 ++++++ + tests/basic/ctime/ctime-rep-rebalance.t | 42 ++++ + .../bug-1734370-entry-heal-restore-time.t | 84 +++++++ + tests/volume.rc | 15 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 3 +- + xlators/cluster/afr/src/afr-self-heal-entry.c | 2 + + xlators/cluster/dht/src/dht-common.c | 1 + + xlators/cluster/ec/src/ec-heal.c | 7 +- + xlators/storage/posix/src/posix-entry-ops.c | 8 +- + xlators/storage/posix/src/posix-helpers.c | 31 ++- + xlators/storage/posix/src/posix-inode-fd-ops.c | 57 ++--- + xlators/storage/posix/src/posix-metadata.c | 65 +++++- + xlators/storage/posix/src/posix-metadata.h | 7 + + xlators/storage/posix/src/posix.h | 5 +- + 18 files changed, 714 insertions(+), 53 deletions(-) + create mode 100644 tests/basic/afr/split-brain-healing-ctime.t + create mode 100644 tests/basic/ctime/ctime-ec-heal.t + create mode 100644 tests/basic/ctime/ctime-ec-rebalance.t + create mode 100644 tests/basic/ctime/ctime-rep-heal.t + create mode 100644 tests/basic/ctime/ctime-rep-rebalance.t + create mode 100644 tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t + 
+diff --git a/tests/basic/afr/split-brain-healing-ctime.t b/tests/basic/afr/split-brain-healing-ctime.t +new file mode 100644 +index 0000000..1ca18e3 +--- /dev/null ++++ b/tests/basic/afr/split-brain-healing-ctime.t +@@ -0,0 +1,253 @@ ++#!/bin/bash ++ ++#Test the split-brain resolution CLI commands. ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++function get_replicate_subvol_number { ++ local filename=$1 ++ #get_backend_paths ++ if [ -f $B0/${V0}1/$filename ] ++ then ++ echo 0 ++ elif [ -f $B0/${V0}3/$filename ] ++ then echo 1 ++ else ++ echo -1 ++ fi ++} ++ ++cleanup; ++ ++AREQUAL_PATH=$(dirname $0)/../../utils ++GET_MDATA_PATH=$(dirname $0)/../../utils ++CFLAGS="" ++test "`uname -s`" != "Linux" && { ++ CFLAGS="$CFLAGS -lintl"; ++} ++build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS ++build_tester $GET_MDATA_PATH/get-mdata-xattr.c ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} ++TEST $CLI volume set $V0 cluster.self-heal-daemon off ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++cd $M0 ++for i in {1..10} ++do ++ echo "Initial content">>file$i ++done ++ ++replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`) ++replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`) ++ ++############ Create data split-brain in the files. 
########################### ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++for file in ${!replica_0_files_list[*]} ++do ++ echo "B1 is down">>${replica_0_files_list[$file]} ++done ++TEST kill_brick $V0 $H0 $B0/${V0}3 ++for file in ${!replica_1_files_list[*]} ++do ++ echo "B3 is down">>${replica_1_files_list[$file]} ++done ++ ++SMALLER_FILE_SIZE=$(stat -c %s file1) ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++for file in ${!replica_0_files_list[*]} ++do ++ echo "B2 is down">>${replica_0_files_list[$file]} ++ echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]} ++done ++TEST kill_brick $V0 $H0 $B0/${V0}4 ++for file in ${!replica_1_files_list[*]} ++do ++ echo "B4 is down">>${replica_1_files_list[$file]} ++ echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]} ++done ++ ++BIGGER_FILE_SIZE=$(stat -c %s file1) ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3 ++ ++ ++############### Acessing the files should now give EIO. ############################### ++TEST ! cat file1 ++TEST ! cat file2 ++TEST ! cat file3 ++TEST ! cat file4 ++TEST ! cat file5 ++TEST ! cat file6 ++TEST ! cat file7 ++TEST ! cat file8 ++TEST ! cat file9 ++TEST ! 
cat file10 ++################### ++TEST $CLI volume set $V0 cluster.self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3 ++ ++################ Heal file1 using the bigger-file option ############## ++$CLI volume heal $V0 split-brain bigger-file /file1 ++EXPECT "0" echo $? ++EXPECT $BIGGER_FILE_SIZE stat -c %s file1 ++ ++################ Heal file2 using the bigger-file option and its gfid ############## ++subvolume=$(get_replicate_subvol_number file2) ++if [ $subvolume == 0 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2) ++elif [ $subvolume == 1 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2) ++fi ++GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" ++$CLI volume heal $V0 split-brain bigger-file $GFIDSTR ++EXPECT "0" echo $? ++ ++################ Heal file3 using the source-brick option ############## ++################ Use the brick having smaller file size as source ####### ++subvolume=$(get_replicate_subvol_number file3) ++if [ $subvolume == 0 ] ++then ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3 ++elif [ $subvolume == 1 ] ++then ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3 ++fi ++EXPECT "0" echo $? 
++EXPECT $SMALLER_FILE_SIZE stat -c %s file3 ++ ++################ Heal file4 using the source-brick option and it's gfid ############## ++################ Use the brick having smaller file size as source ####### ++subvolume=$(get_replicate_subvol_number file4) ++if [ $subvolume == 0 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4) ++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR ++elif [ $subvolume == 1 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4) ++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR ++fi ++EXPECT "0" echo $? ++EXPECT $SMALLER_FILE_SIZE stat -c %s file4 ++ ++# With ctime enabled, the ctime xattr ("trusted.glusterfs.mdata") gets healed ++# as part of metadata heal. So mtime would be same, hence it can't be healed ++# using 'latest-mtime' policy, use 'source-brick' option instead. ++################ Heal file5 using the source-brick option ############## ++subvolume=$(get_replicate_subvol_number file5) ++if [ $subvolume == 0 ] ++then ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file5 ++elif [ $subvolume == 1 ] ++then ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 /file5 ++fi ++EXPECT "0" echo $? 
++ ++if [ $subvolume == 0 ] ++then ++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5) ++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5) ++elif [ $subvolume == 1 ] ++then ++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5) ++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5) ++fi ++ ++#TODO: To below comparisons on full sub-second resolution ++ ++TEST [ $mtime1_after_heal -eq $mtime2_after_heal ] ++ ++mtime_mount_after_heal=$(stat -c %Y file5) ++ ++TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ] ++ ++################ Heal file6 using the source-brick option and its gfid ############## ++subvolume=$(get_replicate_subvol_number file6) ++if [ $subvolume == 0 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6) ++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR ++elif [ $subvolume == 1 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6) ++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 $GFIDSTR ++fi ++EXPECT "0" echo $? ++ ++if [ $subvolume == 0 ] ++then ++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6) ++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6) ++elif [ $subvolume == 1 ] ++then ++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6) ++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6) ++fi ++ ++#TODO: To below comparisons on full sub-second resolution ++ ++TEST [ $mtime1_after_heal -eq $mtime2_after_heal ] ++ ++mtime_mount_after_heal=$(stat -c %Y file6) ++ ++TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ] ++ ++################ Heal remaining SB'ed files of replica_0 using B1 as source ############## ++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 ++EXPECT "0" echo $? ++ ++################ Heal remaining SB'ed files of replica_1 using B3 as source ############## ++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 ++EXPECT "0" echo $? ++ ++############### Reading the files should now succeed. 
############################### ++TEST cat file1 ++TEST cat file2 ++TEST cat file3 ++TEST cat file4 ++TEST cat file5 ++TEST cat file6 ++TEST cat file7 ++TEST cat file8 ++TEST cat file9 ++TEST cat file10 ++ ++################ File contents on the bricks must be same. ################################ ++TEST diff <(arequal-checksum -p $B0/$V01 -i .glusterfs) <(arequal-checksum -p $B0/$V02 -i .glusterfs) ++TEST diff <(arequal-checksum -p $B0/$V03 -i .glusterfs) <(arequal-checksum -p $B0/$V04 -i .glusterfs) ++ ++############### Trying to heal files not in SB should fail. ############################### ++$CLI volume heal $V0 split-brain bigger-file /file1 ++EXPECT "1" echo $? ++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3 ++EXPECT "1" echo $? ++ ++cd - ++TEST rm $AREQUAL_PATH/arequal-checksum ++TEST rm $GET_MDATA_PATH/get-mdata-xattr ++cleanup +diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t +index 78553e6..315e815 100644 +--- a/tests/basic/afr/split-brain-healing.t ++++ b/tests/basic/afr/split-brain-healing.t +@@ -35,6 +35,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off + TEST $CLI volume set $V0 cluster.data-self-heal off + TEST $CLI volume set $V0 cluster.metadata-self-heal off + TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 ctime off + TEST $CLI volume start $V0 + TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + +diff --git a/tests/basic/ctime/ctime-ec-heal.t b/tests/basic/ctime/ctime-ec-heal.t +new file mode 100644 +index 0000000..1cb4516 +--- /dev/null ++++ b/tests/basic/ctime/ctime-ec-heal.t +@@ -0,0 +1,71 @@ ++#!/bin/bash ++# ++# This will test self healing of ctime xattr 'trusted.glusterfs.mdata' ++# ++### ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup ++ ++#cleate and start volume ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{1..3} ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0 ++ ++#Mount the volume ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++mkdir $M0/dir1 ++echo "Initial content" > $M0/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++# Kill brick ++TEST kill_brick $V0 $H0 $B0/${V0}3 ++ ++echo "B3 is down" >> $M0/file1 ++echo "Change dir1 time attributes" > $M0/dir1/dir1_file1 ++echo "Entry heal file" > $M0/entry_heal_file1 ++mkdir $M0/entry_heal_dir1 ++ ++# Check xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1 ++ ++TEST $CLI volume start $V0 force ++$CLI volume heal $V0 ++ ++# Check xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1 ++ ++cleanup; +diff --git a/tests/basic/ctime/ctime-ec-rebalance.t b/tests/basic/ctime/ctime-ec-rebalance.t +new file mode 100644 +index 0000000..caccdc1 +--- /dev/null ++++ b/tests/basic/ctime/ctime-ec-rebalance.t +@@ -0,0 +1,44 @@ ++#!/bin/bash ++# ++# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance ++# ++### ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../fallocate.rc ++ ++cleanup ++ ++#cleate and start volume ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5} ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0 ++ ++#Mount the volume ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++ ++# Create files ++mkdir $M0/dir1 ++echo "test data" > $M0/dir1/file1 ++ ++# Add brick ++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8} ++ ++#Trigger rebalance ++TEST $CLI volume rebalance $V0 start force ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0 ++ ++#Verify ctime xattr heal on directory ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1" ++ ++b6_mdata=$(get_mdata "$B0/${V0}6/dir1") ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1 ++ ++cleanup; +diff --git a/tests/basic/ctime/ctime-rep-heal.t b/tests/basic/ctime/ctime-rep-heal.t +new file mode 100644 +index 0000000..ba8b08a +--- /dev/null ++++ b/tests/basic/ctime/ctime-rep-heal.t +@@ -0,0 +1,71 @@ ++#!/bin/bash ++# ++# This will test self healing of ctime xattr 'trusted.glusterfs.mdata' ++# ++### ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup ++ ++#cleate and start volume ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3} ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0 ++ ++#Mount the volume ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++mkdir $M0/dir1 ++echo "Initial content" > $M0/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++# Kill brick ++TEST kill_brick $V0 $H0 $B0/${V0}3 ++ ++echo "B3 is down" >> $M0/file1 ++echo "Change dir1 time attributes" > $M0/dir1/dir1_file1 ++echo "Entry heal file" > $M0/entry_heal_file1 ++mkdir $M0/entry_heal_dir1 ++ ++# Check xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1 ++ ++TEST $CLI volume start $V0 force ++$CLI volume heal $V0 ++ ++# Check xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN 
$PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1 ++ ++cleanup; +diff --git a/tests/basic/ctime/ctime-rep-rebalance.t b/tests/basic/ctime/ctime-rep-rebalance.t +new file mode 100644 +index 0000000..dd9743e +--- /dev/null ++++ b/tests/basic/ctime/ctime-rep-rebalance.t +@@ -0,0 +1,42 @@ ++#!/bin/bash ++# ++# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance ++# ++### ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup ++ ++#cleate and start volume ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..5} ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0 ++ ++#Mount the volume ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++mkdir $M0/dir1 ++ ++# Add brick ++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8} ++ ++#Trigger rebalance ++TEST $CLI volume rebalance $V0 start force ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0 ++ ++#Verify ctime xattr heal on directory ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1" ++ ++b6_mdata=$(get_mdata "$B0/${V0}6/dir1") ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1 ++ ++cleanup; +diff --git a/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t +new file mode 100644 +index 0000000..298d6ed +--- /dev/null ++++ b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t +@@ -0,0 +1,84 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++function time_stamps_match { ++ path=$1 ++ mtime_source_b0=$(get_mtime $B0/${V0}0/$path) ++ atime_source_b0=$(get_atime $B0/${V0}0/$path) ++ mtime_source_b2=$(get_mtime $B0/${V0}2/$path) ++ atime_source_b2=$(get_atime $B0/${V0}2/$path) ++ mtime_sink_b1=$(get_mtime $B0/${V0}1/$path) ++ atime_sink_b1=$(get_atime $B0/${V0}1/$path) ++ ++ #The same brick must be the source of heal for both atime and mtime. ++ if [[ ( $mtime_source_b0 -eq $mtime_sink_b1 && $atime_source_b0 -eq $atime_sink_b1 ) || \ ++ ( $mtime_source_b2 -eq $mtime_sink_b1 && $atime_source_b2 -eq $atime_sink_b1 ) ]] ++ then ++ echo "Y" ++ else ++ echo "N" ++ fi ++ ++} ++ ++# Test that the parent dir's timestamps are restored during entry-heal. ++GET_MDATA_PATH=$(dirname $0)/../../utils ++build_tester $GET_MDATA_PATH/get-mdata-xattr.c ++ ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0; ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++############################################################################### ++TEST mkdir $M0/DIR ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST touch $M0/DIR/FILE ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++EXPECT "Y" time_stamps_match DIR 
++ctime_source1=$(get_ctime $B0/${V0}0/$path) ++ctime_source2=$(get_ctime $B0/${V0}2/$path) ++ctime_sink=$(get_ctime $B0/${V0}1/$path) ++TEST [ $ctime_source1 -eq $ctime_sink ] ++TEST [ $ctime_source2 -eq $ctime_sink ] ++ ++############################################################################### ++# Repeat the test with ctime feature disabled. ++TEST $CLI volume set $V0 features.ctime off ++TEST mkdir $M0/DIR2 ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST touch $M0/DIR2/FILE ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++EXPECT "Y" time_stamps_match DIR2 ++ ++TEST rm $GET_MDATA_PATH/get-mdata-xattr ++cleanup; +diff --git a/tests/volume.rc b/tests/volume.rc +index 76a8fd4..9a002d9 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -371,6 +371,19 @@ function get_gfid2path { + getfattr -h --only-values -n glusterfs.gfidtopath $path 2>/dev/null + } + ++function get_mdata { ++ local path=$1 ++ getfattr -h -e hex -n trusted.glusterfs.mdata $path 2>/dev/null | grep "trusted.glusterfs.mdata" | cut -f2 -d'=' ++} ++ ++function get_mdata_count { ++ getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | wc -l ++} ++ ++function get_mdata_uniq_count { ++ getfattr -d -m . 
-e hex $@ 2>/dev/null | grep mdata | uniq | wc -l ++} ++ + function get_xattr_key { + local key=$1 + local path=$2 +@@ -925,7 +938,7 @@ function get_ctime { + local time=$(get-mdata-xattr -c $1) + if [ $time == "-1" ]; + then +- echo $(stat -c %Z $2) ++ echo $(stat -c %Z $1) + else + echo $time + fi +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index b38085a..81ef38a 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -513,7 +513,8 @@ afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode, + + AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc, + &replies[source].poststat, +- (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME), NULL); ++ (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME), ++ NULL); + + loc_wipe(&loc); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index e07b521..35b600f 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -1032,6 +1032,8 @@ unlock: + goto postop_unlock; + } + ++ afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks, ++ locked_replies); + ret = afr_selfheal_undo_pending( + frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending, + AFR_ENTRY_TRANSACTION, locked_replies, postop_lock); +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 219b072..99cccd6 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -115,6 +115,7 @@ char *xattrs_to_heal[] = {"user.", + QUOTA_LIMIT_KEY, + QUOTA_LIMIT_OBJECTS_KEY, + GF_SELINUX_XATTR_KEY, ++ GF_XATTR_MDATA_KEY, + NULL}; + + char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL}; +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 
0f0f398..06a7016 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2301,9 +2301,10 @@ ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd, + + loc.inode = inode_ref(fd->inode); + gf_uuid_copy(loc.gfid, fd->inode->gfid); +- ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies, +- output, frame, ec->xl, &loc, &source_buf, +- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL); ++ ret = cluster_setattr( ++ ec->xl_list, healed_sinks, ec->nodes, replies, output, frame, ++ ec->xl, &loc, &source_buf, ++ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL); + EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes); + if (EC_COUNT(healed_sinks, ec->nodes) == 0) { + ret = -ENOTCONN; +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index 34ee2b8..283b305 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -500,7 +500,7 @@ post_op: + posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name); + } + +- op_ret = posix_entry_create_xattr_set(this, real_path, xdata); ++ op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); + if (op_ret) { + if (errno != EEXIST) + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, +@@ -828,7 +828,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + "setting ACLs on %s failed ", real_path); + } + +- op_ret = posix_entry_create_xattr_set(this, real_path, xdata); ++ op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setting xattrs on %s failed", real_path); +@@ -1529,7 +1529,7 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + } + + ignore: +- op_ret = posix_entry_create_xattr_set(this, real_path, xdata); ++ op_ret = posix_entry_create_xattr_set(this, loc, real_path, 
xdata); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setting xattrs on %s failed ", real_path); +@@ -2167,7 +2167,7 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name); + } + ignore: +- op_ret = posix_entry_create_xattr_set(this, real_path, xdata); ++ op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setting xattrs on %s failed ", real_path); +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index d143d4c..6a1a35c 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1188,11 +1188,15 @@ posix_dump_buffer(xlator_t *this, const char *real_path, const char *key, + #endif + + int +-posix_handle_pair(xlator_t *this, const char *real_path, char *key, ++posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key, + data_t *value, int flags, struct iatt *stbuf) + { + int sys_ret = -1; + int ret = 0; ++ int op_errno = 0; ++ struct mdata_iatt mdata_iatt = { ++ 0, ++ }; + #ifdef GF_DARWIN_HOST_OS + const int error_code = EINVAL; + #else +@@ -1216,6 +1220,23 @@ posix_handle_pair(xlator_t *this, const char *real_path, char *key, + /* ignore this key value pair */ + ret = 0; + goto out; ++ } else if (!strncmp(key, GF_XATTR_MDATA_KEY, strlen(key))) { ++ /* This is either by rebalance or self heal. Create the xattr if it's ++ * not present. Compare and update the larger value if the xattr is ++ * already present. 
++ */ ++ if (loc == NULL) { ++ ret = -EINVAL; ++ goto out; ++ } ++ posix_mdata_iatt_from_disk(&mdata_iatt, ++ (posix_mdata_disk_t *)value->data); ++ ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path, ++ &mdata_iatt, &op_errno); ++ if (ret != 0) { ++ ret = -op_errno; ++ } ++ goto out; + } else { + sys_ret = sys_lsetxattr(real_path, key, value->data, value->len, flags); + #ifdef GF_DARWIN_HOST_OS +@@ -1810,8 +1831,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) + return 0; + } + +- ret = posix_handle_pair(filler->this, filler->real_path, k, v, XATTR_CREATE, +- filler->stbuf); ++ ret = posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v, ++ XATTR_CREATE, filler->stbuf); + if (ret < 0) { + errno = -ret; + return -1; +@@ -1820,7 +1841,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) + } + + int +-posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict) ++posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path, ++ dict_t *dict) + { + int ret = -1; + +@@ -1834,6 +1856,7 @@ posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict) + filler.this = this; + filler.real_path = path; + filler.stbuf = NULL; ++ filler.loc = loc; + + ret = dict_foreach(dict, _handle_entry_create_keyvalue_pair, &filler); + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index e0ea85b..a2a518f 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -429,22 +429,9 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + &frame->root->ctime, stbuf, valid); + } + +- if (valid & GF_SET_ATTR_CTIME && !priv->ctime) { +- /* +- * If ctime is not enabled, we have no means to associate an +- * arbitrary ctime with the file, so as a fallback, we ignore +- * the ctime payload and update the file ctime to current time +- 
* (which is possible directly with the POSIX API). +- */ +- op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL(real_path, NULL); +- if (op_ret == -1) { +- op_errno = errno; +- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED, +- "setattr (utimes) on %s " +- "failed", +- real_path); +- goto out; +- } ++ if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { ++ posix_update_ctime_in_mdata(this, real_path, -1, loc->inode, ++ &frame->root->ctime, stbuf, valid); + } + + if (!valid) { +@@ -469,14 +456,6 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + goto out; + } + +- if (valid & GF_SET_ATTR_CTIME && priv->ctime) { +- /* +- * If we got ctime payload, we override +- * the ctime of statpost with that. +- */ +- statpost.ia_ctime = stbuf->ia_ctime; +- statpost.ia_ctime_nsec = stbuf->ia_ctime_nsec; +- } + posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost); + + if (xdata) +@@ -592,6 +571,7 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt statpost = { + 0, + }; ++ struct posix_private *priv = NULL; + struct posix_fd *pfd = NULL; + dict_t *xattr_rsp = NULL; + int32_t ret = -1; +@@ -604,6 +584,9 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + ++ priv = this->private; ++ VALIDATE_OR_GOTO(priv, out); ++ + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); +@@ -656,6 +639,11 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + &frame->root->ctime, stbuf, valid); + } + ++ if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { ++ posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode, ++ &frame->root->ctime, stbuf, valid); ++ } ++ + if (!valid) { + op_ret = sys_fchown(pfd->fd, -1, -1); + if (op_ret == -1) { +@@ -2578,7 +2566,7 @@ _handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) + + filler = tmp; + +- return 
posix_handle_pair(filler->this, filler->real_path, k, v, ++ return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v, + filler->flags, filler->stbuf); + } + +@@ -2641,27 +2629,27 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + ++ MAKE_INODE_HANDLE(real_path, this, loc, NULL); ++ if (!real_path) { ++ op_ret = -1; ++ op_errno = ESTALE; ++ goto out; ++ } ++ + ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt); + if (ret == 0) { + /* This is initiated by lookup when ctime feature is enabled to create + * "trusted.glusterfs.mdata" xattr if not present. These are the files + * which were created when ctime feature is disabled. + */ +- ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, &mdata_iatt, +- &op_errno); ++ ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path, ++ &mdata_iatt, &op_errno); + if (ret != 0) { + op_ret = -1; + } + goto out; + } + +- MAKE_INODE_HANDLE(real_path, this, loc, NULL); +- if (!real_path) { +- op_ret = -1; +- op_errno = ESTALE; +- goto out; +- } +- + posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false); + + op_ret = -1; +@@ -2796,6 +2784,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + filler.real_path = real_path; + filler.this = this; + filler.stbuf = &preop; ++ filler.loc = loc; + + #ifdef GF_DARWIN_HOST_OS + filler.flags = map_xattr_flags(flags); +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index 532daa2..9efaf99 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -56,6 +56,19 @@ posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in) + out->atime.tv_nsec = be64toh(in->atime.tv_nsec); + } + ++void ++posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in) ++{ 
++ out->ia_ctime = be64toh(in->ctime.tv_sec); ++ out->ia_ctime_nsec = be64toh(in->ctime.tv_nsec); ++ ++ out->ia_mtime = be64toh(in->mtime.tv_sec); ++ out->ia_mtime_nsec = be64toh(in->mtime.tv_nsec); ++ ++ out->ia_atime = be64toh(in->atime.tv_sec); ++ out->ia_atime_nsec = be64toh(in->atime.tv_nsec); ++} ++ + /* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */ + static int + posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, +@@ -341,6 +354,7 @@ posix_compare_timespec(struct timespec *first, struct timespec *second) + + int + posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, ++ const char *realpath, + struct mdata_iatt *mdata_iatt, int *op_errno) + { + posix_mdata_t *mdata = NULL; +@@ -369,8 +383,8 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, + goto unlock; + } + +- ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata, +- op_errno); ++ ret = posix_fetch_mdata_xattr(this, realpath, -1, inode, ++ (void *)mdata, op_errno); + if (ret == 0) { + /* Got mdata from disk. This is a race, another client + * has healed the xattr during lookup. 
So set it in inode +@@ -412,7 +426,7 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, + } + } + +- ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata); ++ ret = posix_store_mdata_xattr(this, realpath, -1, inode, mdata); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED, + "gfid: %s key:%s ", uuid_utoa(inode->gfid), +@@ -445,7 +459,8 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, time, out); + +- if (update_utime && (!u_atime || !u_mtime)) { ++ if (update_utime && (flag->ctime && !time) && (flag->atime && !u_atime) && ++ (flag->mtime && !u_mtime)) { + goto out; + } + +@@ -652,6 +667,48 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, + return; + } + ++/* posix_update_ctime_in_mdata updates the posix_mdata_t when ctime needs ++ * to be modified ++ */ ++void ++posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd, ++ inode_t *inode, struct timespec *ctime, ++ struct iatt *stbuf, int valid) ++{ ++ int32_t ret = 0; ++#if defined(HAVE_UTIMENSAT) ++ struct timespec tv_ctime = { ++ 0, ++ }; ++#else ++ struct timeval tv_ctime = { ++ 0, ++ }; ++#endif ++ posix_mdata_flag_t flag = { ++ 0, ++ }; ++ ++ struct posix_private *priv = NULL; ++ priv = this->private; ++ ++ if (inode && priv->ctime) { ++ tv_ctime.tv_sec = stbuf->ia_ctime; ++ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_ctime, stbuf->ia_ctime_nsec); ++ flag.ctime = 1; ++ ++ ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv_ctime, NULL, ++ NULL, NULL, &flag, _gf_true); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, ++ "posix set mdata atime failed on file:" ++ " %s gfid:%s", ++ real_path, uuid_utoa(inode->gfid)); ++ } ++ } ++ return; ++} ++ + static void + posix_get_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag) + { +diff --git 
a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h +index c176699..63e8771 100644 +--- a/xlators/storage/posix/src/posix-metadata.h ++++ b/xlators/storage/posix/src/posix-metadata.h +@@ -43,6 +43,10 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid); + void ++posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd, ++ inode_t *inode, struct timespec *ctime, ++ struct iatt *stbuf, int valid); ++void + posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + int fd, inode_t *inode, struct iatt *stbuf); + void +@@ -56,7 +60,10 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + int fd_out, inode_t *inode_out, struct iatt *stbuf_out); + int + posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, ++ const char *realpath, + struct mdata_iatt *mdata_iatt, + int *op_errno); ++void ++posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in); + + #endif /* _POSIX_METADATA_H */ +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 64288a7..dd51062 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -339,7 +339,7 @@ dict_t * + posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd, + int fdnum, dict_t *xattr, struct iatt *buf); + int +-posix_handle_pair(xlator_t *this, const char *real_path, char *key, ++posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key, + data_t *value, int flags, struct iatt *stbuf); + int + posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key, +@@ -352,7 +352,8 @@ int + posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, + dict_t *xattr_req); + int +-posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict); ++posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, 
const char *path, ++ dict_t *dict); + + int + posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd, +-- +1.8.3.1 + diff --git a/SOURCES/0296-glusterfind-pre-command-failure-on-a-modify.patch b/SOURCES/0296-glusterfind-pre-command-failure-on-a-modify.patch new file mode 100644 index 0000000..9f43ff8 --- /dev/null +++ b/SOURCES/0296-glusterfind-pre-command-failure-on-a-modify.patch @@ -0,0 +1,62 @@ +From bfb64a0e685eb5755ceda6c54690335564e135c9 Mon Sep 17 00:00:00 2001 +From: Hari Gowtham +Date: Mon, 16 Sep 2019 14:22:34 +0530 +Subject: [PATCH 296/297] glusterfind: pre command failure on a modify + +Label: DOWNSTREAM ONLY + +On upstream we have gfid_to_all_paths_using_gfid2path instead of +gfid_to_path_using_pgfid and so we do not hit this in upstream. + +Problem: On a modify, the pre commands runs through the find function. +where the number of arguments sent mismatches and causes a stderr. +The mismatch is because of both changelog and brickfind use the find(), +but the brickfind was alone handled. + +Fix: Have handled the additional argument on the changelog side as well. +Received it as a dummy variable for changelog. 
+ +Change-Id: I5eecdd993e477b68a0e486db2ad7e56ba94bbf02 +fixes: bz#1733970 +Signed-off-by: Hari Gowtham +Reviewed-on: https://code.engineering.redhat.com/gerrit/181095 +Tested-by: RHGS Build Bot +Reviewed-by: Aravinda Vishwanathapura Krishna Murthy +Reviewed-by: Rinku Kothiya +--- + tools/glusterfind/src/changelog.py | 5 +++-- + tools/glusterfind/src/utils.py | 2 +- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py +index 40c381b..ef982db 100644 +--- a/tools/glusterfind/src/changelog.py ++++ b/tools/glusterfind/src/changelog.py +@@ -141,8 +141,9 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args): + + # Length of brick path, to remove from output path + brick_path_len = len(brick) +- +- def output_callback(path, inode): ++ # is_dir is a dummy variable to make it compitable with the find ++ # used in brickfind ++ def output_callback(path, inode, is_dir): + # For each path found, encodes it and updates path1 + # Also updates converted flag in inodegfid table as 1 + path = path.strip() +diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py +index cc09903..e226c5a 100644 +--- a/tools/glusterfind/src/utils.py ++++ b/tools/glusterfind/src/utils.py +@@ -70,7 +70,7 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True, + else: + filter_result = filter_func(full_path) + if filter_result is not None: +- callback_func(full_path, filter_result) ++ callback_func(full_path, filter_result, None) + else: + filter_result = filter_func(full_path) + if filter_result is not None: +-- +1.8.3.1 + diff --git a/SOURCES/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch b/SOURCES/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch new file mode 100644 index 0000000..47b5da0 --- /dev/null +++ b/SOURCES/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch @@ -0,0 +1,89 @@ +From 37555b6c83d3a979033111a754ee1728dab254f5 Mon Sep 17 
00:00:00 2001 +From: Hari Gowtham +Date: Wed, 18 Sep 2019 17:38:52 +0530 +Subject: [PATCH 297/297] rpmbuild: fixing the build errors with 2a905a8ae + +Label: DOWNSTREAM ONLY + +Have added a Makefile inside extras/quota to remove the +No rule to make target error for quota/log_accounting.sh + +Change-Id: Ia3f6b3fa21a0de7eb3bdb31b3d205139df412aca +fixes: bz#1719171 +Signed-off-by: Hari Gowtham +Reviewed-on: https://code.engineering.redhat.com/gerrit/181326 +Tested-by: RHGS Build Bot +Reviewed-by: Aravinda Vishwanathapura Krishna Murthy +Reviewed-by: Atin Mukherjee +--- + configure.ac | 1 + + extras/Makefile.am | 6 +----- + extras/quota/Makefile.am | 8 ++++++++ + 3 files changed, 10 insertions(+), 5 deletions(-) + create mode 100644 extras/quota/Makefile.am + +diff --git a/configure.ac b/configure.ac +index f597b86..327733e 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -232,6 +232,7 @@ AC_CONFIG_FILES([Makefile + extras/hook-scripts/reset/pre/Makefile + extras/python/Makefile + extras/snap_scheduler/Makefile ++ extras/quota/Makefile + events/Makefile + events/src/Makefile + events/src/eventsapiconf.py +diff --git a/extras/Makefile.am b/extras/Makefile.am +index 8cbfda1..31ccdf5 100644 +--- a/extras/Makefile.am ++++ b/extras/Makefile.am +@@ -12,7 +12,7 @@ EditorMode_DATA = glusterfs-mode.el glusterfs.vim + + SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \ + $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \ +- ganesha ++ ganesha quota + + confdir = $(sysconfdir)/glusterfs + if WITH_SERVER +@@ -30,14 +30,11 @@ endif + + scriptsdir = $(datadir)/glusterfs/scripts + scripts_SCRIPTS = thin-arbiter/setup-thin-arbiter.sh +-scripts_SCRIPTS += quota/log_accounting.sh + scripts_SCRIPTS += collect-system-stats.sh + scripts_SCRIPTS += identify-hangs.sh + if WITH_SERVER + scripts_SCRIPTS += post-upgrade-script-for-quota.sh \ + pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh +-scripts_SCRIPTS += quota/quota_fsck.py 
+-scripts_SCRIPTS += quota/xattr_analysis.py + if USE_SYSTEMD + scripts_SCRIPTS += control-cpu-load.sh + scripts_SCRIPTS += control-mem.sh +@@ -56,7 +53,6 @@ EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.co + stop-all-gluster-processes.sh clang-checker.sh mount-shared-storage.sh \ + control-cpu-load.sh control-mem.sh group-distributed-virt \ + thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh \ +- quota/xattr_analysis.py quota/quota_fsck.py quota/log_accounting.sh \ + collect-system-stats.sh identify-hangs.sh + + if WITH_SERVER +diff --git a/extras/quota/Makefile.am b/extras/quota/Makefile.am +new file mode 100644 +index 0000000..cdb6be1 +--- /dev/null ++++ b/extras/quota/Makefile.am +@@ -0,0 +1,8 @@ ++scriptsdir = $(datadir)/glusterfs/scripts ++scripts_SCRIPTS = log_accounting.sh ++ ++if WITH_SERVER ++scripts_SCRIPTS += xattr_analysis.py quota_fsck.py ++endif ++ ++EXTRA_DIST = log_accounting.sh xattr_analysis.py quota_fsck.py +-- +1.8.3.1 + diff --git a/SOURCES/0298-geo-rep-fix-sub-command-during-worker-connection.patch b/SOURCES/0298-geo-rep-fix-sub-command-during-worker-connection.patch new file mode 100644 index 0000000..72daa15 --- /dev/null +++ b/SOURCES/0298-geo-rep-fix-sub-command-during-worker-connection.patch @@ -0,0 +1,56 @@ +From f65f4739914cf317da7e5eaa3b5a06fe64f338c2 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Sat, 21 Sep 2019 01:07:30 +0530 +Subject: [PATCH 298/302] geo-rep : fix sub-command during worker connection + +Problem: + +Geo-rep session for non-root going faulty. + +Solution: + +During worker start we do not construct slave url and use 'args.resource_remote' +which is basically just slave-hostname. +This works better for root session but fails in non-root session during +ssh command. +Using slave url solves this issue. 
+ +Backport of: + >fixes: bz#1753928 + >Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22 + >Signed-off-by: Sunny Kumar + +Upstream patch: + https://review.gluster.org/#/c/glusterfs/+/23465/ + +BUG: 1754407 +Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/181895 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/subcmds.py | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py +index 8de7db2..f8515f2 100644 +--- a/geo-replication/syncdaemon/subcmds.py ++++ b/geo-replication/syncdaemon/subcmds.py +@@ -73,8 +73,11 @@ def subcmd_worker(args): + Popen.init_errhandler() + fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + local = GLUSTER("localhost", args.master) +- slavevol = args.slave.split("::")[-1] +- slavehost = args.resource_remote ++ slave_url, slavevol = args.slave.split("::") ++ if "@" not in slave_url: ++ slavehost = args.resource_remote ++ else: ++ slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote) + remote = SSH(slavehost, slavevol) + remote.connect_remote() + local.connect() +-- +1.8.3.1 + diff --git a/SOURCES/0299-geo-rep-performance-improvement-while-syncing-rename.patch b/SOURCES/0299-geo-rep-performance-improvement-while-syncing-rename.patch new file mode 100644 index 0000000..9dea8cc --- /dev/null +++ b/SOURCES/0299-geo-rep-performance-improvement-while-syncing-rename.patch @@ -0,0 +1,156 @@ +From f293f7ac2f75c58d81da1229b484eb530b7083b5 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Fri, 20 Sep 2019 09:39:12 +0530 +Subject: [PATCH 299/302] geo-rep: performance improvement while syncing + renames with existing gfid + +Problem: +The bug[1] addresses issue of data inconsistency when handling RENAME with +existing destination. 
This fix requires some performance tuning considering +this issue occurs in heavy rename workload. + +Solution: +If distribution count for master volume is one do not verify op's on +master and go ahead with rename. + +The performance improvement with this patch can only be observed if +master volume has distribution count one. + +[1]. https://bugzilla.redhat.com/show_bug.cgi?id=1694820 +Backport of: + + >fixes: bz#1753857 + >Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b + >Signed-off-by: Sunny Kumar + +Upstream Patch: + https://review.gluster.org/#/c/glusterfs/+/23459/ + +BUG: 1726000 +Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/181893 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + geo-replication/gsyncd.conf.in | 5 +++++ + geo-replication/syncdaemon/gsyncd.py | 2 ++ + geo-replication/syncdaemon/monitor.py | 2 ++ + geo-replication/syncdaemon/resource.py | 13 +++++++++++-- + geo-replication/syncdaemon/syncdutils.py | 11 +++++++++++ + 5 files changed, 31 insertions(+), 2 deletions(-) + +diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in +index 5ebd57a..9155cd8 100644 +--- a/geo-replication/gsyncd.conf.in ++++ b/geo-replication/gsyncd.conf.in +@@ -23,6 +23,11 @@ configurable=false + type=int + value=1 + ++[master-distribution-count] ++configurable=false ++type=int ++value=1 ++ + [glusterd-workdir] + value = @GLUSTERD_WORKDIR@ + +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index a4c6f32..6ae5269 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -134,6 +134,8 @@ def main(): + help="Directory where Gluster binaries exist on slave") + p.add_argument("--slave-access-mount", action="store_true", + help="Do not lazy umount the slave volume") ++ p.add_argument("--master-dist-count", type=int, ++ help="Master Distribution count") + + # Status 
+ p = sp.add_parser("status") +diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py +index 234f3f1..236afe7 100644 +--- a/geo-replication/syncdaemon/monitor.py ++++ b/geo-replication/syncdaemon/monitor.py +@@ -37,6 +37,8 @@ def get_subvol_num(brick_idx, vol, hot): + tier = vol.is_tier() + disperse_count = vol.disperse_count(tier, hot) + replica_count = vol.replica_count(tier, hot) ++ distribute_count = vol.distribution_count(tier, hot) ++ gconf.setconfig("master-distribution-count", distribute_count) + + if (tier and not hot): + brick_idx = brick_idx - vol.get_hot_bricks_count(tier) +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index b16db60..189d8a1 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -377,6 +377,7 @@ class Server(object): + def entry_ops(cls, entries): + pfx = gauxpfx() + logging.debug('entries: %s' % repr(entries)) ++ dist_count = rconf.args.master_dist_count + + def entry_purge(op, entry, gfid, e, uid, gid): + # This is an extremely racy code and needs to be fixed ASAP. +@@ -686,9 +687,15 @@ class Server(object): + raise + else: + raise +- elif not matching_disk_gfid(gfid, en): ++ elif not matching_disk_gfid(gfid, en) and dist_count > 1: + collect_failure(e, EEXIST, uid, gid, True) + else: ++ # We are here which means matching_disk_gfid for ++ # both source and destination has returned false ++ # and distribution count for master vol is greater ++ # then one. Which basically says both the source and ++ # destination exist and not hardlinks. ++ # So we are safe to go ahead with rename here. 
+ rename_with_disk_gfid_confirmation(gfid, entry, en, + uid, gid) + if blob: +@@ -1409,7 +1416,9 @@ class SSH(object): + '--slave-gluster-log-level', + gconf.get("slave-gluster-log-level"), + '--slave-gluster-command-dir', +- gconf.get("slave-gluster-command-dir")] ++ gconf.get("slave-gluster-command-dir"), ++ '--master-dist-count', ++ str(gconf.get("master-distribution-count"))] + + if gconf.get("slave-access-mount"): + args_to_slave.append('--slave-access-mount') +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index 2ee10ac..aadaebd 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -926,6 +926,14 @@ class Volinfo(object): + else: + return int(self.get('disperseCount')[0].text) + ++ def distribution_count(self, tier, hot): ++ if (tier and hot): ++ return int(self.get('hotBricks/hotdistCount')[0].text) ++ elif (tier and not hot): ++ return int(self.get('coldBricks/colddistCount')[0].text) ++ else: ++ return int(self.get('distCount')[0].text) ++ + @property + @memoize + def hot_bricks(self): +@@ -994,6 +1002,9 @@ class VolinfoFromGconf(object): + def disperse_count(self, tier, hot): + return gconf.get("master-disperse-count") + ++ def distribution_count(self, tier, hot): ++ return gconf.get("master-distribution-count") ++ + @property + @memoize + def hot_bricks(self): +-- +1.8.3.1 + diff --git a/SOURCES/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch b/SOURCES/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch new file mode 100644 index 0000000..62bac41 --- /dev/null +++ b/SOURCES/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch @@ -0,0 +1,70 @@ +From 039a3f81209706261fc809eac94564e81a3377da Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 25 Sep 2019 14:55:19 +0530 +Subject: [PATCH 300/302] cli: remove the warning displayed when remove brick + start issued + +remove-brick start command gives displays below 
error: + +It is recommended that remove-brick be run with cluster.force-migration +option disabled to prevent possible data corruption. Doing so will ensure +that files that receive writes during migration will not be migrated and +will need to be manually copied after the remove-brick commit operation. +Please check the value of the option and update accordingly. +Do you want to continue with your current cluster.force-migration settings? (y/n) + +As we are not qualifying cluster.force-migration for 3.5.0, +we should not display this message. So, removing it. + +Label: DOWNSTREAM ONLY + +BUG: 1755227 +Change-Id: I409f2059d43c5e867788f19d2ccb8d6d839520f7 +fixes: bz#1755227 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/182009 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + cli/src/cli-cmd-parser.c | 2 -- + cli/src/cli-cmd-volume.c | 11 ----------- + 2 files changed, 13 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 92ceb8e..4456a7b 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -2101,8 +2101,6 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words, + wordcount--; + if (!strcmp("start", w)) { + command = GF_OP_CMD_START; +- if (question) +- *question = 1; + } else if (!strcmp("commit", w)) { + command = GF_OP_CMD_COMMIT; + } else if (!strcmp("stop", w)) { +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index a42e663..6b958bd 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -2088,17 +2088,6 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state, + "Remove-brick force will not migrate files from the " + "removed bricks, so they will no longer be available" + " on the volume.\nDo you want to continue?"; +- } else if (command == GF_OP_CMD_START) { +- question = +- "It is recommended that remove-brick be run with" +- " cluster.force-migration option disabled to prevent" +- " 
possible data corruption. Doing so will ensure that" +- " files that receive writes during migration will not" +- " be migrated and will need to be manually copied" +- " after the remove-brick commit operation. Please" +- " check the value of the option and update accordingly." +- " \nDo you want to continue with your current" +- " cluster.force-migration settings?"; + } + + if (!brick_count) { +-- +1.8.3.1 + diff --git a/SOURCES/0301-posix-Brick-is-going-down-unexpectedly.patch b/SOURCES/0301-posix-Brick-is-going-down-unexpectedly.patch new file mode 100644 index 0000000..270a0d7 --- /dev/null +++ b/SOURCES/0301-posix-Brick-is-going-down-unexpectedly.patch @@ -0,0 +1,61 @@ +From 913a0dc8f1eaa2fb18a6ebd6fcf66f46b48039f1 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Wed, 18 Sep 2019 19:11:33 +0530 +Subject: [PATCH 301/302] posix: Brick is going down unexpectedly + +Problem: In brick_mux environment, while multiple volumes are + created (1-1000) sometimes brick is going down due to + health_check thread failure + +Solution: Ignore EAGAIN error in health_check thread code to + avoid the issue + +> Change-Id: Id44c59f8e071a363a14d09d188813a6633855213 +> Fixes: bz#1751907 +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit c4d926900dc36f71c04b3f65ceca5150ce0e8c81) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23437/) + +Change-Id: Id44c59f8e071a363a14d09d188813a6633855213 +BUG: 1731826 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/182106 +Tested-by: Mohit Agrawal +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix-helpers.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 6a1a35c..35dd3b6 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ 
-2108,14 +2108,20 @@ out: + if (fd != -1) { + sys_close(fd); + } ++ + if (ret && file_path[0]) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED, + "%s() on %s returned ret is %d error is %s", op, file_path, ret, + ret != -1 ? strerror(ret) : strerror(op_errno)); +- gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED, +- "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op, +- file_path, strerror(op_errno), priv->hostname, priv->base_path, +- timeout); ++ ++ if ((op_errno == EAGAIN) || (ret == EAGAIN)) { ++ ret = 0; ++ } else { ++ gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED, ++ "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op, ++ file_path, strerror(op_errno), priv->hostname, ++ priv->base_path, timeout); ++ } + } + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch b/SOURCES/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch new file mode 100644 index 0000000..7972767 --- /dev/null +++ b/SOURCES/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch @@ -0,0 +1,67 @@ +From fb1d503791c874296afab0cd7be59b6865340d72 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Wed, 25 Sep 2019 11:56:35 +0200 +Subject: [PATCH 302/302] cluster/ec: prevent filling shd log with "table not + found" messages + +When self-heal daemon receives an inodelk contention notification, it tries +to locate the related inode using inode_find() and the inode table owned by +top-most xlator, which in this case doesn't have any inode table. This causes +many messages to be logged by inode_find() function because the inode table +passed is NULL. + +This patch prevents this by making sure the inode table is not NULL before +calling inode_find(). 
+ +Upstream patch: +> Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23481 +> Fixes: bz#1755344 +> Signed-off-by: Xavi Hernandez + +Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f +BUG: 1754790 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/182207 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 19094c4..3f31c74 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -463,6 +463,7 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall) + struct gf_upcall_cache_invalidation *ci = NULL; + struct gf_upcall_inodelk_contention *lc = NULL; + inode_t *inode; ++ inode_table_t *table; + + switch (upcall->event_type) { + case GF_UPCALL_CACHE_INVALIDATION: +@@ -476,8 +477,18 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall) + /* The lock is not owned by EC, ignore it. */ + return _gf_true; + } +- inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable, +- upcall->gfid); ++ table = ((xlator_t *)ec->xl->graph->top)->itable; ++ if (table == NULL) { ++ /* Self-heal daemon doesn't have an inode table on the top ++ * xlator because it doesn't need it. In this case we should ++ * use the inode table managed by EC itself where all inodes ++ * being healed should be present. However self-heal doesn't ++ * use eager-locking and inodelk's are already released as ++ * soon as possible. In this case we can safely ignore these ++ * notifications. */ ++ return _gf_false; ++ } ++ inode = inode_find(table, upcall->gfid); + /* If inode is not found, it means that it's already released, + * so we can ignore it. Probably it has been released and + * destroyed while the contention notification was being sent. 
+-- +1.8.3.1 + diff --git a/SOURCES/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch b/SOURCES/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch new file mode 100644 index 0000000..8641353 --- /dev/null +++ b/SOURCES/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch @@ -0,0 +1,45 @@ +From ae4f538065d26a277e38810c6eef18c0312cd1f3 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 26 Sep 2019 17:52:30 +0530 +Subject: [PATCH 303/304] posix: heketidbstorage bricks go down during PVC + creation + +Problem: In OCS environment heketidbstorage is detached due + to a health_check thread failure. Sometimes aio_write + does not finish within the default health-check-timeout + limit and the brick is detached. + +Solution: To avoid the issue increase default timeout to 20s + +> Change-Id: Idff283d5713da571f9d20a6b296274f69c3e5b7b +> Fixes: bz#1755900 +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit c6df9e962483bac5bfcd8916318b19040387ce81) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23495/) + +Change-Id: Idff283d5713da571f9d20a6b296274f69c3e5b7b +BUG: 1752713 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/182387 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/storage/posix/src/posix-common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index 69857d9..2cb58ba 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -1257,7 +1257,7 @@ struct volume_options posix_options[] = { + {.key = {"health-check-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, +- .default_value = "10", ++ .default_value = "20", + .validate = GF_OPT_VALIDATE_MIN, + .description = + "Interval in seconds to wait aio_write finish for health check, " +-- +1.8.3.1 + diff --git 
a/SOURCES/0304-cluster-dht-Correct-fd-processing-loop.patch b/SOURCES/0304-cluster-dht-Correct-fd-processing-loop.patch new file mode 100644 index 0000000..5f16e0a --- /dev/null +++ b/SOURCES/0304-cluster-dht-Correct-fd-processing-loop.patch @@ -0,0 +1,194 @@ +From ad233c1b3abdfe2bdfd1eacc83b5f84b7afa6b46 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Tue, 1 Oct 2019 17:37:15 +0530 +Subject: [PATCH 304/304] cluster/dht: Correct fd processing loop + +The fd processing loops in the +dht_migration_complete_check_task and the +dht_rebalance_inprogress_task functions were unsafe +and could cause an open to be sent on an already freed +fd. This has been fixed. + +> Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540 +> Fixes: bz#1757399 +> Signed-off-by: N Balachandran +> (Cherry picked from commit 9b15867070b0cc241ab165886292ecffc3bc0aed) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23506/) + +Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540 +BUG: 1756325 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/182826 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-helper.c | 84 ++++++++++++++++++++++++++---------- + 1 file changed, 62 insertions(+), 22 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c +index 4c57e0d..1e9fee0 100644 +--- a/xlators/cluster/dht/src/dht-helper.c ++++ b/xlators/cluster/dht/src/dht-helper.c +@@ -1261,6 +1261,7 @@ dht_migration_complete_check_task(void *data) + fd_t *tmp = NULL; + uint64_t tmp_miginfo = 0; + dht_migrate_info_t *miginfo = NULL; ++ gf_boolean_t skip_open = _gf_false; + int open_failed = 0; + + this = THIS; +@@ -1399,24 +1400,34 @@ dht_migration_complete_check_task(void *data) + * the loop will cause the destruction of the fd. So we need to + * iterate the list safely because iter_fd cannot be trusted. 
+ */ +- list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list) +- { +- if (fd_is_anonymous(iter_fd)) +- continue; +- +- if (dht_fd_open_on_dst(this, iter_fd, dst_node)) +- continue; +- ++ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list); ++ while (&iter_fd->inode_list != (&inode->fd_list)) { ++ if (fd_is_anonymous(iter_fd) || ++ (dht_fd_open_on_dst(this, iter_fd, dst_node))) { ++ if (!tmp) { ++ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd), ++ inode_list); ++ continue; ++ } ++ skip_open = _gf_true; ++ } + /* We need to release the inode->lock before calling + * syncop_open() to avoid possible deadlocks. However this + * can cause the iter_fd to be released by other threads. + * To avoid this, we take a reference before releasing the + * lock. + */ +- __fd_ref(iter_fd); ++ fd_ref(iter_fd); + + UNLOCK(&inode->lock); + ++ if (tmp) { ++ fd_unref(tmp); ++ tmp = NULL; ++ } ++ if (skip_open) ++ goto next; ++ + /* flags for open are stripped down to allow following the + * new location of the file, otherwise we can get EEXIST or + * truncate the file again as rebalance is moving the data */ +@@ -1438,9 +1449,11 @@ dht_migration_complete_check_task(void *data) + dht_fd_ctx_set(this, iter_fd, dst_node); + } + +- fd_unref(iter_fd); +- ++ next: + LOCK(&inode->lock); ++ skip_open = _gf_false; ++ tmp = iter_fd; ++ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list); + } + + SYNCTASK_SETID(frame->root->uid, frame->root->gid); +@@ -1453,6 +1466,10 @@ dht_migration_complete_check_task(void *data) + + unlock: + UNLOCK(&inode->lock); ++ if (tmp) { ++ fd_unref(tmp); ++ tmp = NULL; ++ } + + out: + if (dict) { +@@ -1534,6 +1551,7 @@ dht_rebalance_inprogress_task(void *data) + int open_failed = 0; + uint64_t tmp_miginfo = 0; + dht_migrate_info_t *miginfo = NULL; ++ gf_boolean_t skip_open = _gf_false; + + this = THIS; + frame = data; +@@ -1654,24 +1672,40 @@ dht_rebalance_inprogress_task(void *data) + * the loop 
will cause the destruction of the fd. So we need to + * iterate the list safely because iter_fd cannot be trusted. + */ +- list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list) +- { +- if (fd_is_anonymous(iter_fd)) +- continue; +- +- if (dht_fd_open_on_dst(this, iter_fd, dst_node)) +- continue; +- ++ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list); ++ while (&iter_fd->inode_list != (&inode->fd_list)) { + /* We need to release the inode->lock before calling + * syncop_open() to avoid possible deadlocks. However this + * can cause the iter_fd to be released by other threads. + * To avoid this, we take a reference before releasing the + * lock. + */ +- __fd_ref(iter_fd); + ++ if (fd_is_anonymous(iter_fd) || ++ (dht_fd_open_on_dst(this, iter_fd, dst_node))) { ++ if (!tmp) { ++ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd), ++ inode_list); ++ continue; ++ } ++ skip_open = _gf_true; ++ } ++ ++ /* Yes, this is ugly but there isn't a cleaner way to do this ++ * the fd_ref is an atomic increment so not too bad. We want to ++ * reduce the number of inode locks and unlocks. 
++ */ ++ ++ fd_ref(iter_fd); + UNLOCK(&inode->lock); + ++ if (tmp) { ++ fd_unref(tmp); ++ tmp = NULL; ++ } ++ if (skip_open) ++ goto next; ++ + /* flags for open are stripped down to allow following the + * new location of the file, otherwise we can get EEXIST or + * truncate the file again as rebalance is moving the data */ +@@ -1692,9 +1726,11 @@ dht_rebalance_inprogress_task(void *data) + dht_fd_ctx_set(this, iter_fd, dst_node); + } + +- fd_unref(iter_fd); +- ++ next: + LOCK(&inode->lock); ++ skip_open = _gf_false; ++ tmp = iter_fd; ++ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list); + } + + SYNCTASK_SETID(frame->root->uid, frame->root->gid); +@@ -1702,6 +1738,10 @@ dht_rebalance_inprogress_task(void *data) + unlock: + UNLOCK(&inode->lock); + ++ if (tmp) { ++ fd_unref(tmp); ++ tmp = NULL; ++ } + if (open_failed) { + ret = -1; + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch b/SOURCES/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch new file mode 100644 index 0000000..a1e77c6 --- /dev/null +++ b/SOURCES/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch @@ -0,0 +1,56 @@ +From 90e52f3b44da0ed05e35ebd474e284d45794b0d6 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Thu, 10 Oct 2019 20:40:49 +0530 +Subject: [PATCH 305/307] glusterd: rebalance start should fail when quorum is + not met + +rebalance start should not succeed if quorum is not met. +this patch adds a condition to check whether quorum is met +in pre-validation stage. 
+ +> fixes: bz#1760467 +> Change-Id: Ic7d0d08f69e4bc6d5e7abae713ec1881531c8ad4 +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/23536/ +BUG: 1760261 +Change-Id: Ic7d0d08f69e4bc6d5e7abae713ec1881531c8ad4 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/183146 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/glusterd/quorum-validation.t | 2 ++ + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 3 ++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t +index ff46729..3cc3351 100644 +--- a/tests/bugs/glusterd/quorum-validation.t ++++ b/tests/bugs/glusterd/quorum-validation.t +@@ -34,6 +34,8 @@ TEST ! $CLI_1 volume add-brick $V0 $H1:$B1/${V0}2 + TEST ! $CLI_1 volume remove-brick $V0 $H1:$B1/${V0}0 start + TEST ! $CLI_1 volume set $V0 barrier enable + ++#quorum is not met, rebalance/profile start should fail ++TEST ! $CLI_1 volume rebalance $V0 start + TEST ! 
$CLI_1 volume profile $V0 start + #bug-1690753 - Volume stop when quorum not met is successful +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index ec78913..a4915f3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -1059,7 +1059,8 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict, + goto out; + } + +- if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME) { ++ if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME || ++ op == GD_OP_REBALANCE) { + ret = glusterd_validate_quorum(this, op, req_dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET, +-- +1.8.3.1 + diff --git a/SOURCES/0306-cli-fix-distCount-value.patch b/SOURCES/0306-cli-fix-distCount-value.patch new file mode 100644 index 0000000..0e8b9f2 --- /dev/null +++ b/SOURCES/0306-cli-fix-distCount-value.patch @@ -0,0 +1,43 @@ +From 167980565e1ab56989b25fe6aa0203aeb7970c8b Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Sun, 6 Oct 2019 19:05:28 +0530 +Subject: [PATCH 306/307] cli: fix distCount value + +gluster volume info --xml is displaying wrong distCount +value. This patch addresses it. 
+ +> fixes: bz#1758878 +> Change-Id: I64081597e06018361e6524587b433b0c4b2a0260 +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/23521/ + +BUG: 1758618 +Change-Id: I64081597e06018361e6524587b433b0c4b2a0260 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/183147 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-xml-output.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c +index b417bb8..006e2fb 100644 +--- a/cli/src/cli-xml-output.c ++++ b/cli/src/cli-xml-output.c +@@ -2548,8 +2548,9 @@ cli_xml_output_vol_info(cli_local_t *local, dict_t *dict) + ret = dict_get_int32(dict, key, &dist_count); + if (ret) + goto out; +- ret = xmlTextWriterWriteFormatElement( +- local->writer, (xmlChar *)"distCount", "%d", dist_count); ++ ret = xmlTextWriterWriteFormatElement(local->writer, ++ (xmlChar *)"distCount", "%d", ++ (brick_count / dist_count)); + XML_RET_CHECK_AND_GOTO(ret, out); + + snprintf(key, sizeof(key), "volume%d.stripe_count", i); +-- +1.8.3.1 + diff --git a/SOURCES/0307-ssl-fix-RHEL8-regression-failure.patch b/SOURCES/0307-ssl-fix-RHEL8-regression-failure.patch new file mode 100644 index 0000000..7a85b50 --- /dev/null +++ b/SOURCES/0307-ssl-fix-RHEL8-regression-failure.patch @@ -0,0 +1,42 @@ +From be9695391f39fe6eb1d157f6bfd018116d1ee42b Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Mon, 30 Sep 2019 13:14:06 +0530 +Subject: [PATCH 307/307] ssl: fix RHEL8 regression failure + +This test is failing with +"SSL routines:SSL_CTX_use_certificate:ee key too small" +in RHEL8. 
This change is made according to +https://access.redhat.com/solutions/4157431 + +> updates: bz#1756900 +> Change-Id: Ib436372c3bd94bcf7324976337add7da4088b3d5 +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/23501/ + +BUG: 1704562 +Change-Id: Ib436372c3bd94bcf7324976337add7da4088b3d5 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/183148 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/cli/bug-1320388.t | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t +index f5ffcbe..8e5d77b 100755 +--- a/tests/bugs/cli/bug-1320388.t ++++ b/tests/bugs/cli/bug-1320388.t +@@ -21,7 +21,7 @@ cleanup; + rm -f $SSL_BASE/glusterfs.* + touch "$GLUSTERD_WORKDIR"/secure-access + +-TEST openssl genrsa -out $SSL_KEY 1024 ++TEST openssl genrsa -out $SSL_KEY 3072 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +-- +1.8.3.1 + diff --git a/SOURCES/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch b/SOURCES/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch new file mode 100644 index 0000000..adbeb43 --- /dev/null +++ b/SOURCES/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch @@ -0,0 +1,347 @@ +From 27f799563c1c2c1986662ed4a3a83d834c04fd98 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 14 Oct 2019 15:42:31 +0530 +Subject: [PATCH 308/308] dht: Rebalance causing IO Error - File descriptor in + bad state + +Problem : When a file is migrated, dht attempts to re-open all open + fds on the new cached subvol. Earlier, if dht had not opened the fd, + the client xlator would be unable to find the remote fd and would + fall back to using an anon fd for the fop. 
That behavior changed with + https://review.gluster.org/#/c/glusterfs/+/15804, causing fops to fail + with EBADFD if the fd was not available on the cached subvol. + The client xlator returns EBADFD if the remote fd is not found but + dht only checks for EBADF before re-opening fds on the new cached subvol. + +Solution: Handle EBADFD at dht code path to avoid the issue + +> Change-Id: I43c51995cdd48d05b12e4b2889c8dbe2bb2a72d8 +> Fixes: bz#1758579 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 9314a9fbf487614c736cf6c4c1b93078d37bb9df) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23518/) + +Change-Id: I43c51995cdd48d05b12e4b2889c8dbe2bb2a72d8 +BUG: 1758432 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/183370 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 27 +++++++++++++++++--- + xlators/cluster/dht/src/dht-common.h | 19 ++++++++++++++ + xlators/cluster/dht/src/dht-helper.c | 29 +++++++++++++++++++++ + xlators/cluster/dht/src/dht-inode-read.c | 42 +++++++++++++++++++++++++++---- + xlators/cluster/dht/src/dht-inode-write.c | 16 ++++++------ + 5 files changed, 116 insertions(+), 17 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 99cccd6..37952ba 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -53,6 +53,17 @@ dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req); + int + dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc); + ++/* Check the xdata to make sure EBADF has been set by client xlator */ ++int32_t ++dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno) ++{ ++ if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) && ++ !(local->fd_checked)) { ++ return 1; ++ } ++ return 0; ++} ++ + /* Sets the blocks and size values to fixed 
values. This is to be called + * only for dirs. The caller is responsible for checking the type + */ +@@ -4529,6 +4540,7 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int this_call_cnt = 0; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; ++ int ret = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(frame->local, err); +@@ -4537,6 +4549,13 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + conf = this->private; + local = frame->local; + ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { ++ ret = dht_check_and_open_fd_on_subvol(this, frame); ++ if (ret) ++ goto err; ++ return 0; ++ } ++ + LOCK(&frame->lock); + { + if (!xattr || (op_ret == -1)) { +@@ -5204,8 +5223,8 @@ dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + local->op_errno = op_errno; + +- if ((local->fop == GF_FOP_FSETXATTR) && op_ret == -1 && +- (op_errno == EBADF) && !(local->fd_checked)) { ++ if ((local->fop == GF_FOP_FSETXATTR) && ++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -5929,8 +5948,8 @@ dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + local->op_errno = op_errno; + +- if ((local->fop == GF_FOP_FREMOVEXATTR) && (op_ret == -1) && +- (op_errno == EBADF) && !(local->fd_checked)) { ++ if ((local->fop == GF_FOP_FREMOVEXATTR) && ++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index c516271..ce11f02 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -1230,6 +1230,22 @@ dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata); + + 
int ++dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata); ++ ++int ++dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, dict_t *xattr, dict_t *xdata); ++ ++int ++dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata); ++int ++dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata); ++ ++int + gf_defrag_status_get(dht_conf_t *conf, dict_t *dict); + + void +@@ -1525,4 +1541,7 @@ int + dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata); + ++int32_t ++dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno); ++ + #endif /* _DHT_H */ +diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c +index 1e9fee0..4f7370d 100644 +--- a/xlators/cluster/dht/src/dht-helper.c ++++ b/xlators/cluster/dht/src/dht-helper.c +@@ -366,6 +366,23 @@ dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame, + + break; + ++ case GF_FOP_FXATTROP: ++ STACK_WIND(frame, dht_common_xattrop_cbk, subvol, ++ subvol->fops->fxattrop, local->fd, ++ local->rebalance.flags, local->rebalance.xattr, ++ local->xattr_req); ++ break; ++ ++ case GF_FOP_FGETXATTR: ++ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr, ++ local->fd, local->key, NULL); ++ break; ++ ++ case GF_FOP_FINODELK: ++ STACK_WIND(frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk, ++ local->key, local->fd, local->rebalance.lock_cmd, ++ &local->rebalance.flock, local->xattr_req); ++ break; + default: + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, + "Unknown FOP on fd (%p) on file %s @ %s", fd, +@@ -429,6 +446,18 @@ handle_err: + DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL); + break; + ++ case 
GF_FOP_FXATTROP: ++ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL); ++ break; ++ ++ case GF_FOP_FGETXATTR: ++ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL); ++ break; ++ ++ case GF_FOP_FINODELK: ++ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL); ++ break; ++ + default: + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, + "Unknown FOP on fd (%p) on file %s @ %s", fd, +diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c +index cacfe35..0c209a5 100644 +--- a/xlators/cluster/dht/src/dht-inode-read.c ++++ b/xlators/cluster/dht/src/dht-inode-read.c +@@ -162,8 +162,8 @@ dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + local = frame->local; + prev = cookie; + +- if ((local->fop == GF_FOP_FSTAT) && (op_ret == -1) && (op_errno == EBADF) && +- !(local->fd_checked)) { ++ if ((local->fop == GF_FOP_FSTAT) && ++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -431,7 +431,7 @@ dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + if (local->call_cnt != 1) + goto out; + +- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -703,7 +703,7 @@ dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + if (local->call_cnt != 1) + goto out; + +- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -820,7 +820,7 @@ dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + + local->op_errno = op_errno; + +- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { ++ if 
(dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -1223,6 +1223,13 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + if (local->call_cnt != 1) + goto out; + ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { ++ ret = dht_check_and_open_fd_on_subvol(this, frame); ++ if (ret) ++ goto out; ++ return 0; ++ } ++ + ret = dht_read_iatt_from_xdata(this, xdata, &stbuf); + + if ((!op_ret) && (ret)) { +@@ -1535,8 +1542,26 @@ dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + + { ++ dht_local_t *local = NULL; ++ int ret = 0; ++ ++ GF_VALIDATE_OR_GOTO("dht", frame, out); ++ GF_VALIDATE_OR_GOTO("dht", this, out); ++ GF_VALIDATE_OR_GOTO("dht", frame->local, out); ++ ++ local = frame->local; ++ ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { ++ ret = dht_check_and_open_fd_on_subvol(this, frame); ++ if (ret) ++ goto out; ++ return 0; ++ } ++ ++out: + dht_lk_inode_unref(frame, op_ret); + DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata); ++ + return 0; + } + +@@ -1574,6 +1599,13 @@ dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + if (ret) + goto err; + */ ++ local->rebalance.flock = *lock; ++ local->rebalance.lock_cmd = cmd; ++ local->key = gf_strdup(volume); ++ ++ if (xdata) ++ local->xattr_req = dict_ref(xdata); ++ + STACK_WIND(frame, dht_finodelk_cbk, lock_subvol, + lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata); + +diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c +index b26b705..b6b349d 100644 +--- a/xlators/cluster/dht/src/dht-inode-write.c ++++ b/xlators/cluster/dht/src/dht-inode-write.c +@@ -49,7 +49,7 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + * We only check once as this could be a valid bad fd error. 
+ */ + +- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -262,8 +262,8 @@ dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + * We only check once as this could actually be a valid error. + */ + +- if ((local->fop == GF_FOP_FTRUNCATE) && (op_ret == -1) && +- ((op_errno == EBADF) || (op_errno == EINVAL)) && !(local->fd_checked)) { ++ if ((local->fop == GF_FOP_FTRUNCATE) && ++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -489,7 +489,7 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + * We only check once as this could actually be a valid error. + */ + +- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -666,7 +666,7 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + * and a lookup updated the cached subvol in the inode ctx. + * We only check once as this could actually be a valid error. + */ +- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -838,7 +838,7 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + * and a lookup updated the cached subvol in the inode ctx. + * We only check once as this could actually be a valid error. 
+ */ +- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -1005,8 +1005,8 @@ dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + local->op_errno = op_errno; + +- if ((local->fop == GF_FOP_FSETATTR) && (op_ret == -1) && +- (op_errno == EBADF) && !(local->fd_checked)) { ++ if ((local->fop == GF_FOP_FSETATTR) && ++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch b/SOURCES/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch new file mode 100644 index 0000000..6ae359e --- /dev/null +++ b/SOURCES/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch @@ -0,0 +1,240 @@ +From 2b1738402276f43d7cb64542b74cb50145e46d77 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Wed, 16 Oct 2019 14:25:47 +0530 +Subject: [PATCH 309/309] geo-rep: Fix config upgrade on non-participating node + +After upgrade, if the config files are of old format, it +gets migrated to new format. Monitor process migrates it. +Since monitor doesn't run on nodes where bricks are not +hosted, it doesn't get migrated there. So this patch fixes +the config upgrade on nodes which doesn't host bricks. +This happens during config either on get/set/reset. 
+ +Backport of: + > Patch: https://review.gluster.org/23555 + > Change-Id: Ibade2f2310b0f3affea21a3baa1ae0eb71162cba + > Signed-off-by: Kotresh HR + > fixes: bz#1762220 + +Change-Id: Ibade2f2310b0f3affea21a3baa1ae0eb71162cba +Signed-off-by: Kotresh HR +BUG: 1760939 +Reviewed-on: https://code.engineering.redhat.com/gerrit/183461 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/gsyncd.py | 3 +- + tests/00-geo-rep/georep-config-upgrade.t | 132 +++++++++++++++++++++++++++++++ + tests/00-geo-rep/gsyncd.conf.old | 47 +++++++++++ + 3 files changed, 181 insertions(+), 1 deletion(-) + create mode 100644 tests/00-geo-rep/georep-config-upgrade.t + create mode 100644 tests/00-geo-rep/gsyncd.conf.old + +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index 6ae5269..7b48d82 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -255,7 +255,8 @@ def main(): + if args.subcmd == "slave": + override_from_args = True + +- if args.subcmd == "monitor": ++ if config_file is not None and \ ++ args.subcmd in ["monitor", "config-get", "config-set", "config-reset"]: + ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"]) + if ret is not None: + gconf.config_upgrade(config_file, ret) +diff --git a/tests/00-geo-rep/georep-config-upgrade.t b/tests/00-geo-rep/georep-config-upgrade.t +new file mode 100644 +index 0000000..557461c +--- /dev/null ++++ b/tests/00-geo-rep/georep-config-upgrade.t +@@ -0,0 +1,132 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../geo-rep.rc ++. 
$(dirname $0)/../env.rc ++ ++SCRIPT_TIMEOUT=300 ++OLD_CONFIG_PATH=$(dirname $0)/gsyncd.conf.old ++WORKING_DIR=/var/lib/glusterd/geo-replication/master_127.0.0.1_slave ++ ++##Cleanup and start glusterd ++cleanup; ++TEST glusterd; ++TEST pidof glusterd ++ ++##Variables ++GEOREP_CLI="$CLI volume geo-replication" ++master=$GMV0 ++SH0="127.0.0.1" ++slave=${SH0}::${GSV0} ++num_active=2 ++num_passive=2 ++master_mnt=$M0 ++slave_mnt=$M1 ++ ++############################################################ ++#SETUP VOLUMES AND GEO-REPLICATION ++############################################################ ++ ++##create_and_start_master_volume ++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4}; ++TEST $CLI volume start $GMV0 ++ ++##create_and_start_slave_volume ++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4}; ++TEST $CLI volume start $GSV0 ++ ++##Create, start and mount meta_volume ++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3}; ++TEST $CLI volume start $META_VOL ++TEST mkdir -p $META_MNT ++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT ++ ++##Mount master ++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 ++ ++##Mount slave ++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 ++ ++############################################################ ++#BASIC GEO-REPLICATION TESTS ++############################################################ ++ ++#Create geo-rep session ++TEST create_georep_session $master $slave ++ ++#Config gluster-command-dir ++TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR} ++ ++#Config gluster-command-dir ++TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR} ++ ++#Enable_metavolume ++TEST $GEOREP_CLI $master $slave config use_meta_volume true ++ ++#Wait for common secret pem file to be created ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file ++ ++#Verify the keys are distributed ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed 
++ ++#Start_georep ++TEST $GEOREP_CLI $master $slave start ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active" ++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive" ++ ++TEST $GEOREP_CLI $master $slave config sync-method tarssh ++ ++#Stop Geo-rep ++TEST $GEOREP_CLI $master $slave stop ++ ++#Copy old config file ++mv -f $WORKING_DIR/gsyncd.conf $WORKING_DIR/gsyncd.conf.org ++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf ++ ++#Check if config get all updates config_file ++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf ++TEST $GEOREP_CLI $master $slave config ++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf ++ ++#Check if config get updates config_file ++rm -f $WORKING_DIR/gsyncd.conf ++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf ++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf ++TEST $GEOREP_CLI $master $slave config sync-method ++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf ++ ++#Check if config set updates config_file ++rm -f $WORKING_DIR/gsyncd.conf ++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf ++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf ++TEST $GEOREP_CLI $master $slave config sync-xattrs false ++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf ++ ++#Check if config reset updates config_file ++rm -f $WORKING_DIR/gsyncd.conf ++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf ++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf ++TEST $GEOREP_CLI $master $slave config \!sync-xattrs ++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf ++ ++#Check if geo-rep start updates config_file ++rm -f $WORKING_DIR/gsyncd.conf ++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf ++TEST ! 
grep "sync-method" $WORKING_DIR/gsyncd.conf ++TEST $GEOREP_CLI $master $slave start ++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf ++ ++#Stop geo-rep ++TEST $GEOREP_CLI $master $slave stop ++ ++#Delete Geo-rep ++TEST $GEOREP_CLI $master $slave delete ++ ++#Cleanup authorized keys ++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys ++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys ++ ++cleanup; ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/00-geo-rep/gsyncd.conf.old b/tests/00-geo-rep/gsyncd.conf.old +new file mode 100644 +index 0000000..519acaf +--- /dev/null ++++ b/tests/00-geo-rep/gsyncd.conf.old +@@ -0,0 +1,47 @@ ++[__meta__] ++version = 2.0 ++ ++[peersrx . .] ++remote_gsyncd = /usr/local/libexec/glusterfs/gsyncd ++georep_session_working_dir = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/ ++ssh_command_tar = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/tar_ssh.pem ++changelog_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}-changes.log ++working_dir = /var/lib/misc/glusterfsd/${mastervol}/${eSlave} ++ignore_deletes = false ++pid_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid ++state_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status ++gluster_command_dir = /usr/local/sbin/ ++gluster_params = aux-gfid-mount acl ++ssh_command = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/secret.pem ++state_detail_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status ++state_socket_unencoded = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket ++socketdir = /var/run/gluster ++log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}.log ++gluster_log_file = 
/var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}.gluster.log ++special_sync_mode = partial ++change_detector = changelog ++pid-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid ++state-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status ++ ++[__section_order__] ++peersrx . . = 0 ++peersrx . %5essh%3a = 2 ++peersrx . = 3 ++peers master slave = 4 ++ ++[peersrx . %5Essh%3A] ++remote_gsyncd = /nonexistent/gsyncd ++ ++[peersrx .] ++gluster_command_dir = /usr/local/sbin/ ++gluster_params = aux-gfid-mount acl ++log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.log ++log_file_mbr = /var/log/glusterfs/geo-replication-slaves/mbr/${session_owner}:${local_node}${local_id}.${slavevol}.log ++gluster_log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.gluster.log ++ ++[peers master slave] ++session_owner = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214 ++master.stime_xattr_name = trusted.glusterfs.0732cbd1-3ec5-4920-ab0d-aa5a896d5214.07a9005c-ace4-4f67-b3c0-73938fb236c4.stime ++volume_id = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214 ++use_tarssh = true ++ +-- +1.8.3.1 + diff --git a/SOURCES/0310-tests-test-case-for-non-root-geo-rep-setup.patch b/SOURCES/0310-tests-test-case-for-non-root-geo-rep-setup.patch new file mode 100644 index 0000000..a38a4aa --- /dev/null +++ b/SOURCES/0310-tests-test-case-for-non-root-geo-rep-setup.patch @@ -0,0 +1,284 @@ +From c2decfb59bd1be7cd2b0d792fd2ca2627913638a Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Tue, 24 Sep 2019 18:22:13 +0530 +Subject: [PATCH 310/313] tests : test case for non-root geo-rep setup + +Added test case for non-root geo-rep setup. 
+ +Backport of: + > Patch: https://review.gluster.org/22902 + > Change-Id: Ib6ebee79949a9f61bdc5c7b5e11b51b262750e98 + > fixes: bz#1717827 + > Signed-off-by: Sunny Kumar + +Change-Id: Ib6ebee79949a9f61bdc5c7b5e11b51b262750e98 +BUG: 1763412 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/183664 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/00-geo-rep/00-georep-verify-non-root-setup.t | 251 +++++++++++++++++++++ + 1 file changed, 251 insertions(+) + create mode 100644 tests/00-geo-rep/00-georep-verify-non-root-setup.t + +diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +new file mode 100644 +index 0000000..e753c1f +--- /dev/null ++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +@@ -0,0 +1,251 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../geo-rep.rc ++. $(dirname $0)/../env.rc ++ ++SCRIPT_TIMEOUT=500 ++ ++### Basic Non-root geo-rep setup test with Distribute Replicate volumes ++ ++##Cleanup and start glusterd ++cleanup; ++TEST glusterd; ++TEST pidof glusterd ++ ++ ++##Variables ++GEOREP_CLI="$CLI volume geo-replication" ++master=$GMV0 ++SH0="127.0.0.1" ++slave=${SH0}::${GSV0} ++num_active=2 ++num_passive=2 ++master_mnt=$M0 ++slave_mnt=$M1 ++ ++##User and group to be used for non-root geo-rep setup ++usr="nroot" ++grp="ggroup" ++ ++slave_url=$usr@$slave ++slave_vol=$GSV0 ++ssh_url=$usr@$SH0 ++ ++############################################################ ++#SETUP VOLUMES AND VARIABLES ++ ++##create_and_start_master_volume ++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4}; ++TEST $CLI volume start $GMV0 ++ ++##create_and_start_slave_volume ++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4}; ++TEST $CLI volume start $GSV0 ++ ++##Mount master ++#TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 ++ ++##Mount slave ++#TEST glusterfs -s 
$H0 --volfile-id $GSV0 $M1 ++ ++ ++########################################################## ++#TEST FUNCTIONS ++ ++function distribute_key_non_root() ++{ ++ ${GLUSTER_LIBEXECDIR}/set_geo_rep_pem_keys.sh $usr $master $slave_vol ++ echo $? ++} ++ ++ ++function check_status_non_root() ++{ ++ local search_key=$1 ++ $GEOREP_CLI $master $slave_url status | grep -F "$search_key" | wc -l ++} ++ ++ ++function check_and_clean_group() ++{ ++ if [ $(getent group $grp) ] ++ then ++ groupdel $grp; ++ echo $? ++ else ++ echo 0 ++ fi ++} ++ ++function clean_lock_files() ++{ ++ if [ ! -f /etc/passwd.lock ]; ++ then ++ rm -rf /etc/passwd.lock; ++ fi ++ ++ if [ ! -f /etc/group.lock ]; ++ then ++ rm -rf /etc/group.lock; ++ fi ++ ++ if [ ! -f /etc/shadow.lock ]; ++ then ++ rm -rf /etc/shadow.lock; ++ fi ++ ++ if [ ! -f /etc/gshadow.lock ]; ++ then ++ rm -rf /etc/gshadow.lock; ++ fi ++} ++ ++ ++########################################################### ++#SETUP NON-ROOT GEO REPLICATION ++ ++##Create ggroup group ++##First test if group exists and then create new one ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group ++ ++##cleanup *.lock files ++ ++clean_lock_files ++ ++TEST /usr/sbin/groupadd $grp ++ ++clean_lock_files ++##Create non-root user and assign it to newly created group ++ ++TEST /usr/sbin/useradd -G $grp $usr ++ ++##Modify password for non-root user to have control over distributing ssh-key ++echo "$usr:pass" | chpasswd ++ ++##Set up mountbroker root ++TEST gluster-mountbroker setup /var/mountbroker-root $grp ++ ++##Associate volume and non-root user to the mountbroker ++TEST gluster-mountbroker add $slave_vol $usr ++ ++##Check ssh setting for clear text passwords ++sed '/^PasswordAuthentication /{s/no/yes/}' -i /etc/ssh/sshd_config && grep '^PasswordAuthentication ' /etc/ssh/sshd_config && service sshd restart ++ ++ ++##Restart glusterd to reflect mountbroker changages ++TEST killall_gluster; ++TEST glusterd; ++TEST pidof glusterd; ++ ++ ++ ++##Create, start and 
mount meta_volume ++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3}; ++TEST $CLI volume start $META_VOL ++TEST mkdir -p $META_MNT ++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT ++ ++##Mount master ++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 ++ ++##Mount slave ++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 ++ ++## Check status of mount-broker ++TEST gluster-mountbroker status ++ ++ ++##Setup password-less ssh for non-root user ++#sshpass -p "pass" ssh-copy-id -i ~/.ssh/id_rsa.pub $ssh_url ++##Run ssh agent ++eval "$(ssh-agent -s)" ++PASS="pass" ++ ++ ++##Create a temp script to echo the SSH password, used by SSH_ASKPASS ++ ++SSH_ASKPASS_SCRIPT=/tmp/ssh-askpass-script ++cat > ${SSH_ASKPASS_SCRIPT} <> ~/.bashrc" ++ ++##Creating secret pem pub file ++TEST gluster-georep-sshkey generate ++ ++##Create geo-rep non-root setup ++ ++TEST $GEOREP_CLI $master $slave_url create push-pem ++ ++#Config gluster-command-dir ++TEST $GEOREP_CLI $master $slave_url config gluster-command-dir ${GLUSTER_CMD_DIR} ++ ++#Config gluster-command-dir ++TEST $GEOREP_CLI $master $slave_url config slave-gluster-command-dir ${GLUSTER_CMD_DIR} ++ ++## Test for key distribution ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 distribute_key_non_root ++ ++##Wait for common secret pem file to be created ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file ++ ++#Enable_metavolume ++TEST $GEOREP_CLI $master $slave config use_meta_volume true ++ ++#Start_georep ++TEST $GEOREP_CLI $master $slave_url start ++ ++## Meta volume is enabled so looking for 2 Active and 2 Passive sessions ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Active" ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Passive" ++ ++#Pause geo-replication session ++TEST $GEOREP_CLI $master $slave_url pause ++ ++#Resume geo-replication session ++TEST $GEOREP_CLI $master $slave_url resume ++ ++#Validate failure of volume stop when geo-rep is running ++TEST ! 
$CLI volume stop $GMV0 ++ ++#Stop Geo-rep ++TEST $GEOREP_CLI $master $slave_url stop ++ ++#Delete Geo-rep ++TEST $GEOREP_CLI $master $slave_url delete ++ ++#Cleanup authorized_keys ++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys ++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys ++ ++#clear mountbroker ++gluster-mountbroker remove --user $usr ++gluster-mountbroker remove --volume $slave_vol ++ ++#delete group and user created for non-root setup ++TEST userdel -r -f $usr ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group ++ ++##password script cleanup ++rm -rf /tmp/ssh-askpass-script ++ ++ ++cleanup; ++ +-- +1.8.3.1 + diff --git a/SOURCES/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch b/SOURCES/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch new file mode 100644 index 0000000..af0206a --- /dev/null +++ b/SOURCES/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch @@ -0,0 +1,186 @@ +From 4a2441e76f4240568093080769ede07bb7fb2016 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Sun, 20 Oct 2019 01:01:39 +0530 +Subject: [PATCH 311/313] geo-rep: Fix Permission denied traceback on non root + setup + +Problem: +While syncing rename of directory in hybrid crawl, geo-rep +crashes as below. 
+ +Traceback (most recent call last): + File "/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in worker + res = getattr(self.obj, rmeth)(*in_data[2:]) + File "/usr/local/libexec/glusterfs/python/syncdaemon/resource.py", line 588, in entry_ops + src_entry = get_slv_dir_path(slv_host, slv_volume, gfid) + File "/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 687, in get_slv_dir_path + [ENOENT], [ESTALE]) + File "/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 546, in errno_wrap + return call(*arg) +PermissionError: [Errno 13] Permission denied: '/bricks/brick1/b1/.glusterfs/8e/c0/8ec0fcd4-d50f-4a6e-b473-a7943ab66640' + +Cause: +Conversion of gfid to path for a directory uses readlink on backend +.glusterfs gfid path. But this fails for non root user with +permission denied. + +Fix: +Use gfid2path interface to get the path from gfid + +Backport of: + > Patch: https://review.gluster.org/23570 + > Change-Id: I9d40c713a1b32cea95144cbc0f384ada82972222 + > fixes: bz#1763439 + > Signed-off-by: Kotresh HR + +Change-Id: I9d40c713a1b32cea95144cbc0f384ada82972222 +BUG: 1763412 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/183665 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/gsyncd.py | 3 +- + geo-replication/syncdaemon/syncdutils.py | 35 ++++++++++++++++------ + tests/00-geo-rep/00-georep-verify-non-root-setup.t | 30 +++++++++++++++---- + 3 files changed, 52 insertions(+), 16 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index 7b48d82..8940384 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -231,7 +231,8 @@ def main(): + # Set default path for config file in that case + # If an subcmd accepts config file then it also accepts + # master and Slave arguments. 
+- if config_file is None and hasattr(args, "config_file"): ++ if config_file is None and hasattr(args, "config_file") \ ++ and args.subcmd != "slave": + config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % ( + GLUSTERD_WORKDIR, + args.master, +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index aadaebd..b08098e 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -57,6 +57,7 @@ from hashlib import sha256 as sha256 + + # auxiliary gfid based access prefix + _CL_AUX_GFID_PFX = ".gfid/" ++ROOT_GFID = "00000000-0000-0000-0000-000000000001" + GF_OP_RETRIES = 10 + + GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0' +@@ -670,6 +671,7 @@ def get_slv_dir_path(slv_host, slv_volume, gfid): + global slv_bricks + + dir_path = ENOENT ++ pfx = gauxpfx() + + if not slv_bricks: + slv_info = Volinfo(slv_volume, slv_host, master=False) +@@ -683,15 +685,30 @@ def get_slv_dir_path(slv_host, slv_volume, gfid): + gfid[2:4], + gfid], [ENOENT], [ESTALE]) + if dir_path != ENOENT: +- realpath = errno_wrap(os.readlink, [dir_path], +- [ENOENT], [ESTALE]) +- if not isinstance(realpath, int): +- realpath_parts = realpath.split('/') +- pargfid = realpath_parts[-2] +- basename = realpath_parts[-1] +- pfx = gauxpfx() +- dir_entry = os.path.join(pfx, pargfid, basename) +- return dir_entry ++ try: ++ realpath = errno_wrap(os.readlink, [dir_path], ++ [ENOENT], [ESTALE]) ++ if not isinstance(realpath, int): ++ realpath_parts = realpath.split('/') ++ pargfid = realpath_parts[-2] ++ basename = realpath_parts[-1] ++ dir_entry = os.path.join(pfx, pargfid, basename) ++ return dir_entry ++ except OSError: ++ # .gfid/GFID ++ gfidpath = unescape_space_newline(os.path.join(pfx, gfid)) ++ realpath = errno_wrap(Xattr.lgetxattr_buf, ++ [gfidpath, 'glusterfs.gfid2path'], [ENOENT], [ESTALE]) ++ if not isinstance(realpath, int): ++ basename = os.path.basename(realpath).rstrip('\x00') ++ dirpath = 
os.path.dirname(realpath) ++ if dirpath is "/": ++ pargfid = ROOT_GFID ++ else: ++ dirpath = dirpath.strip("/") ++ pargfid = get_gfid_from_mnt(dirpath) ++ dir_entry = os.path.join(pfx, pargfid, basename) ++ return dir_entry + + return None + +diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +index e753c1f..c9fd8b2 100644 +--- a/tests/00-geo-rep/00-georep-verify-non-root-setup.t ++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +@@ -118,8 +118,8 @@ clean_lock_files + TEST /usr/sbin/groupadd $grp + + clean_lock_files +-##Create non-root user and assign it to newly created group +- ++##Del if exists and create non-root user and assign it to newly created group ++userdel -r -f $usr + TEST /usr/sbin/useradd -G $grp $usr + + ##Modify password for non-root user to have control over distributing ssh-key +@@ -140,8 +140,6 @@ TEST killall_gluster; + TEST glusterd; + TEST pidof glusterd; + +- +- + ##Create, start and mount meta_volume + TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3}; + TEST $CLI volume start $META_VOL +@@ -225,6 +223,26 @@ TEST $GEOREP_CLI $master $slave_url resume + #Validate failure of volume stop when geo-rep is running + TEST ! 
$CLI volume stop $GMV0 + ++#Hybrid directory rename test BZ#1763439 ++TEST $GEOREP_CLI $master $slave_url config change_detector xsync ++mkdir ${master_mnt}/dir1 ++mkdir ${master_mnt}/dir1/dir2 ++mkdir ${master_mnt}/dir1/dir3 ++mkdir ${master_mnt}/hybrid_d1 ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/dir2 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/dir3 ++ ++mv ${master_mnt}/hybrid_d1 ${master_mnt}/hybrid_rn_d1 ++mv ${master_mnt}/dir1/dir2 ${master_mnt}/rn_dir2 ++mv ${master_mnt}/dir1/dir3 ${master_mnt}/dir1/rn_dir3 ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_rn_d1 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/rn_dir2 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/rn_dir3 ++ + #Stop Geo-rep + TEST $GEOREP_CLI $master $slave_url stop + +@@ -232,8 +250,8 @@ TEST $GEOREP_CLI $master $slave_url stop + TEST $GEOREP_CLI $master $slave_url delete + + #Cleanup authorized_keys +-sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys +-sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys ++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' /home/$usr/.ssh/authorized_keys ++sed -i '/^command=.*gsyncd.*/d' /home/$usr/.ssh/authorized_keys + + #clear mountbroker + gluster-mountbroker remove --user $usr +-- +1.8.3.1 + diff --git a/SOURCES/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch b/SOURCES/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch new file mode 100644 index 0000000..bf8c820 --- /dev/null +++ b/SOURCES/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch @@ -0,0 +1,59 @@ +From b1d8a5ee8b2e320aaaf9b2a145fbc285178d07bb Mon Sep 17 00:00:00 2001 +From: hari gowtham +Date: Tue, 22 Oct 2019 15:11:03 +0530 +Subject: [PATCH 312/313] Scripts: quota_fsck script KeyError: 'contri_size' + + back-port 
of: https://review.gluster.org/#/c/glusterfs/+/23586/ + +Problem: In a certain code flow, we weren't handling the +unavailability of the contri value in the dict. Trying to print +without the value resulted in erroring out. + +Fix: Have printed the whole of dictionary as the values will be +helpful in understanding the state of the file/dir + +>Fixes: bz#1764129 +>Change-Id: I99c538adb712f281ca10e4e0088f404f515b9725 +>Signed-off-by: hari gowtham + +BUG: 1719171 +Change-Id: I99c538adb712f281ca10e4e0088f404f515b9725 +Signed-off-by: hari gowtham +Reviewed-on: https://code.engineering.redhat.com/gerrit/183720 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/quota/quota_fsck.py | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py +index f03895d..485a37a 100755 +--- a/extras/quota/quota_fsck.py ++++ b/extras/quota/quota_fsck.py +@@ -52,17 +52,17 @@ epilog_msg=''' + + def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None): + if log_type == QUOTA_VERBOSE: +- print('%-24s %-60s\nxattr_values: %s\n%s\n' % {"Verbose", path, xattr_dict, stbuf}) ++ print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf)) + elif log_type == QUOTA_META_ABSENT: +- print('%-24s %-60s\n%s\n' % {"Quota-Meta Absent", path, xattr_dict}) ++ print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict)) + elif log_type == QUOTA_SIZE_MISMATCH: + print("mismatch") + if dir_size is not None: +- print('%24s %60s %12s %12s' % {"Size Mismatch", path, xattr_dict['contri_size'], +- dir_size}) ++ print('%24s %60s %12s %12s' % ("Size Mismatch", path, ++ xattr_dict, dir_size)) + else: +- print('%-24s %-60s %-12i %-12i' % {"Size Mismatch", path, xattr_dict['contri_size'], +- stbuf.st_size}) ++ print('%-24s %-60s %-12i %-12i' % ("Size Mismatch", path, xattr_dict, ++ stbuf.st_size)) + + def size_differs_lot(s1, s2): + ''' +-- +1.8.3.1 + 
diff --git a/SOURCES/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch b/SOURCES/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch new file mode 100644 index 0000000..e4887b8 --- /dev/null +++ b/SOURCES/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch @@ -0,0 +1,60 @@ +From 23091d24d34102c7938ae2890930b73c89c5a8e7 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 22 Oct 2019 18:52:25 +0530 +Subject: [PATCH 313/313] extras: Cgroup(CPU/Mem) restriction are not working + on gluster process + +Problem: After Configure the Cgroup(CPU/MEM) limit to a gluster processes + resource(CPU/MEM) limits are not applicable to the gluster + processes.Cgroup limits are not applicable because all threads are + not moved into a newly created cgroup to apply restriction. + +Solution: To move a gluster thread to newly created cgroup change the + condition in script + +> Change-Id: I8ad81c69200e4ec43a74f6052481551cf835354c +> Fixes: bz#1764208 +> (Cherry pick from commit 38de02012948013a88597545cf49380ce97f6fa7) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23599/) +> Signed-off-by: Mohit Agrawal + +Change-Id: I8ad81c69200e4ec43a74f6052481551cf835354c +BUG: 1764202 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/183730 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/control-cpu-load.sh | 2 +- + extras/control-mem.sh | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/extras/control-cpu-load.sh b/extras/control-cpu-load.sh +index b739c82..52dcf62 100755 +--- a/extras/control-cpu-load.sh ++++ b/extras/control-cpu-load.sh +@@ -104,7 +104,7 @@ echo "Setting $quota_value to cpu.cfs_quota_us for gluster_cgroup." 
+ echo ${quota_value} > ${LOC}/${cgroup_name}/cpu.cfs_quota_us + + if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then +- for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`; ++ for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`; + do + echo ${thid} > ${LOC}/${cgroup_name}/tasks ; + done +diff --git a/extras/control-mem.sh b/extras/control-mem.sh +index 38aa2a0..91b36f8 100755 +--- a/extras/control-mem.sh ++++ b/extras/control-mem.sh +@@ -116,7 +116,7 @@ else + fi + + if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then +- for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`; ++ for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`; + do + echo ${thid} > ${LOC}/${cgroup_name}/tasks ; + done +-- +1.8.3.1 + diff --git a/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch b/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch new file mode 100644 index 0000000..adde426 --- /dev/null +++ b/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch @@ -0,0 +1,38 @@ +From 2a4f19df70276ba41db19938507297f7580286fa Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Fri, 25 Oct 2019 18:07:27 +0530 +Subject: [PATCH 314/314] glusterd/tier: is_tier_enabled inserted causing + checksum mismatch + +the volfile entry is_tier_enabled is checked for version 3.7.6 while it was +supposed to check for 3.10. 
this is to fix it downstream only but changing the +version of check to 3.13.1 + +Label: DOWNSTREAM ONLY +BUG: 1765555 +Change-Id: Id631f3ba520b3e7b126c7607dca1bb7874532e81 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/183932 +Reviewed-by: Sanju Rakonde +Tested-by: Sanju Rakonde +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 4889217..8a10eb8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -1036,7 +1036,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo) + if (ret) + goto out; + } +- if (conf->op_version >= GD_OP_VERSION_3_10_0) { ++ if (conf->op_version >= GD_OP_VERSION_3_13_1) { + snprintf(buf, sizeof(buf), "%d", volinfo->is_tier_enabled); + ret = gf_store_save_value(fd, GF_TIER_ENABLED, buf); + if (ret) +-- +1.8.3.1 + diff --git a/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch b/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch new file mode 100644 index 0000000..a0448cc --- /dev/null +++ b/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch @@ -0,0 +1,52 @@ +From 4a04e1b5540921db22f1894f71eb30342127192d Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 12 Nov 2019 21:53:20 +0530 +Subject: [PATCH 315/316] geo-rep: Fix py2/py3 compatibility in repce + +Geo-rep fails to start on python2 only machine like +centos6. It fails with "ImportError no module named _io". +This patch fixes the same. 
+ +Backport of: + > Patch: https://review.gluster.org/23702 + > fixes: bz#1771577 + > Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec + > Signed-off-by: Kotresh HR + +BUG: 1771524 +Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/185377 +Tested-by: RHGS Build Bot +Reviewed-by: Sunny Kumar +--- + geo-replication/syncdaemon/repce.py | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/geo-replication/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py +index 6065b82..c622afa 100644 +--- a/geo-replication/syncdaemon/repce.py ++++ b/geo-replication/syncdaemon/repce.py +@@ -8,7 +8,6 @@ + # cases as published by the Free Software Foundation. + # + +-import _io + import os + import sys + import time +@@ -58,9 +57,9 @@ def recv(inf): + """load an object from input stream + python2 and python3 compatibility, inf is sys.stdin + and is opened as text stream by default. Hence using the +- buffer attribute ++ buffer attribute in python3 + """ +- if isinstance(inf, _io.TextIOWrapper): ++ if hasattr(inf, "buffer"): + return pickle.load(inf.buffer) + else: + return pickle.load(inf) +-- +1.8.3.1 + diff --git a/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch b/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch new file mode 100644 index 0000000..c2045a0 --- /dev/null +++ b/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch @@ -0,0 +1,51 @@ +From b9a19aef5de94eb91162448ad687f2d2d194f82c Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Thu, 14 Nov 2019 09:55:15 +0000 +Subject: [PATCH 316/316] spec: fixed python-prettytable dependency for rhel6 + +Installing glusterfs on rhel6 was failing with python-prettytable +dependency as it required python2-prettytable for glusterfs-events. +This patch conditionally sets the python version for rhel7 and +fixes the problem. 
+ +Label: DOWNSTREAM ONLY + +BUG: 1771614 + +Change-Id: I6288daa5d8c2d82a6d73a0d9722786a2a99b9db5 +fixes: bz#1771614 +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185385 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 3c2e2dc..eeadb65 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -706,7 +706,7 @@ This package provides the translators needed on any GlusterFS client. + %package events + Summary: GlusterFS Events + Requires: %{name}-server%{?_isa} = %{version}-%{release} +-Requires: python%{_pythonver} python%{_pythonver}-prettytable ++Requires: python%{_pythonver} + Requires: python%{_pythonver}-gluster = %{version}-%{release} + %if ( 0%{?rhel} && 0%{?rhel} < 8 ) + Requires: python-requests +@@ -714,7 +714,10 @@ Requires: python-requests + Requires: python%{_pythonver}-requests + %endif + %if ( 0%{?rhel} && 0%{?rhel} < 7 ) ++Requires: python-prettytable + Requires: python-argparse ++%else ++Requires: python%{_pythonver}-prettytable + %endif + %if ( 0%{?_with_systemd:1} ) + %{?systemd_requires} +-- +1.8.3.1 + diff --git a/SOURCES/0317-Update-rfc.sh-to-rhgs-3.5.1.patch b/SOURCES/0317-Update-rfc.sh-to-rhgs-3.5.1.patch new file mode 100644 index 0000000..eccf2e3 --- /dev/null +++ b/SOURCES/0317-Update-rfc.sh-to-rhgs-3.5.1.patch @@ -0,0 +1,43 @@ +From 985ef94c63859907339c11b158e4540a5568d638 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Mon, 18 Nov 2019 02:25:25 -0500 +Subject: [PATCH 317/335] Update rfc.sh to rhgs-3.5.1 + +Signed-off-by: Rinku Kothiya +--- + README | 9 +++++++++ + rfc.sh | 2 +- + 2 files changed, 10 insertions(+), 1 deletion(-) + create mode 100644 README + +diff --git a/README b/README +new file mode 100644 +index 0000000..44a118b +--- /dev/null ++++ b/README +@@ -0,0 +1,9 @@ ++ ++'master' branch is just dummy branch in 
downstream. Any reference to 'upstream' ++will point to http://git.gluster.org. ++ ++You can checkout the release specific branch by running below command ++ bash$ git checkout -t -b rhs-x.y origin/rhs-x.y ++ ++Happy Hacking!! ++ +diff --git a/rfc.sh b/rfc.sh +index 94c92ef..69ddd2b 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.0"; ++branch="rhgs-3.5.1"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0318-Update-rfc.sh-to-rhgs-3.5.1.patch b/SOURCES/0318-Update-rfc.sh-to-rhgs-3.5.1.patch new file mode 100644 index 0000000..e65ae38 --- /dev/null +++ b/SOURCES/0318-Update-rfc.sh-to-rhgs-3.5.1.patch @@ -0,0 +1,114 @@ +From 1f03327887645be2500cd29f69f7a77a4f5d0164 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Mon, 18 Nov 2019 14:25:12 -0500 +Subject: [PATCH 318/335] Update rfc.sh to rhgs-3.5.1 + +Removed the checks for updates and fixes from rfc.sh + +Label: DOWNSTREAM ONLY + +Change-Id: I436c959aa3b3366cd313b29f41c2466c4072efd7 +Signed-off-by: Rinku Kothiya +--- + rfc.sh | 47 ++++++++--------------------------------------- + 1 file changed, 8 insertions(+), 39 deletions(-) + +diff --git a/rfc.sh b/rfc.sh +index 69ddd2b..918fb11 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -129,13 +129,8 @@ editor_mode() + + if [ $(basename "$1") = "COMMIT_EDITMSG" ]; then + # see note above function warn_reference_missing for regex elaboration +- # Lets first check for github issues +- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?#[[:digit:]]+" | awk -F '#' '{print $2}'); +- if [ "x${ref}" = "x" ]; then +- # if not found, check for bugs +- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+bz#[[:digit:]]+" | awk -F '#' '{print $2}'); +- fi + ++ ref=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+") + if [ "x${ref}" != "x" 
]; then + return; + fi +@@ -157,16 +152,6 @@ editor_mode() + bz_string="" + fi + +- echo "Select yes '(y)' if this patch fixes the bug/feature completely," +- echo -n "or is the last of the patchset which brings feature (Y/n): " +- read fixes +- fixes_string="fixes" +- if [ "${fixes}" = 'N' ] || [ "${fixes}" = 'n' ]; then +- fixes_string="updates" +- fi +- +- sed "/^Change-Id:/{p; s/^.*$/${fixes_string}: ${bz_string}#${bug}/;}" $1 > $1.new && \ +- mv $1.new $1; + return; + done + fi +@@ -234,8 +219,8 @@ check_patches_for_coding_style() + # IOW, the above helps us find the pattern with leading or training spaces + # or non word consituents like , or ; + # +-# [fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS]) +-# Finds 'fixes' OR 'updates' in any case combination ++# [bB][uU][gG] ++# Finds 'bug' in any case + # + # (:)? + # Followed by an optional : (colon) +@@ -256,28 +241,11 @@ warn_reference_missing() + echo "" + echo "=== Missing a reference in commit! ===" + echo "" +- echo "Gluster commits are made with a reference to a bug or a github issue" +- echo "" +- echo "Submissions that are enhancements (IOW, not functional" +- echo "bug fixes, but improvements of any nature to the code) are tracked" +- echo "using github issues [1]." ++ echo "You must give BUG: " + echo "" +- echo "Submissions that are bug fixes are tracked using Bugzilla [2]." 
++ echo "for example:" + echo "" +- echo "A check on the commit message, reveals that there is no bug or" +- echo "github issue referenced in the commit message" +- echo "" +- echo "[1] https://github.com/gluster/glusterfs/issues/new" +- echo "[2] https://bugzilla.redhat.com/enter_bug.cgi?product=GlusterFS" +- echo "" +- echo "Please file an issue or a bug report and reference the same in the" +- echo "commit message using the following tags:" +- echo "GitHub Issues:" +- echo "\"Fixes: gluster/glusterfs#n\" OR \"Updates: gluster/glusterfs#n\"," +- echo "\"Fixes: #n\" OR \"Updates: #n\"," +- echo "Bugzilla ID:" +- echo "\"Fixes: bz#n\" OR \"Updates: bz#n\"," +- echo "where n is the issue or bug number" ++ echo "BUG: 1234567" + echo "" + echo "You may abort the submission choosing 'N' below and use" + echo "'git commit --amend' to add the issue reference before posting" +@@ -312,7 +280,7 @@ main() + assert_diverge; + + # see note above function warn_reference_missing for regex elaboration +- reference=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?(bz)?#[[:digit:]]+" | awk -F '#' '{print $2}'); ++ reference=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+" | awk '{print $2}') + + # If this is a commit against master and does not have a bug ID or a github + # issue reference. Warn the contributor that one of the 2 is required +@@ -320,6 +288,7 @@ main() + warn_reference_missing; + fi + ++ + # TODO: add clang-format command here. It will after the changes are done everywhere else + clang_format=$(clang-format --version) + if [ ! 
-z "${clang_format}" ]; then +-- +1.8.3.1 + diff --git a/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch b/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch new file mode 100644 index 0000000..d37efaf --- /dev/null +++ b/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch @@ -0,0 +1,48 @@ +From 659bd2a0fde9ba0cb8fc3905bcdb63d91e3dfa9d Mon Sep 17 00:00:00 2001 +From: Raghavendra Bhat +Date: Tue, 2 Jul 2019 16:50:23 -0400 +Subject: [PATCH 319/335] features/snapview-server: obtain the list of + snapshots inside the lock + +The current list of snapshots from priv->dirents is obtained outside +the lock. + +Upstream patch: +> Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177 +> Fixes: bz#1726783 +> Signed-off-by: Raghavendra Bhat +> patch: https://review.gluster.org/#/c/glusterfs/+/22990/ + +BUG: 1731513 +Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177 +Signed-off-by: Raghavendra Bhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/185838 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/snapview-server/src/snapview-server-mgmt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c +index bc415ef..3d64383 100644 +--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c ++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c +@@ -256,7 +256,6 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, + this = frame->this; + ctx = frame->this->ctx; + priv = this->private; +- old_dirents = priv->dirents; + + if (!ctx) { + errno = EINVAL; +@@ -388,6 +387,7 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, + LOCK(&priv->snaplist_lock); + { + oldcount = priv->num_snaps; ++ old_dirents = priv->dirents; + for (i = 0; i < priv->num_snaps; i++) { + 
for (j = 0; j < snapcount; j++) { + if ((!strcmp(old_dirents[i].name, dirents[j].name)) && +-- +1.8.3.1 + diff --git a/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch b/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch new file mode 100644 index 0000000..48a9cad --- /dev/null +++ b/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch @@ -0,0 +1,58 @@ +From 7e5d8dcb4f557eaca259e8d81cf34d651907396c Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Thu, 24 Oct 2019 12:24:35 +0530 +Subject: [PATCH 320/335] gf-event: Handle unix volfile-servers + +Problem: +glfsheal program uses unix-socket-based volfile server. +volfile server will be the path to socket in this case. +gf_event expects this to be hostname in all cases. So getaddrinfo +will fail on the unix-socket path, events won't be sent in this case. + +Fix: +In case of unix sockets, default to localhost + +upstream-patch: https://review.gluster.org/c/glusterfs/+/23606 +BUG: 1758923 +Change-Id: I60d27608792c29d83fb82beb5fde5ef4754bece8 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/185851 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/events.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 9d33783..4e2f8f9 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -43,6 +43,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + struct addrinfo *result = NULL; + xlator_t *this = THIS; + int sin_family = AF_INET; ++ char *volfile_server_transport = NULL; + + /* Global context */ + ctx = THIS->ctx; +@@ -62,8 +63,16 @@ _gf_event(eventtypes_t event, const char *fmt, ...) 
+ memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + ++ if (ctx) { ++ volfile_server_transport = ctx->cmd_args.volfile_server_transport; ++ } ++ ++ if (!volfile_server_transport) { ++ volfile_server_transport = "tcp"; ++ } + /* Get Host name to send message */ +- if (ctx && ctx->cmd_args.volfile_server) { ++ if (ctx && ctx->cmd_args.volfile_server && ++ (strcmp(volfile_server_transport, "unix"))) { + /* If it is client code then volfile_server is set + use that information to push the events. */ + if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints, +-- +1.8.3.1 + diff --git a/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch b/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch new file mode 100644 index 0000000..8dec96f --- /dev/null +++ b/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch @@ -0,0 +1,55 @@ +From 5e7a2ad35a174d6d0ee5ed58a3e27955e85aa47c Mon Sep 17 00:00:00 2001 +From: kshithijiyer +Date: Mon, 24 Jun 2019 20:08:48 +0530 +Subject: [PATCH 321/335] Adding white spaces to description of set group. 
+ +The description of set group is missing spaces which +leads to the description look like: +volume set group - This option can be used for +setting multiple pre-defined volume optionswhere group_name is a +file under /var/lib/glusterd/groups containing onekey, value pair +per line + +Instead of: +volume set group - This option can be used for +setting multiple pre-defined volume options where group_name is a +file under /var/lib/glusterd/groups containing one key value +pair per line + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/22934/ +> Fixes: bz#1723455 +> Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f +> Signed-off-by: kshithijiyer + +BUG: 1724021 +Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185756 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-volume.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 6b958bd..66beb1b 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3393,10 +3393,10 @@ struct cli_cmd volume_cmds[] = { + {"volume set ", cli_cmd_volume_set_cbk, + "set options for volume "}, + +- {"volume set group ", cli_cmd_volume_set_cbk, +- "This option can be used for setting multiple pre-defined volume options" +- "where group_name is a file under /var/lib/glusterd/groups containing one" +- "key, value pair per line"}, ++ {"volume set group ", cli_cmd_volume_set_cbk, ++ "This option can be used for setting multiple pre-defined volume options " ++ "where group_name is a file under /var/lib/glusterd/groups containing one " ++ "key value pair per line"}, + + {"volume log rotate [BRICK]", cli_cmd_log_rotate_cbk, + "rotate the log file for corresponding volume/brick"}, +-- +1.8.3.1 + diff --git a/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch 
b/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch new file mode 100644 index 0000000..35a234b --- /dev/null +++ b/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch @@ -0,0 +1,65 @@ +From 9be255f76c78fcbbda1e3a72eb2e99d3aface53e Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 16 Oct 2019 23:26:03 +0530 +Subject: [PATCH 322/335] glusterd: display correct rebalance data size after + glusterd restart + +Problem: After completion of rebalance, if glusterd is restarted, +rebalance status displays wrong rebalance data size in its output. + +Cause: While glusterd restoring the information from /var/lib/glusterd/ +into its memory, glusterd fetches rebalance_data from +/var/lib/glusterd/vols/volname/node_state.info. This value is +converted into an integer using atoi(), which is returning +incorrect value for larger values. + +Solution: use sscanf() instead of atoi() to convert string to +integer(in this case it is unsigned long) + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23560/ +> fixes: bz#1762438 +> Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e +> Signed-off-by: Sanju Rakonde + +BUG: 1761486 +Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185752 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 8a10eb8..b3b5ee9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -2974,19 +2974,19 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo) + volinfo->rebal.op = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES, + 
SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES))) { +- volinfo->rebal.rebalance_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE))) { +- volinfo->rebal.rebalance_data = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_data); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED))) { +- volinfo->rebal.lookedup_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.lookedup_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES))) { +- volinfo->rebal.rebalance_failures = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_failures); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED))) { +- volinfo->rebal.skipped_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.skipped_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) { + volinfo->rebal.rebalance_time = atoi(value); +-- +1.8.3.1 + diff --git a/SOURCES/0323-cli-display-detailed-rebalance-info.patch b/SOURCES/0323-cli-display-detailed-rebalance-info.patch new file mode 100644 index 0000000..a00faf8 --- /dev/null +++ b/SOURCES/0323-cli-display-detailed-rebalance-info.patch @@ -0,0 +1,101 @@ +From 852c475040a599ed35798dbb388c6b59c1d0a820 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Tue, 22 Oct 2019 15:06:29 +0530 +Subject: [PATCH 323/335] cli: display detailed rebalance info + +Problem: When one of the node is down in cluster, +rebalance status is not displaying detailed +information. + +Cause: In glusterd_volume_rebalance_use_rsp_dict() +we are aggregating rsp from all the nodes into a +dictionary and sending it to cli for printing. 
While +assigning a index to keys we are considering all the +peers instead of considering only the peers which are +up. Because of which, index is not reaching till 1. +while parsing the rsp cli unable to find status-1 +key in dictionary and going out without printing +any information. + +Solution: The simplest fix for this without much +code change is to continue to look for other keys +when status-1 key is not found. + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23588 +> fixes: bz#1764119 +> Change-Id: I0062839933c9706119eb85416256eade97e976dc +> Signed-off-by: Sanju Rakonde + +BUG: 1761326 +Change-Id: I0062839933c9706119eb85416256eade97e976dc +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185749 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-rpc-ops.c | 21 ++++++++++++++------- + tests/bugs/glusterd/rebalance-in-cluster.t | 9 +++++++++ + 2 files changed, 23 insertions(+), 7 deletions(-) + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index b167e26..4e91265 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -1597,13 +1597,20 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type, + goto out; + } + +- snprintf(key, sizeof(key), "status-1"); +- +- ret = dict_get_int32(dict, key, (int32_t *)&status_rcd); +- if (ret) { +- gf_log("cli", GF_LOG_TRACE, "count %d %d", count, 1); +- gf_log("cli", GF_LOG_TRACE, "failed to get status"); +- goto out; ++ for (i = 1; i <= count; i++) { ++ snprintf(key, sizeof(key), "status-%d", i); ++ ret = dict_get_int32(dict, key, (int32_t *)&status_rcd); ++ /* If information from a node is missing we should skip ++ * the node and try to fetch information of other nodes. ++ * If information is not found for all nodes, we should ++ * error out. 
++ */ ++ if (!ret) ++ break; ++ if (ret && i == count) { ++ gf_log("cli", GF_LOG_TRACE, "failed to get status"); ++ goto out; ++ } + } + + /* Fix layout will be sent to all nodes for the volume +diff --git a/tests/bugs/glusterd/rebalance-in-cluster.t b/tests/bugs/glusterd/rebalance-in-cluster.t +index 9565fae..469ec6c 100644 +--- a/tests/bugs/glusterd/rebalance-in-cluster.t ++++ b/tests/bugs/glusterd/rebalance-in-cluster.t +@@ -4,6 +4,10 @@ + . $(dirname $0)/../../cluster.rc + . $(dirname $0)/../../volume.rc + ++function rebalance_status_field_1 { ++ $CLI_1 volume rebalance $1 status | awk '{print $7}' | sed -n 3p ++} ++ + cleanup; + TEST launch_cluster 2; + TEST $CLI_1 peer probe $H2; +@@ -29,6 +33,11 @@ TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 + TEST $CLI_1 volume rebalance $V0 start + EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0 + ++#bug - 1764119 - rebalance status should display detailed info when any of the node is dowm ++TEST kill_glusterd 2 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field_1 $V0 ++ ++TEST start_glusterd 2 + #bug-1245142 + + $CLI_1 volume rebalance $V0 start & +-- +1.8.3.1 + diff --git a/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch b/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch new file mode 100644 index 0000000..26e1577 --- /dev/null +++ b/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch @@ -0,0 +1,128 @@ +From dcf3f74fa7e812dfe89667bd6219f70a8457f755 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Thu, 6 Jun 2019 18:33:19 +0530 +Subject: [PATCH 324/335] extras/hooks: Add SELinux label on new bricks during + add-brick + +Backport of https://review.gluster.org/c/glusterfs/+/22834 + +Change-Id: Ifd8ae5eeb91b968cc1a9a9b5d15844c5233d56db +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/185855 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil 
Kumar Heggodu Gopala Acharya +--- + .../add-brick/post/S10selinux-label-brick.sh | 100 +++++++++++++++++++++ + 1 file changed, 100 insertions(+) + create mode 100755 extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh + +diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +new file mode 100755 +index 0000000..4a17c99 +--- /dev/null ++++ b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +@@ -0,0 +1,100 @@ ++#!/bin/bash ++# ++# Install to hooks//add-brick/post ++# ++# Add an SELinux file context for each brick using the glusterd_brick_t type. ++# This ensures that the brick is relabeled correctly on an SELinux restart or ++# restore. Subsequently, run a restore on the brick path to set the selinux ++# labels. ++# ++### ++ ++PROGNAME="Sselinux" ++OPTSPEC="volname:,version:,gd-workdir:,volume-op:" ++VOL= ++ ++parse_args () { ++ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") ++ eval set -- "${ARGS}" ++ ++ while true; do ++ case ${1} in ++ --volname) ++ shift ++ VOL=${1} ++ ;; ++ --gd-workdir) ++ shift ++ GLUSTERD_WORKDIR=$1 ++ ;; ++ --version) ++ shift ++ ;; ++ --volume-op) ++ shift ++ ;; ++ *) ++ shift ++ break ++ ;; ++ esac ++ shift ++ done ++} ++ ++set_brick_labels() ++{ ++ local volname="${1}" ++ local fctx ++ local list=() ++ ++ fctx="$(semanage fcontext --list -C)" ++ ++ # wait for new brick path to be updated under ++ # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/ ++ sleep 5 ++ ++ # grab the path for each local brick ++ brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/" ++ brickdirs=$( ++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \ ++ cut -d= -f 2 | \ ++ sort -u ++ ) ++ ++ # create a list of bricks for which custom SELinux ++ # label doesn't exist ++ for b in ${brickdirs}; do ++ pattern="${b}(/.*)?" ++ echo "${fctx}" | grep "^${pattern}\s" >/dev/null ++ if [[ $? 
-ne 0 ]]; then ++ list+=("${pattern}") ++ fi ++ done ++ ++ # Add a file context for each brick path in the list and associate with the ++ # glusterd_brick_t SELinux type. ++ for p in ${list[@]} ++ do ++ semanage fcontext --add -t glusterd_brick_t -r s0 "${p}" ++ done ++ ++ # Set the labels for which SELinux label was added above ++ for b in ${brickdirs} ++ do ++ echo "${list[@]}" | grep "${b}" >/dev/null ++ if [[ $? -eq 0 ]]; then ++ restorecon -R "${b}" ++ fi ++ done ++} ++ ++SELINUX_STATE=$(which getenforce && getenforce) ++[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 ++ ++parse_args "$@" ++[ -z "${VOL}" ] && exit 1 ++ ++set_brick_labels "${VOL}" ++ ++exit 0 +-- +1.8.3.1 + diff --git a/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch b/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch new file mode 100644 index 0000000..8e5a5fa --- /dev/null +++ b/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch @@ -0,0 +1,52 @@ +From 27d69d8927a946562aef08a6edfee38b9998f96d Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Wed, 12 Jun 2019 15:41:27 +0530 +Subject: [PATCH 325/335] extras/hooks: Install and package newly added post + add-brick hook script + +Previously a new SELinux hook script was added as a post add-brick +operation to label new brick paths. But the change failed to install +and package new script. Therefore making necessary changes to Makefile +and spec file to get it installed and packaged. 
+ +Backport of https://review.gluster.org/c/glusterfs/+/22856 + +Change-Id: I67b8f4982c2783c34a4bc749fb4387c19a038225 +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/185856 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/hook-scripts/add-brick/post/Makefile.am | 4 ++-- + glusterfs.spec.in | 1 + + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am +index bfc0c1c..9b236df 100644 +--- a/extras/hook-scripts/add-brick/post/Makefile.am ++++ b/extras/hook-scripts/add-brick/post/Makefile.am +@@ -1,6 +1,6 @@ +-EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh ++EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh + + hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/ + if WITH_SERVER +-hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh ++hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh + endif +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index eeadb65..91180db 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1447,6 +1447,7 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh +-- +1.8.3.1 + diff --git 
a/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch b/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch new file mode 100644 index 0000000..b0afcc7 --- /dev/null +++ b/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch @@ -0,0 +1,51 @@ +From a4f01ad90a0c0dfd0655da509c5ed2a11a507cc3 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 17 Jun 2019 11:10:42 +0530 +Subject: [PATCH 326/335] tests: subdir-mount.t is failing for brick_mux + regrssion + +To avoid the failure wait to run hook script S13create-subdir-mounts.sh +after executed add-brick command by test case. + +This is required as a dependency for the bz referenced below. + +Backport of https://review.gluster.org/c/glusterfs/+/22877 + +Change-Id: I063b6d0f86a550ed0a0527255e4dfbe8f0a8c02e +BUG: 1686800 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/185857 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/features/subdir-mount.t | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t +index 8401946..a02bd6b 100644 +--- a/tests/features/subdir-mount.t ++++ b/tests/features/subdir-mount.t +@@ -85,12 +85,17 @@ TEST $CLI volume start $V0 + TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2 + TEST stat $M2 + ++initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` + # mount shouldn't fail even after add-brick + TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6}; + +-# Give time for client process to get notified and use the new +-# volfile after add-brick +-sleep 1 ++# Wait to execute create-subdir-mounts.sh script by glusterd ++newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` ++while [ $newcnt -eq $initcnt ] ++do ++ newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc 
-l` ++ sleep 1 ++done + + # Existing mount should still be active + mount_inode=$(stat --format "%i" "$M2") +-- +1.8.3.1 + diff --git a/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch b/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch new file mode 100644 index 0000000..e3e42fa --- /dev/null +++ b/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch @@ -0,0 +1,93 @@ +From f89242132dc4756c827113154cc6ad18ad6bde88 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Tue, 19 Feb 2019 12:49:12 +0530 +Subject: [PATCH 327/335] glusterfind: integrate with gfid2path + +Integration with gfid2path helps avoid file-system crawl and saves +precious time. Extended attributes starting with "trusted.gfid2path." +are read and the / values are extracted and the is +iteratively resolved from the brick backend to arrive at the full path. + +>Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0 +>fixes: #529 +>Signed-off-by: Milind Changire +>Signed-off-by: Shwetha K Acharya + +backport of https://review.gluster.org/#/c/glusterfs/+/22225/ +BUG: 1599802 +Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0 +Signed-off-by: Milind Changire +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185706 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tools/glusterfind/src/changelog.py | 45 ++++++++++++++++++++++++++++++++++---- + 1 file changed, 41 insertions(+), 4 deletions(-) + +diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py +index ef982db..d8f97e0 100644 +--- a/tools/glusterfind/src/changelog.py ++++ b/tools/glusterfind/src/changelog.py +@@ -114,6 +114,43 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): + continue + + ++def enum_hard_links_using_gfid2path(brick, gfid, args): ++ hardlinks = [] ++ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid) ++ if not os.path.isdir(p): ++ # we have a symlink or a normal file ++ try: ++ file_xattrs = xattr.list(p) ++ for x in 
file_xattrs: ++ if x.startswith("trusted.gfid2path."): ++ # get the value for the xattr i.e. / ++ v = xattr.getxattr(p, x) ++ pgfid, bn = v.split(os.sep) ++ try: ++ path = symlink_gfid_to_path(brick, pgfid) ++ fullpath = os.path.join(path, bn) ++ fullpath = output_path_prepare(fullpath, args) ++ hardlinks.append(fullpath) ++ except (IOError, OSError) as e: ++ logger.warn("Error converting to path: %s" % e) ++ continue ++ except (IOError, OSError): ++ pass ++ return hardlinks ++ ++ ++def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args): ++ path = "" ++ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}): ++ gfid = row[3].strip() ++ logger.debug("Processing gfid %s" % gfid) ++ hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args) ++ ++ path = ",".join(hardlinks) ++ ++ changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid}) ++ ++ + def gfid_to_path_using_pgfid(brick, changelog_data, args): + """ + For all the pgfids collected, Converts to Path and +@@ -314,11 +351,11 @@ def get_changes(brick, hash_dir, log_file, start, end, args): + changelog_data.commit() + logger.info("[2/4] Finished 'pgfid to path' conversions.") + +- # Convert all GFIDs for which no other additional details available +- logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...") +- gfid_to_path_using_pgfid(brick, changelog_data, args) ++ # Convert all gfids recorded for data and metadata to all hardlink paths ++ logger.info("[3/4] Starting 'gfid2path' conversions ...") ++ gfid_to_all_paths_using_gfid2path(brick, changelog_data, args) + changelog_data.commit() +- logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.") ++ logger.info("[3/4] Finished 'gfid2path' conversions.") + + # If some GFIDs fail to get converted from previous step, + # convert using find +-- +1.8.3.1 + diff --git a/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch 
b/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch new file mode 100644 index 0000000..0d12daa --- /dev/null +++ b/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch @@ -0,0 +1,55 @@ +From a8d8fc91af226fbf49e9dd1d7d91ad287707c4fe Mon Sep 17 00:00:00 2001 +From: Vishal Pandey +Date: Wed, 7 Aug 2019 12:53:06 +0530 +Subject: [PATCH 328/335] glusterd: Add warning and abort in case of failures + in migration during remove-brick commit + +Problem - +Currently remove-brick commit goes through even though there were files +that failed to migrate or were skipped. There is no warning raised to the user. +Solution- +Add a check in the remove brick staging phase to verify if the status of the +rebalnce process is complete but there has been failures or some skipped files +while migration, In this case user will be given a warning and remove-brick +commit. User will need to use the force option to remove the bricks. + +> Upstream Path Link: https://review.gluster.org/#/c/glusterfs/+/23171/ +> Fixes: bz#1514683 +> Signed-offby- Vishal Pandey +> Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5 + +BUG: 1344758 +Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5 +Signed-off-by: Vishal Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/185831 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index ad9a572..c5141de 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -2191,6 +2191,17 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) + goto out; + } + ++ if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_COMPLETE) { ++ if (volinfo->rebal.rebalance_failures > 0 || ++ volinfo->rebal.skipped_files > 0) 
{ ++ errstr = gf_strdup( ++ "use 'force' option as migration " ++ "of some files might have been skipped or " ++ "has failed"); ++ goto out; ++ } ++ } ++ + ret = glusterd_remove_brick_validate_bricks( + cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE); + if (ret) +-- +1.8.3.1 + diff --git a/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch b/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch new file mode 100644 index 0000000..935824d --- /dev/null +++ b/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch @@ -0,0 +1,165 @@ +From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Wed, 20 Nov 2019 12:26:11 +0530 +Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no + healed_sinks + +Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/ + +Problem: +In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame +anything for entry heal, heal will not complete even though we have +clear source and sinks. This will happen because while doing +afr_selfheal_find_direction() only the bricks which are blamed by +non-accused bricks are considered as sinks. Later in +__afr_selfheal_entry_finalize_source() when it tries to mark all the +non-sources as sinks it fails to do so because there won't be any +healed_sinks marked, no witness present and there will be a source. + +Fix: +If there is a source and no healed_sinks, then reset all the locked +sources to 0 and healed sinks to 1 to do conservative merge. 
+ +Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57 +Signed-off-by: karthik-us +BUG: 1764095 +Reviewed-on: https://code.engineering.redhat.com/gerrit/185834 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bug-1749322-entry-heal-not-happening.t | 89 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-entry.c | 15 ++++ + 2 files changed, 104 insertions(+) + create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t + +diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +new file mode 100644 +index 0000000..9627908 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +@@ -0,0 +1,89 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup ++ ++function check_gfid_and_link_count ++{ ++ local file=$1 ++ ++ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file) ++ TEST [ ! 
-z $file_gfid_b0 ] ++ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file) ++ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file) ++ EXPECT $file_gfid_b0 echo $file_gfid_b1 ++ EXPECT $file_gfid_b0 echo $file_gfid_b2 ++ ++ EXPECT "2" stat -c %h $B0/${V0}0/$file ++ EXPECT "2" stat -c %h $B0/${V0}1/$file ++ EXPECT "2" stat -c %h $B0/${V0}2/$file ++} ++TESTS_EXPECTED_IN_LOOP=18 ++ ++################################################################################ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++TEST `echo "File 1 " > $M0/dir/file1` ++TEST touch $M0/dir/file{2..4} ++ ++# Remove file2 from 1st & 3rd bricks ++TEST rm -f $B0/$V0"0"/dir/file2 ++TEST rm -f $B0/$V0"2"/dir/file2 ++ ++# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks ++gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3) ++gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3) ++TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm -f $B0/$V0"0"/dir/file3 ++TEST rm -f $B0/$V0"1"/dir/file3 ++ ++# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick ++gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4) ++gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4) ++TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4 ++TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 ++ ++# B0 and B2 blame each other ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 
$B0/$V0"0"/dir ++ ++# Add entry to xattrop dir on first brick. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# All the files must be present on all the bricks after conservative merge and ++# should have the gfid xattr and the .glusterfs hardlink. ++check_gfid_and_link_count dir/file1 ++check_gfid_and_link_count dir/file2 ++check_gfid_and_link_count dir/file3 ++check_gfid_and_link_count dir/file4 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 35b600f..3ce882e 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, + afr_private_t *priv = NULL; + int source = -1; + int sources_count = 0; ++ int i = 0; + + priv = this->private; + +@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, + } + + source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION); ++ ++ /*If the selected source does not blame any other brick, then mark ++ * everything as sink to trigger conservative merge. 
++ */ ++ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) { ++ for (i = 0; i < priv->child_count; i++) { ++ if (locked_on[i]) { ++ sources[i] = 0; ++ healed_sinks[i] = 1; ++ } ++ } ++ return -1; ++ } ++ + return source; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0330-mount.glusterfs-change-the-error-message.patch b/SOURCES/0330-mount.glusterfs-change-the-error-message.patch new file mode 100644 index 0000000..b64f0c6 --- /dev/null +++ b/SOURCES/0330-mount.glusterfs-change-the-error-message.patch @@ -0,0 +1,59 @@ +From 72168245761592a2cd0ebec05dd9bd9bc00745ca Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Wed, 13 Mar 2019 08:51:31 +0530 +Subject: [PATCH 330/335] mount.glusterfs: change the error message + +In scenarios where a mount fails before creating log file, doesn't +make sense to give message to 'check log file'. See below: + +``` +ERROR: failed to create logfile "/var/log/glusterfs/mnt.log" (No space left on device) +ERROR: failed to open logfile /var/log/glusterfs/mnt.log +Mount failed. Please check the log file for more details. +``` + +>upstream patch: https://review.gluster.org/#/c/glusterfs/+/22346/ +>Fixes: bz#1688068 +>Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f +>Signed-off-by: Amar Tumballi + +BUG: 1685406 +Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f +Signed-off-by: Sheetal Pamecha +Reviewed-on: https://code.engineering.redhat.com/gerrit/185828 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mount/fuse/utils/mount.glusterfs.in | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index 3f5d76d..cbde42d 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -361,7 +361,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line $mount_point"); + $cmd_line; + if [ $? -ne 0 ]; then +- warn "Mount failed. 
Please check the log file for more details." ++ # If this is true, then glusterfs process returned error without ++ # getting daemonized. We have made sure the logs are posted to ++ # 'stderr', so no need to point them to logfile. ++ warn "Mounting glusterfs on $mount_point failed." + exit 1; + fi + +@@ -369,7 +372,9 @@ start_glusterfs () + inode=$( ${getinode} $mount_point 2>/dev/null); + # this is required if the stat returns error + if [ $? -ne 0 ]; then +- warn "Mount failed. Please check the log file for more details." ++ # At this time, glusterfs got daemonized, and then later exited. ++ # These failures are only logged in log file. ++ warn "Mount failed. Check the log file ${log_file} for more details." + umount $mount_point > /dev/null 2>&1; + exit 1; + fi +-- +1.8.3.1 + diff --git a/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch b/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch new file mode 100644 index 0000000..6eb15b0 --- /dev/null +++ b/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch @@ -0,0 +1,44 @@ +From 147cff762b307bf60519bae4cdefc62f655119a7 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Wed, 30 Oct 2019 10:47:17 +0530 +Subject: [PATCH 331/335] features/locks: Do special handling for op-version < + 3.12.0 + +Problem: +Patch https://code.engineering.redhat.com/gerrit/#/c/140080/ diverges from +its upstream patch(https://review.gluster.org/c/glusterfs/+/20031) in op-version. +On upstream special-handling happens for version < 3.10.0 whereas for downstream +special-handling happens for version < 3.12.0. + When rebase happened for 3.5.0 from upstream, this downstream specific change +is missed as there was no special downstream-only patch tracking this difference. 
+This leads to I/O errors on upgrade from 3.3.1->3.5.0 + +Fix: +Do special handling for op-version < 3.12.0 as in 3.4.x + +Change-Id: I72fec058bdfb3cd30d017d205c90aa61aec86c5d +Label: DOWNSTREAM ONLY +BUG: 1766640 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/185835 +Reviewed-by: Xavi Hernandez Juan +--- + xlators/features/locks/src/posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 9db5ac6..4592240 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -57,7 +57,7 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + do { \ + pl_local_t *__local = NULL; \ + if (frame->root->client && \ +- (frame->root->client->opversion < GD_OP_VERSION_3_10_0)) { \ ++ (frame->root->client->opversion < GD_OP_VERSION_3_12_0)) { \ + __local = frame->local; \ + PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params); \ + } else { \ +-- +1.8.3.1 + diff --git a/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch b/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch new file mode 100644 index 0000000..c9b2b56 --- /dev/null +++ b/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch @@ -0,0 +1,57 @@ +From 808f311bd4f38f06b8afc49fc8d2c65fc4797431 Mon Sep 17 00:00:00 2001 +From: kshithijiyer +Date: Fri, 28 Jun 2019 15:32:31 +0530 +Subject: [PATCH 332/335] Removing one top command from gluster v help + +The current help show 2 different top commands +intead of one single top command which can be +easily observed when "# gluster v help" command +is issued. Removing one "volume top " +and clubbing into them into a single command. 
+ +Current help: +volume top {open|read|write|opendir|readdir|clear} +[nfs|brick ] [list-cnt ] | +volume top {read-perf|write-perf} +[bs count ] [brick ] +[list-cnt ] - volume top operations + +Expected help: +volume top {open|read|write|opendir|readdir|clear} +[nfs|brick ] [list-cnt ] | {read-perf|write-perf} +[bs count ] [brick ] [list-cnt ] +- volume top operations + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/22972/ +> fixes: bz#1725034 +> Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d +> Signed-off-by: kshithijiyer + +BUG: 1726058 +Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185757 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-volume.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 66beb1b..754d333 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3427,8 +3427,8 @@ struct cli_cmd volume_cmds[] = { + cli_cmd_volume_profile_cbk, "volume profile operations"}, + + {"volume top {open|read|write|opendir|readdir|clear} [nfs|brick " +- "] [list-cnt ] |\n" +- "volume top {read-perf|write-perf} [bs count ] " ++ "] [list-cnt ] | " ++ "{read-perf|write-perf} [bs count ] " + "[brick ] [list-cnt ]", + cli_cmd_volume_top_cbk, "volume top operations"}, + +-- +1.8.3.1 + diff --git a/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch b/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch new file mode 100644 index 0000000..b1d94b4 --- /dev/null +++ b/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch @@ -0,0 +1,195 @@ +From f199094cb61341a47c98a8ed91b293446182b5a9 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 3 Oct 2019 14:06:52 +0530 +Subject: [PATCH 333/335] rpc: Synchronize slot allocation code + +Problem: Current slot allocation/deallocation code path is not + 
synchronized.There are scenario when due to race condition + in slot allocation/deallocation code path brick is crashed. + +Solution: Synchronize slot allocation/deallocation code path to + avoid the issue + +> Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25 +> Fixes: bz#1763036 +> Signed-off-by: Mohit Agrawal +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23508/) +> (Cherry pick from commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2) + +Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25 +BUG: 1741193 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/185827 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/event-epoll.c | 74 +++++++++++++++++++++++------------------- + 1 file changed, 41 insertions(+), 33 deletions(-) + +diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c +index 0cec47e..65f5efd 100644 +--- a/libglusterfs/src/event-epoll.c ++++ b/libglusterfs/src/event-epoll.c +@@ -69,15 +69,27 @@ __event_newtable(struct event_pool *event_pool, int table_idx) + } + + static int ++event_slot_ref(struct event_slot_epoll *slot) ++{ ++ if (!slot) ++ return -1; ++ ++ return GF_ATOMIC_INC(slot->ref); ++} ++ ++static int + __event_slot_alloc(struct event_pool *event_pool, int fd, +- char notify_poller_death) ++ char notify_poller_death, struct event_slot_epoll **slot) + { + int i = 0; ++ int j = 0; + int table_idx = -1; + int gen = -1; + struct event_slot_epoll *table = NULL; + +- for (i = 0; i < EVENT_EPOLL_TABLES; i++) { ++retry: ++ ++ while (i < EVENT_EPOLL_TABLES) { + switch (event_pool->slots_used[i]) { + case EVENT_EPOLL_SLOTS: + continue; +@@ -98,6 +110,7 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + if (table) + /* break out of the loop */ + break; ++ i++; + } + + if (!table) +@@ -105,20 +118,20 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + + table_idx = i; + +- for (i = 0; i < 
EVENT_EPOLL_SLOTS; i++) { +- if (table[i].fd == -1) { ++ for (j = 0; j < EVENT_EPOLL_SLOTS; j++) { ++ if (table[j].fd == -1) { + /* wipe everything except bump the generation */ +- gen = table[i].gen; +- memset(&table[i], 0, sizeof(table[i])); +- table[i].gen = gen + 1; ++ gen = table[j].gen; ++ memset(&table[j], 0, sizeof(table[j])); ++ table[j].gen = gen + 1; + +- LOCK_INIT(&table[i].lock); +- INIT_LIST_HEAD(&table[i].poller_death); ++ LOCK_INIT(&table[j].lock); ++ INIT_LIST_HEAD(&table[j].poller_death); + +- table[i].fd = fd; ++ table[j].fd = fd; + if (notify_poller_death) { +- table[i].idx = table_idx * EVENT_EPOLL_SLOTS + i; +- list_add_tail(&table[i].poller_death, ++ table[j].idx = table_idx * EVENT_EPOLL_SLOTS + j; ++ list_add_tail(&table[j].poller_death, + &event_pool->poller_death); + } + +@@ -128,18 +141,26 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + } + } + +- return table_idx * EVENT_EPOLL_SLOTS + i; ++ if (j == EVENT_EPOLL_SLOTS) { ++ table = NULL; ++ i++; ++ goto retry; ++ } else { ++ (*slot) = &table[j]; ++ event_slot_ref(*slot); ++ return table_idx * EVENT_EPOLL_SLOTS + j; ++ } + } + + static int + event_slot_alloc(struct event_pool *event_pool, int fd, +- char notify_poller_death) ++ char notify_poller_death, struct event_slot_epoll **slot) + { + int idx = -1; + + pthread_mutex_lock(&event_pool->mutex); + { +- idx = __event_slot_alloc(event_pool, fd, notify_poller_death); ++ idx = __event_slot_alloc(event_pool, fd, notify_poller_death, slot); + } + pthread_mutex_unlock(&event_pool->mutex); + +@@ -153,6 +174,7 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx) + int offset = 0; + struct event_slot_epoll *table = NULL; + struct event_slot_epoll *slot = NULL; ++ int fd = -1; + + table_idx = idx / EVENT_EPOLL_SLOTS; + offset = idx % EVENT_EPOLL_SLOTS; +@@ -164,11 +186,13 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx) + slot = &table[offset]; + slot->gen++; + ++ fd = slot->fd; + slot->fd = -1; + 
slot->handled_error = 0; + slot->in_handler = 0; + list_del_init(&slot->poller_death); +- event_pool->slots_used[table_idx]--; ++ if (fd != -1) ++ event_pool->slots_used[table_idx]--; + + return; + } +@@ -185,15 +209,6 @@ event_slot_dealloc(struct event_pool *event_pool, int idx) + return; + } + +-static int +-event_slot_ref(struct event_slot_epoll *slot) +-{ +- if (!slot) +- return -1; +- +- return GF_ATOMIC_INC(slot->ref); +-} +- + static struct event_slot_epoll * + event_slot_get(struct event_pool *event_pool, int idx) + { +@@ -379,20 +394,13 @@ event_register_epoll(struct event_pool *event_pool, int fd, + if (destroy == 1) + goto out; + +- idx = event_slot_alloc(event_pool, fd, notify_poller_death); ++ idx = event_slot_alloc(event_pool, fd, notify_poller_death, &slot); + if (idx == -1) { + gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, + "could not find slot for fd=%d", fd); + return -1; + } + +- slot = event_slot_get(event_pool, idx); +- if (!slot) { +- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, +- "could not find slot for fd=%d idx=%d", fd, idx); +- return -1; +- } +- + assert(slot->fd == fd); + + LOCK(&slot->lock); +-- +1.8.3.1 + diff --git a/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch b/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch new file mode 100644 index 0000000..48f927f --- /dev/null +++ b/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch @@ -0,0 +1,54 @@ +From 17940583c4d991a568582581f68dcbf08463ccaf Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Tue, 16 Jul 2019 10:31:46 +0530 +Subject: [PATCH 334/335] dht: log getxattr failure for node-uuid at "DEBUG" + +There are two ways to fetch node-uuid information from dht. + +1 - #define GF_XATTR_LIST_NODE_UUIDS_KEY "trusted.glusterfs.list-node-uuids" +This key is used by AFR. + +2 - #define GF_REBAL_FIND_LOCAL_SUBVOL "glusterfs.find-local-subvol" +This key is used for non-afr volume type. + +We do two getxattr operations. 
First on the #1 key followed by on #2 if +getxattr on #1 key fails. + +Since the parent function "dht_init_local_subvols_and_nodeuuids" logs failure, +moving the log-level to DEBUG in dht_find_local_subvol_cbk. + +>fixes: bz#1730175 +>Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14 +>Signed-off-by: Susant Palai +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23053/ + +BUG: 1727755 +Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14 +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185876 +Tested-by: RHGS Build Bot +--- + xlators/cluster/dht/src/dht-common.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 37952ba..d0b5287 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -4253,8 +4253,11 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + local->op_ret = -1; + local->op_errno = op_errno; + UNLOCK(&frame->lock); +- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED, +- "getxattr err for dir"); ++ if (op_errno == ENODATA) ++ gf_msg_debug(this->name, 0, "failed to get node-uuid"); ++ else ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid"); + goto post_unlock; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch b/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch new file mode 100644 index 0000000..c3341df --- /dev/null +++ b/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch @@ -0,0 +1,15991 @@ +From 39523fd6c1b4789b12c8db81f4e08a3eb0c6a65c Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya +Date: Thu, 17 Oct 2019 13:03:56 +0530 +Subject: [PATCH 335/335] tests: RHEL8 test failure fixes for RHGS + +- tests/bugs/shard/bug-1272986.t + https://review.gluster.org/#/c/glusterfs/+/23499/ + 
https://review.gluster.org/#/c/glusterfs/+/23551/ + +- tests/basic/posix/shared-statfs.t + https://review.gluster.org/c/glusterfs/+/23550 + +- tests/basic/fops-sanity.t + https://review.gluster.org/c/glusterfs/+/22210/ + +- tests/bugs/transport/bug-873367.t +- tests/features/ssl-authz.t +- tests/bugs/snapshot/bug-1399598-uss-with-ssl.t + https://review.gluster.org/#/c/glusterfs/+/23587/ + +- remove gnfs relatedtests + +- tests/bugs/shard/unlinks-and-renames.t + https://review.gluster.org/#/c/glusterfs/+/23585/ + +- tests/bugs/rpc/bug-954057.t +- tests/bugs/glusterfs-server/bug-887145.t + https://review.gluster.org/#/c/glusterfs/+/23710/ + +- tests/features/ssl-ciphers.t + https://review.gluster.org/#/c/glusterfs/+/23703/ + +- tests/bugs/fuse/bug-985074.t + https://review.gluster.org/#/c/glusterfs/+/23734/ + +BUG: 1762180 +Change-Id: I97b344a632b49ca9ca332a5a463756b160aee5bd +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185716 +Tested-by: RHGS Build Bot +--- + tests/basic/fops-sanity.c | 1862 ++-- + tests/basic/posix/shared-statfs.t | 11 +- + tests/bugs/cli/bug-1320388.t | 2 +- + tests/bugs/fuse/bug-985074.t | 4 +- + tests/bugs/glusterd/quorum-value-check.t | 35 - + tests/bugs/glusterfs-server/bug-887145.t | 14 +- + tests/bugs/nfs/bug-1053579.t | 114 - + tests/bugs/nfs/bug-1116503.t | 47 - + tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t | 24 - + tests/bugs/nfs/bug-1157223-symlink-mounting.t | 126 - + tests/bugs/nfs/bug-1161092-nfs-acls.t | 39 - + tests/bugs/nfs/bug-1166862.t | 69 - + tests/bugs/nfs/bug-1210338.c | 31 - + tests/bugs/nfs/bug-1210338.t | 30 - + tests/bugs/nfs/bug-1302948.t | 13 - + tests/bugs/nfs/bug-847622.t | 39 - + tests/bugs/nfs/bug-877885.t | 39 - + tests/bugs/nfs/bug-904065.t | 100 - + tests/bugs/nfs/bug-915280.t | 54 - + tests/bugs/nfs/bug-970070.t | 13 - + tests/bugs/nfs/bug-974972.t | 41 - + tests/bugs/nfs/showmount-many-clients.t | 41 - + tests/bugs/nfs/socket-as-fifo.py | 33 - + 
tests/bugs/nfs/socket-as-fifo.t | 25 - + tests/bugs/nfs/subdir-trailing-slash.t | 32 - + tests/bugs/nfs/zero-atime.t | 33 - + tests/bugs/rpc/bug-954057.t | 10 +- + tests/bugs/shard/bug-1272986.t | 6 +- + tests/bugs/transport/bug-873367.t | 2 +- + tests/features/ssl-authz.t | 2 +- + tests/features/ssl-ciphers.t | 61 +- + tests/ssl.rc | 2 +- + xlators/features/shard/src/shard.c | 11754 ++++++++++---------- + 33 files changed, 6638 insertions(+), 8070 deletions(-) + delete mode 100755 tests/bugs/glusterd/quorum-value-check.t + delete mode 100755 tests/bugs/nfs/bug-1053579.t + delete mode 100644 tests/bugs/nfs/bug-1116503.t + delete mode 100644 tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t + delete mode 100644 tests/bugs/nfs/bug-1157223-symlink-mounting.t + delete mode 100644 tests/bugs/nfs/bug-1161092-nfs-acls.t + delete mode 100755 tests/bugs/nfs/bug-1166862.t + delete mode 100644 tests/bugs/nfs/bug-1210338.c + delete mode 100644 tests/bugs/nfs/bug-1210338.t + delete mode 100755 tests/bugs/nfs/bug-1302948.t + delete mode 100755 tests/bugs/nfs/bug-847622.t + delete mode 100755 tests/bugs/nfs/bug-877885.t + delete mode 100755 tests/bugs/nfs/bug-904065.t + delete mode 100755 tests/bugs/nfs/bug-915280.t + delete mode 100755 tests/bugs/nfs/bug-970070.t + delete mode 100755 tests/bugs/nfs/bug-974972.t + delete mode 100644 tests/bugs/nfs/showmount-many-clients.t + delete mode 100755 tests/bugs/nfs/socket-as-fifo.py + delete mode 100644 tests/bugs/nfs/socket-as-fifo.t + delete mode 100644 tests/bugs/nfs/subdir-trailing-slash.t + delete mode 100755 tests/bugs/nfs/zero-atime.t + +diff --git a/tests/basic/fops-sanity.c b/tests/basic/fops-sanity.c +index aff72d8..171d003 100644 +--- a/tests/basic/fops-sanity.c ++++ b/tests/basic/fops-sanity.c +@@ -17,15 +17,16 @@ + + /* Filesystem basic sanity check, tests all (almost) fops. 
*/ + +-#include ++#include ++#include + #include +-#include +-#include ++#include ++#include + #include ++#include ++#include + #include +-#include +-#include +-#include ++#include + + #ifndef linux + #include +@@ -34,904 +35,880 @@ + #endif + + /* for fd based fops after unlink */ +-int +-fd_based_fops_1(char *filename); ++int fd_based_fops_1(char *filename); + /* for fd based fops before unlink */ +-int +-fd_based_fops_2(char *filename); ++int fd_based_fops_2(char *filename); + /* fops based on fd after dup */ +-int +-dup_fd_based_fops(char *filename); ++int dup_fd_based_fops(char *filename); + /* for fops based on path */ +-int +-path_based_fops(char *filename); ++int path_based_fops(char *filename); + /* for fops which operate on directory */ +-int +-dir_based_fops(char *filename); ++int dir_based_fops(char *filename); + /* for fops which operate in link files (symlinks) */ +-int +-link_based_fops(char *filename); ++int link_based_fops(char *filename); + /* to test open syscall with open modes available. */ +-int +-test_open_modes(char *filename); ++int test_open_modes(char *filename); + /* generic function which does open write and read. 
*/ +-int +-generic_open_read_write(char *filename, int flag, mode_t mode); ++int generic_open_read_write(char *filename, int flag, mode_t mode); + + #define OPEN_MODE 0666 + +-int +-main(int argc, char *argv[]) +-{ +- int ret = -1; +- int result = 0; +- char filename[255] = { +- 0, +- }; +- +- if (argc > 1) +- strcpy(filename, argv[1]); +- else +- strcpy(filename, "temp-xattr-test-file"); +- +- ret = fd_based_fops_1(strcat(filename, "_1")); +- if (ret < 0) { +- fprintf(stderr, "fd based file operation 1 failed\n"); +- result |= ret; +- } else { +- fprintf(stdout, "fd based file operation 1 passed\n"); +- } +- +- ret = fd_based_fops_2(strcat(filename, "_2")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "fd based file operation 2 failed\n"); +- } else { +- fprintf(stdout, "fd based file operation 2 passed\n"); +- } +- +- ret = dup_fd_based_fops(strcat(filename, "_3")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "dup fd based file operation failed\n"); +- } else { +- fprintf(stdout, "dup fd based file operation passed\n"); +- } +- +- ret = path_based_fops(strcat(filename, "_4")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "path based file operation failed\n"); +- } else { +- fprintf(stdout, "path based file operation passed\n"); +- } +- +- ret = dir_based_fops(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "directory based file operation failed\n"); +- } else { +- fprintf(stdout, "directory based file operation passed\n"); +- } +- +- ret = link_based_fops(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "link based file operation failed\n"); +- } else { +- fprintf(stdout, "link based file operation passed\n"); +- } +- +- ret = test_open_modes(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "testing modes of `open' call failed\n"); +- } else { +- fprintf(stdout, "testing modes of `open' call passed\n"); +- } +- return result; ++int main(int 
argc, char *argv[]) { ++ int ret = -1; ++ int result = 0; ++ char filename[255] = { ++ 0, ++ }; ++ ++ if (argc > 1) ++ strcpy(filename, argv[1]); ++ else ++ strcpy(filename, "temp-xattr-test-file"); ++ ++ ret = fd_based_fops_1(strcat(filename, "_1")); ++ if (ret < 0) { ++ fprintf(stderr, "fd based file operation 1 failed\n"); ++ result |= ret; ++ } else { ++ fprintf(stdout, "fd based file operation 1 passed\n"); ++ } ++ ++ ret = fd_based_fops_2(strcat(filename, "_2")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "fd based file operation 2 failed\n"); ++ } else { ++ fprintf(stdout, "fd based file operation 2 passed\n"); ++ } ++ ++ ret = dup_fd_based_fops(strcat(filename, "_3")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "dup fd based file operation failed\n"); ++ } else { ++ fprintf(stdout, "dup fd based file operation passed\n"); ++ } ++ ++ ret = path_based_fops(strcat(filename, "_4")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "path based file operation failed\n"); ++ } else { ++ fprintf(stdout, "path based file operation passed\n"); ++ } ++ ++ ret = dir_based_fops(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "directory based file operation failed\n"); ++ } else { ++ fprintf(stdout, "directory based file operation passed\n"); ++ } ++ ++ ret = link_based_fops(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "link based file operation failed\n"); ++ } else { ++ fprintf(stdout, "link based file operation passed\n"); ++ } ++ ++ ret = test_open_modes(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "testing modes of `open' call failed\n"); ++ } else { ++ fprintf(stdout, "testing modes of `open' call passed\n"); ++ } ++ return result; + } + + /* Execute all possible fops on a fd which is unlinked */ +-int +-fd_based_fops_1(char *filename) +-{ +- int fd = 0; +- int ret = -1; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char 
wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(wstr, "This is my string\n"); +- ret = write(fd, wstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(fd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = ftruncate(fd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fstat(fd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(fd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(fd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- /* +- * These metadata operations fail at the moment because kernel doesn't +- * pass the client fd in the operation. +- * The following bug tracks this change. 
+- * https://bugzilla.redhat.com/show_bug.cgi?id=1084422 +- * ret = fchmod (fd, 0640); +- * if (ret < 0) { +- * fprintf (stderr, "fchmod failed : %s\n", strerror (errno)); +- * result |= ret; +- * } +- +- * ret = fchown (fd, 10001, 10001); +- * if (ret < 0) { +- * fprintf (stderr, "fchown failed : %s\n", strerror (errno)); +- * result |= ret; +- * } +- +- * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0); +- * if (ret < 0) { +- * fprintf (stderr, "fsetxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = flistxattr (fd, NULL, 0); +- * if (ret <= 0) { +- * fprintf (stderr, "flistxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0); +- * if (ret <= 0) { +- * fprintf (stderr, "fgetxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = fremovexattr (fd, "trusted.xattr-test"); +- * if (ret < 0) { +- * fprintf (stderr, "fremovexattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- */ +- +- if (fd) +- close(fd); +- return result; ++int fd_based_fops_1(char *filename) { ++ int fd = 0; ++ int ret = -1; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(wstr, "This is my string\n"); ++ ret = write(fd, wstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(fd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: 
%s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = ftruncate(fd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fstat(fd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ /* ++ * These metadata operations fail at the moment because kernel doesn't ++ * pass the client fd in the operation. ++ * The following bug tracks this change. ++ * https://bugzilla.redhat.com/show_bug.cgi?id=1084422 ++ * ret = fchmod (fd, 0640); ++ * if (ret < 0) { ++ * fprintf (stderr, "fchmod failed : %s\n", strerror (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fchown (fd, 10001, 10001); ++ * if (ret < 0) { ++ * fprintf (stderr, "fchown failed : %s\n", strerror (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0); ++ * if (ret < 0) { ++ * fprintf (stderr, "fsetxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = flistxattr (fd, NULL, 0); ++ * if (ret <= 0) { ++ * fprintf (stderr, "flistxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0); ++ * if (ret <= 0) { ++ * fprintf (stderr, "fgetxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fremovexattr (fd, "trusted.xattr-test"); ++ * if (ret < 0) { ++ * fprintf (stderr, "fremovexattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ */ ++ ++ if (fd) ++ close(fd); ++ return result; + } + +-int 
+-fd_based_fops_2(char *filename) +-{ +- int fd = 0; +- int ret = -1; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- ret = ftruncate(fd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(wstr, "This is my second string\n"); +- ret = write(fd, wstr, strlen(wstr)); +- if (ret < 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(fd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = fstat(fd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchmod(fd, 0640); +- if (ret < 0) { +- fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchown(fd, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "fchown failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(fd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(fd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = flistxattr(fd, NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "flistxattr failed 
: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fremovexattr(fd, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (fd) +- close(fd); +- unlink(filename); ++int fd_based_fops_2(char *filename) { ++ int fd = 0; ++ int ret = -1; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ ret = ftruncate(fd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(wstr, "This is my second string\n"); ++ ret = write(fd, wstr, strlen(wstr)); ++ if (ret < 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(fd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = fstat(fd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchmod(fd, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchown(fd, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "fchown failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", 
strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = flistxattr(fd, NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fremovexattr(fd, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (fd) ++ close(fd); ++ unlink(filename); + +- return result; ++ return result; + } + +-int +-path_based_fops(char *filename) +-{ +- int ret = -1; +- int fd = 0; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char newfilename[255] = { +- 0, +- }; +- char *hardlink = "linkfile-hard.txt"; +- char *symlnk = "linkfile-soft.txt"; +- char buf[1024] = { +- 0, +- }; +- +- fd = creat(filename, 0644); +- if (fd < 0) { +- fprintf(stderr, "creat failed: %s\n", strerror(errno)); +- return ret; +- } +- +- ret = truncate(filename, 0); +- if (ret < 0) { +- fprintf(stderr, "truncate failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = stat(filename, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "stat failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chmod(filename, 0640); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chown(filename, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "chown failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "setxattr failed: 
%s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = listxattr(filename, NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = getxattr(filename, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = removexattr(filename, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = access(filename, R_OK | W_OK); +- if (ret < 0) { +- fprintf(stderr, "access failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = link(filename, hardlink); +- if (ret < 0) { +- fprintf(stderr, "link failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink(hardlink); +- +- ret = symlink(filename, symlnk); +- if (ret < 0) { +- fprintf(stderr, "symlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = readlink(symlnk, buf, sizeof(buf)); +- if (ret < 0) { +- fprintf(stderr, "readlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink(symlnk); +- +- /* Create a character special file */ +- ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3)); +- if (ret < 0) { +- fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("cspecial"); +- +- ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5)); +- if (ret < 0) { +- fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("bspecial"); ++int path_based_fops(char *filename) { ++ int ret = -1; ++ int fd = 0; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char newfilename[255] = { ++ 0, ++ }; ++ char *hardlink = "linkfile-hard.txt"; ++ char *symlnk = "linkfile-soft.txt"; ++ char buf[1024] = { ++ 0, ++ }; ++ ++ fd = creat(filename, 0644); ++ if (fd < 0) { ++ fprintf(stderr, "creat failed: %s\n", strerror(errno)); 
++ return ret; ++ } ++ ++ ret = truncate(filename, 0); ++ if (ret < 0) { ++ fprintf(stderr, "truncate failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = stat(filename, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "stat failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chmod(filename, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chown(filename, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "chown failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = listxattr(filename, NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = getxattr(filename, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = removexattr(filename, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = access(filename, R_OK | W_OK); ++ if (ret < 0) { ++ fprintf(stderr, "access failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = link(filename, hardlink); ++ if (ret < 0) { ++ fprintf(stderr, "link failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink(hardlink); ++ ++ ret = symlink(filename, symlnk); ++ if (ret < 0) { ++ fprintf(stderr, "symlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = readlink(symlnk, buf, sizeof(buf)); ++ if (ret < 0) { ++ fprintf(stderr, "readlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink(symlnk); ++ ++ /* Create a character special file */ ++ ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3)); ++ if (ret < 0) { ++ 
fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink("cspecial"); ++ ++ ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5)); ++ if (ret < 0) { ++ fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink("bspecial"); + + #ifdef linux +- ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0); ++ ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0); + #else +- ret = mkfifo("fifo", 0); ++ ret = mkfifo("fifo", 0); + #endif +- if (ret < 0) { +- fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("fifo"); ++ if (ret < 0) { ++ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink("fifo"); + + #ifdef linux +- ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0); +- if (ret < 0) { +- fprintf(stderr, "sock mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } ++ ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0); ++ if (ret < 0) { ++ fprintf(stderr, "sock mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } + #else +- { +- int s; +- const char *pathname = "sock"; +- struct sockaddr_un addr; +- +- s = socket(PF_LOCAL, SOCK_STREAM, 0); +- memset(&addr, 0, sizeof(addr)); +- strncpy(addr.sun_path, pathname, sizeof(addr.sun_path)); +- ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr)); +- if (ret < 0) { +- fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- close(s); +- } +-#endif +- unlink("sock"); ++ { ++ int s; ++ const char *pathname = "sock"; ++ struct sockaddr_un addr; + +- strcpy(newfilename, filename); +- strcat(newfilename, "_new"); +- ret = rename(filename, newfilename); ++ s = socket(PF_LOCAL, SOCK_STREAM, 0); ++ memset(&addr, 0, sizeof(addr)); ++ strncpy(addr.sun_path, pathname, sizeof(addr.sun_path)); ++ ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr)); + if (ret < 0) { +- fprintf(stderr, "rename failed: %s\n", 
strerror(errno)); +- result |= ret; +- } +- unlink(newfilename); +- +- if (fd) +- close(fd); +- +- unlink(filename); +- return result; +-} +- +-int +-dup_fd_based_fops(char *filename) +-{ +- int fd = 0; +- int result = 0; +- int newfd = 0; +- int ret = -1; +- struct stat stbuf = { +- 0, +- }; +- char wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- newfd = dup(fd); +- if (newfd < 0) { +- fprintf(stderr, "dup failed: %s\n", strerror(errno)); +- result |= ret; ++ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); ++ result |= ret; + } +- ++ close(s); ++ } ++#endif ++ unlink("sock"); ++ ++ strcpy(newfilename, filename); ++ strcat(newfilename, "_new"); ++ ret = rename(filename, newfilename); ++ if (ret < 0) { ++ fprintf(stderr, "rename failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink(newfilename); ++ ++ if (fd) + close(fd); + +- strcpy(wstr, "This is my string\n"); +- ret = write(newfd, wstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lseek(newfd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(newfd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = ftruncate(newfd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fstat(newfd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchmod(newfd, 0640); +- if (ret < 0) { +- fprintf(stderr, "fchmod failed : %s\n", 
strerror(errno)); +- result |= ret; +- } +- +- ret = fchown(newfd, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "fchown failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(newfd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(newfd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = flistxattr(newfd, NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fremovexattr(newfd, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (newfd) +- close(newfd); +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed : %s\n", strerror(errno)); +- result |= ret; +- } +- return result; ++ unlink(filename); ++ return result; + } + +-int +-dir_based_fops(char *dirname) +-{ +- int ret = -1; +- int result = 0; +- DIR *dp = NULL; +- char buff[255] = { +- 0, +- }; +- struct dirent *dbuff = { +- 0, +- }; +- struct stat stbuff = { +- 0, +- }; +- char newdname[255] = { +- 0, +- }; +- char *cwd = NULL; +- +- ret = mkdir(dirname, 0755); +- if (ret < 0) { +- fprintf(stderr, "mkdir failed: %s\n", strerror(errno)); +- return ret; +- } +- +- dp = opendir(dirname); +- if (dp == NULL) { +- fprintf(stderr, "opendir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- dbuff = readdir(dp); +- if (NULL == dbuff) { +- fprintf(stderr, "readdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- 
ret = closedir(dp); +- if (ret < 0) { +- fprintf(stderr, "closedir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = stat(dirname, &stbuff); +- if (ret < 0) { +- fprintf(stderr, "stat failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chmod(dirname, 0744); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chown(dirname, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = listxattr(dirname, NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = getxattr(dirname, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = removexattr(dirname, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(newdname, dirname); +- strcat(newdname, "/../"); +- ret = chdir(newdname); +- if (ret < 0) { +- fprintf(stderr, "chdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- cwd = getcwd(buff, 255); +- if (NULL == cwd) { +- fprintf(stderr, "getcwd failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(newdname, dirname); +- strcat(newdname, "new"); +- ret = rename(dirname, newdname); +- if (ret < 0) { +- fprintf(stderr, "rename failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = rmdir(newdname); +- if (ret < 0) { +- fprintf(stderr, "rmdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- rmdir(dirname); +- return result; ++int dup_fd_based_fops(char *filename) { ++ int fd = 0; ++ int result = 0; ++ int newfd = 0; ++ int 
ret = -1; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ newfd = dup(fd); ++ if (newfd < 0) { ++ fprintf(stderr, "dup failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ close(fd); ++ ++ strcpy(wstr, "This is my string\n"); ++ ret = write(newfd, wstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lseek(newfd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(newfd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = ftruncate(newfd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fstat(newfd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchmod(newfd, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchown(newfd, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "fchown failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(newfd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(newfd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ 
++ ret = flistxattr(newfd, NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fremovexattr(newfd, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (newfd) ++ close(newfd); ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ return result; + } + +-int +-link_based_fops(char *filename) +-{ +- int ret = -1; +- int result = 0; +- int fd = 0; +- char newname[255] = { +- 0, +- }; +- char linkname[255] = { +- 0, +- }; +- struct stat lstbuf = { +- 0, +- }; +- +- fd = creat(filename, 0644); +- if (fd < 0) { +- fd = 0; +- fprintf(stderr, "creat failed: %s\n", strerror(errno)); +- return ret; +- } +- +- strcpy(newname, filename); +- strcat(newname, "_hlink"); +- ret = link(filename, newname); +- if (ret < 0) { +- fprintf(stderr, "link failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(linkname, filename); +- strcat(linkname, "_slink"); +- ret = symlink(newname, linkname); +- if (ret < 0) { +- fprintf(stderr, "symlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lstat(linkname, &lstbuf); +- if (ret < 0) { +- fprintf(stderr, "lstbuf failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lchown(linkname, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "lchown failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno)); +- result |= ret; +- 
} +- +- ret = llistxattr(linkname, NULL, 0); +- if (ret < 0) { +- ret = -1; +- fprintf(stderr, "llistxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0); +- if (ret < 0) { +- ret = -1; +- fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lremovexattr(linkname, "trusted.lxattr-test"); +- if (ret < 0) { +- fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (fd) +- close(fd); +- unlink(linkname); +- unlink(newname); +- return result; ++int dir_based_fops(char *dirname) { ++ int ret = -1; ++ int result = 0; ++ DIR *dp = NULL; ++ char buff[255] = { ++ 0, ++ }; ++ struct dirent *dbuff = { ++ 0, ++ }; ++ struct stat stbuff = { ++ 0, ++ }; ++ char newdname[255] = { ++ 0, ++ }; ++ char *cwd = NULL; ++ ++ ret = mkdir(dirname, 0755); ++ if (ret < 0) { ++ fprintf(stderr, "mkdir failed: %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ dp = opendir(dirname); ++ if (dp == NULL) { ++ fprintf(stderr, "opendir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ dbuff = readdir(dp); ++ if (NULL == dbuff) { ++ fprintf(stderr, "readdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = closedir(dp); ++ if (ret < 0) { ++ fprintf(stderr, "closedir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = stat(dirname, &stbuff); ++ if (ret < 0) { ++ fprintf(stderr, "stat failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chmod(dirname, 0744); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chown(dirname, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = 
listxattr(dirname, NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = getxattr(dirname, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = removexattr(dirname, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(newdname, dirname); ++ strcat(newdname, "/../"); ++ ret = chdir(newdname); ++ if (ret < 0) { ++ fprintf(stderr, "chdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ cwd = getcwd(buff, 255); ++ if (NULL == cwd) { ++ fprintf(stderr, "getcwd failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(newdname, dirname); ++ strcat(newdname, "new"); ++ ret = rename(dirname, newdname); ++ if (ret < 0) { ++ fprintf(stderr, "rename failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = rmdir(newdname); ++ if (ret < 0) { ++ fprintf(stderr, "rmdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ rmdir(dirname); ++ return result; + } + +-int +-test_open_modes(char *filename) +-{ +- int ret = -1; +- int result = 0; +- +- ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n"); +- result |= ret; +- } +- +- ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_RDWR failed\n"); +- result |= ret; +- } +- +- ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_WRONLY, 0); +- if (ret != 0) { +- fprintf(stderr, "flag O_WRONLY failed\n"); +- result |= ret; +- } +- +- ret = 
creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_RDWR, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_RDWR failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_RDONLY, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_RDONLY failed\n"); +- result |= ret; +- } ++int link_based_fops(char *filename) { ++ int ret = -1; ++ int result = 0; ++ int fd = 0; ++ char newname[255] = { ++ 0, ++ }; ++ char linkname[255] = { ++ 0, ++ }; ++ struct stat lstbuf = { ++ 0, ++ }; ++ ++ fd = creat(filename, 0644); ++ if (fd < 0) { ++ fd = 0; ++ fprintf(stderr, "creat failed: %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ strcpy(newname, filename); ++ strcat(newname, "_hlink"); ++ ret = link(filename, newname); ++ if (ret < 0) { ++ fprintf(stderr, "link failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(linkname, filename); ++ strcat(linkname, "_slink"); ++ ret = symlink(newname, linkname); ++ if (ret < 0) { ++ fprintf(stderr, "symlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lstat(linkname, &lstbuf); ++ if (ret < 0) { ++ fprintf(stderr, "lstbuf failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lchown(linkname, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "lchown failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = llistxattr(linkname, NULL, 0); ++ if (ret < 0) { ++ ret = -1; ++ fprintf(stderr, "llistxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0); ++ if (ret < 0) { ++ ret = -1; ++ fprintf(stderr, "lgetxattr 
failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lremovexattr(linkname, "trusted.lxattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (fd) ++ close(fd); ++ unlink(linkname); ++ unlink(newname); ++ return result; ++} + +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n"); +- result |= ret; +- } ++int test_open_modes(char *filename) { ++ int ret = -1; ++ int result = 0; ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n"); ++ result |= ret; ++ } ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_RDWR failed\n"); ++ result |= ret; ++ } ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_WRONLY, 0); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_WRONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_RDWR, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_RDWR failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_RDONLY, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_RDONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n"); ++ result |= ret; ++ } + + #if 0 /* undefined behaviour, unable to reliably test */ + ret = creat 
(filename, 0644); +@@ -943,90 +920,87 @@ test_open_modes(char *filename) + } + #endif + +- ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, +- OPEN_MODE); +- if (0 != ret) { +- fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE); +- if (0 != ret) { +- fprintf(stderr, "flag O_CREAT|O_EXCL failed\n"); +- result |= ret; +- } +- +- return result; ++ ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, OPEN_MODE); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_CREAT|O_EXCL failed\n"); ++ result |= ret; ++ } ++ ++ return result; + } + +-int +-generic_open_read_write(char *filename, int flag, mode_t mode) +-{ +- int fd = 0; +- int ret = -1; +- char wstring[50] = { +- 0, +- }; +- char rstring[50] = { +- 0, +- }; +- +- fd = open(filename, flag, mode); +- if (fd < 0) { +- if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) { +- unlink(filename); +- return 0; +- } else { +- fprintf(stderr, "open failed: %s\n", strerror(errno)); +- return -1; +- } +- } +- +- strcpy(wstring, "My string to write\n"); +- ret = write(fd, wstring, strlen(wstring)); +- if (ret <= 0) { +- if (errno != EBADF) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- close(fd); +- unlink(filename); +- return ret; +- } +- } +- +- ret = lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- close(fd); +- unlink(filename); +- return ret; ++int generic_open_read_write(char *filename, int flag, mode_t mode) { ++ int fd = 0; ++ int ret = -1; ++ char wstring[50] = { ++ 0, ++ }; ++ char rstring[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, flag, mode); ++ if (fd < 0) { ++ if (flag == (O_CREAT | O_EXCL) && errno == 
EEXIST) { ++ unlink(filename); ++ return 0; ++ } else { ++ fprintf(stderr, "open failed: %s\n", strerror(errno)); ++ return -1; + } ++ } + +- ret = read(fd, rstring, strlen(wstring)); +- if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY && +- flag != (O_TRUNC | O_WRONLY)) { +- close(fd); +- unlink(filename); +- return ret; ++ strcpy(wstring, "My string to write\n"); ++ ret = write(fd, wstring, strlen(wstring)); ++ if (ret <= 0) { ++ if (errno != EBADF) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ close(fd); ++ unlink(filename); ++ return ret; + } ++ } + +- /* Compare the rstring with wstring. But we do not want to return +- * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or +- * O_TRUNC|O_RDONLY. Because in that case we are not writing +- * anything to the file.*/ +- +- ret = memcmp(wstring, rstring, strlen(wstring)); +- if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY && +- flag != (O_CREAT | O_WRONLY) && +- !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY || +- flag == (O_TRUNC | O_RDONLY))) { +- fprintf(stderr, "read is returning junk\n"); +- close(fd); +- unlink(filename); +- return ret; +- } ++ ret = lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ close(fd); ++ unlink(filename); ++ return ret; ++ } + ++ ret = read(fd, rstring, strlen(wstring)); ++ if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY && ++ flag != (O_TRUNC | O_WRONLY)) { ++ close(fd); ++ unlink(filename); ++ return ret; ++ } ++ ++ /* Compare the rstring with wstring. But we do not want to return ++ * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or ++ * O_TRUNC|O_RDONLY. 
Because in that case we are not writing ++ * anything to the file.*/ ++ ++ ret = memcmp(wstring, rstring, strlen(wstring)); ++ if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY && ++ flag != (O_CREAT | O_WRONLY) && ++ !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY || ++ flag == (O_TRUNC | O_RDONLY))) { ++ fprintf(stderr, "read is returning junk\n"); + close(fd); + unlink(filename); +- return 0; ++ return ret; ++ } ++ ++ close(fd); ++ unlink(filename); ++ return 0; + } +diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t +index 3343956..0e4a1bb 100644 +--- a/tests/basic/posix/shared-statfs.t ++++ b/tests/basic/posix/shared-statfs.t +@@ -20,15 +20,18 @@ TEST mkdir -p $B0/${V0}1 $B0/${V0}2 + TEST MOUNT_LOOP $LO1 $B0/${V0}1 + TEST MOUNT_LOOP $LO2 $B0/${V0}2 + ++total_brick_blocks=$(df -P $B0/${V0}1 $B0/${V0}2 | tail -2 | awk '{sum = sum+$2}END{print sum}') ++#Account for rounding error ++brick_blocks_two_percent_less=$((total_brick_blocks*98/100)) + # Create a subdir in mountpoint and use that for volume. + TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1; + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') ++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}') + # Keeping the size less than 200M mainly because XFS will use + # some storage in brick to keep its own metadata. 
+-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] ++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ] + + + TEST force_umount $M0 +@@ -41,8 +44,8 @@ TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/ + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') +-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] ++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}') ++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ] + + TEST force_umount $M0 + TEST $CLI volume stop $V0 +diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t +index 8e5d77b..e719fc5 100755 +--- a/tests/bugs/cli/bug-1320388.t ++++ b/tests/bugs/cli/bug-1320388.t +@@ -21,7 +21,7 @@ cleanup; + rm -f $SSL_BASE/glusterfs.* + touch "$GLUSTERD_WORKDIR"/secure-access + +-TEST openssl genrsa -out $SSL_KEY 3072 ++TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +diff --git a/tests/bugs/fuse/bug-985074.t b/tests/bugs/fuse/bug-985074.t +index d10fd9f..26d196e 100644 +--- a/tests/bugs/fuse/bug-985074.t ++++ b/tests/bugs/fuse/bug-985074.t +@@ -30,7 +30,7 @@ TEST glusterd + + TEST $CLI volume create $V0 $H0:$B0/$V0 + TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 md-cache-timeout 3 ++TEST $CLI volume set $V0 performance.stat-prefetch off + + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0 + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0 +@@ -40,8 +40,6 @@ TEST ln $M0/file $M0/file.link + TEST ls -ali $M0 $M1 + TEST rm -f $M1/file.link + TEST ls -ali $M0 $M1 +-# expire the md-cache timeout +-sleep 3 + TEST mv 
$M0/file $M0/file.link + TEST stat $M0/file.link + TEST ! stat $M0/file +diff --git a/tests/bugs/glusterd/quorum-value-check.t b/tests/bugs/glusterd/quorum-value-check.t +deleted file mode 100755 +index aaf6362..0000000 +--- a/tests/bugs/glusterd/quorum-value-check.t ++++ /dev/null +@@ -1,35 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +- +-function check_quorum_nfs() { +- local qnfs="$(less /var/lib/glusterd/nfs/nfs-server.vol | grep "quorum-count"| awk '{print $3}')" +- local qinfo="$($CLI volume info $V0| grep "cluster.quorum-count"| awk '{print $2}')" +- +- if [ $qnfs = $qinfo ]; then +- echo "Y" +- else +- echo "N" +- fi +-} +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +-TEST $CLI volume set $V0 nfs.disable off +-TEST $CLI volume set $V0 performance.write-behind off +-TEST $CLI volume set $V0 cluster.self-heal-daemon off +-TEST $CLI volume set $V0 cluster.quorum-type fixed +-TEST $CLI volume start $V0 +- +-TEST $CLI volume set $V0 cluster.quorum-count 1 +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs +-TEST $CLI volume set $V0 cluster.quorum-count 2 +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs +-TEST $CLI volume set $V0 cluster.quorum-count 3 +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs +- +-cleanup; +diff --git a/tests/bugs/glusterfs-server/bug-887145.t b/tests/bugs/glusterfs-server/bug-887145.t +index 82f7cca..f65b1bd 100755 +--- a/tests/bugs/glusterfs-server/bug-887145.t ++++ b/tests/bugs/glusterfs-server/bug-887145.t +@@ -29,7 +29,15 @@ chmod 600 $M0/file; + + TEST mount_nfs $H0:/$V0 $N0 nolock; + +-chown -R nfsnobody:nfsnobody $M0/dir; ++grep nfsnobody /etc/passwd > /dev/nul ++if [ $? 
-eq 1 ]; then ++usr=nobody ++grp=nobody ++else ++usr=nfsnobody ++grp=nfsnobody ++fi ++chown -R $usr:$grp $M0/dir; + chown -R tmp_user:tmp_user $M0/other; + + TEST $CLI volume set $V0 server.root-squash on; +@@ -38,7 +46,7 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; + + # create files and directories in the root of the glusterfs and nfs mount + # which is owned by root and hence the right behavior is getting EACCESS +-# as the fops are executed as nfsnobody. ++# as the fops are executed as nfsnobody/nobody. + touch $M0/foo 2>/dev/null; + TEST [ $? -ne 0 ] + touch $N0/foo 2>/dev/null; +@@ -61,7 +69,7 @@ cat $N0/passwd 1>/dev/null; + TEST [ $? -eq 0 ] + + # create files and directories should succeed as the fops are being executed +-# inside the directory owned by nfsnobody ++# inside the directory owned by nfsnobody/nobody + TEST touch $M0/dir/file; + TEST touch $N0/dir/foo; + TEST mkdir $M0/dir/new; +diff --git a/tests/bugs/nfs/bug-1053579.t b/tests/bugs/nfs/bug-1053579.t +deleted file mode 100755 +index 2f53172..0000000 +--- a/tests/bugs/nfs/bug-1053579.t ++++ /dev/null +@@ -1,114 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup +- +-# prepare the users and groups +-NEW_USER=bug1053579 +-NEW_UID=1053579 +-NEW_GID=1053579 +-LAST_GID=1053779 +-NEW_GIDS=${NEW_GID} +- +-# OS-specific overrides +-case $OSTYPE in +-NetBSD|Darwin) +- # only NGROUPS_MAX=16 secondary groups are supported +- LAST_GID=1053593 +- ;; +-FreeBSD) +- # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups +- ;; +-Linux) +- # NGROUPS_MAX=65536, we can afford 200 groups +- ;; +-*) +- ;; +-esac +- +-# create a user that belongs to many groups +-for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID}) +-do +- groupadd -o -g ${GID} ${NEW_USER}-${GID} +- NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}" +-done +-TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER} +- +-# preparation done, start the tests +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 $H0:$B0/${V0}1 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 nfs.server-aux-gids on +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available +- +-# mount the volume +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +- +-# the actual test, this used to crash +-su -m ${NEW_USER} -c "stat $N0/. > /dev/null" +-TEST [ $? -eq 0 ] +- +-# create a file that only a user in a high-group can access +-echo 'Hello World!' > $N0/README +-chgrp ${LAST_GID} $N0/README +-chmod 0640 $N0/README +- +-#su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null" +-su -m ${NEW_USER} -c "cat $N0/README" +-ret=$? +- +-case $OSTYPE in +-Linux) # Linux NFS fails with big GID +- if [ $ret -ne 0 ] ; then +- res="Y" +- else +- res="N" +- fi +- ;; +-*) # Other systems should cope better +- if [ $ret -eq 0 ] ; then +- res="Y" +- else +- res="N" +- fi +- ;; +-esac +-TEST [ "x$res" = "xY" ] +- +-# This passes only on build.gluster.org, not reproducible on other machines?! 
+-#su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null" +-#TEST [ $? -ne 0 ] +- +-# enable server.manage-gids and things should work +-TEST $CLI volume set $V0 server.manage-gids on +- +-su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null" +-TEST [ $? -eq 0 ] +-su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null" +-TEST [ $? -eq 0 ] +- +-# cleanup +-userdel --force ${NEW_USER} +-for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID}) +-do +- groupdel ${NEW_USER}-${GID} +-done +- +-rm -f $N0/README +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +- +-TEST $CLI volume stop $V0 +-TEST $CLI volume delete $V0 +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1116503.t b/tests/bugs/nfs/bug-1116503.t +deleted file mode 100644 +index dd3998d..0000000 +--- a/tests/bugs/nfs/bug-1116503.t ++++ /dev/null +@@ -1,47 +0,0 @@ +-#!/bin/bash +-# +-# Verify that mounting NFS over UDP (MOUNT service only) works. +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 nfs.mount-udp on +- +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-TEST mount_nfs $H0:/$V0 $N0 nolock,mountproto=udp,proto=tcp; +-TEST mkdir -p $N0/foo/bar +-TEST ls $N0/foo +-TEST ls $N0/foo/bar +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST $CLI volume set $V0 nfs.addr-namelookup on +-TEST $CLI volume set $V0 nfs.rpc-auth-allow $H0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST $CLI volume set $V0 nfs.rpc-auth-reject $H0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST ! mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +- +-cleanup; +diff --git a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t +deleted file mode 100644 +index c360db4..0000000 +--- a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t ++++ /dev/null +@@ -1,24 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 performance.open-behind off +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST mkdir -p $N0/foo +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +-TEST mount_nfs $H0:/$V0/foo $N0 nolock +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +-cleanup +diff --git a/tests/bugs/nfs/bug-1157223-symlink-mounting.t b/tests/bugs/nfs/bug-1157223-symlink-mounting.t +deleted file mode 100644 +index dea609e..0000000 +--- a/tests/bugs/nfs/bug-1157223-symlink-mounting.t ++++ /dev/null +@@ -1,126 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-## Start and create a volume +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume info; +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0; +- +-## Wait for volume to register with rpc.mountd +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-## Mount NFS +-TEST mount_nfs $H0:/$V0 $N0 nolock; +- +-mkdir $N0/dir1; +-mkdir $N0/dir2; +-pushd $N0/ ; +- +-##link created using relative path +-ln -s dir1 symlink1; +- +-##relative path contains ".." +-ln -s ../dir1 dir2/symlink2; +- +-##link created using absolute path +-ln -s $N0/dir1 symlink3; +- +-##link pointing to another symlinks +-ln -s symlink1 symlink4 +-ln -s symlink3 symlink5 +- +-##dead links +-ln -s does/not/exist symlink6 +- +-##link which contains ".." 
points out of glusterfs +-ln -s ../../ symlink7 +- +-##links pointing to unauthorized area +-ln -s .glusterfs symlink8 +- +-popd ; +- +-##Umount the volume +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via directory +-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink1 +-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink2 +-TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink3 should fail +-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock; +- +-## Mount and umount NFS via symlink4 +-TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink5 should fail +-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock; +- +-## Mount NFS via symlink6 should fail +-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock; +- +-## Mount NFS via symlink7 should fail +-TEST ! mount_nfs $H0:/$V0/symlink7 $N0 nolock; +- +-## Mount NFS via symlink8 should fail +-TEST ! 
mount_nfs $H0:/$V0/symlink8 $N0 nolock; +- +-##Similar check for udp mount +-$CLI volume stop $V0 +-TEST $CLI volume set $V0 nfs.mount-udp on +-$CLI volume start $V0 +- +-## Wait for volume to register with rpc.mountd +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-## Mount and umount NFS via directory +-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink1 +-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink2 +-TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink3 should fail +-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock,mountproto=udp,proto=tcp; +- +-## Mount and umount NFS via symlink4 +-TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink5 should fail +-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock,mountproto=udp,proto=tcp; +- +-## Mount NFS via symlink6 should fail +-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock,mountproto=udp,proto=tcp; +- +-##symlink7 is not check here, because in udp mount ../../ resolves into root '/' +- +-## Mount NFS via symlink8 should fail +-TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock,mountproto=udp,proto=tcp; +- +-rm -rf $H0:$B0/ +-cleanup; +diff --git a/tests/bugs/nfs/bug-1161092-nfs-acls.t b/tests/bugs/nfs/bug-1161092-nfs-acls.t +deleted file mode 100644 +index 45a22e7..0000000 +--- a/tests/bugs/nfs/bug-1161092-nfs-acls.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume info +- +-TEST $CLI volume create $V0 $H0:$B0/brick1; +-EXPECT 'Created' volinfo_field $V0 'Status'; +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +-TEST mount_nfs $H0:/$V0 $N0 +- +-TEST touch $N0/file1 +-TEST chmod 700 $N0/file1 +-TEST getfacl $N0/file1 +- +-TEST $CLI volume set $V0 root-squash on +-TEST getfacl $N0/file1 +- +-TEST umount_nfs $H0:/$V0 $N0 +-TEST mount_nfs $H0:/$V0 $N0 +-TEST getfacl $N0/file1 +- +-## Before killing daemon to avoid deadlocks +-umount_nfs $N0 +- +-cleanup; +- +diff --git a/tests/bugs/nfs/bug-1166862.t b/tests/bugs/nfs/bug-1166862.t +deleted file mode 100755 +index c4f51a2..0000000 +--- a/tests/bugs/nfs/bug-1166862.t ++++ /dev/null +@@ -1,69 +0,0 @@ +-#!/bin/bash +-# +-# When nfs.mount-rmtab is disabled, it should not get updated. +-# +-# Based on: bug-904065.t +-# +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-# count the lines of a file, return 0 if the file does not exist +-function count_lines() +-{ +- if [ -n "$1" ] +- then +- $@ 2>/dev/null | wc -l +- else +- echo 0 +- fi +-} +- +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. 
$(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# disable the rmtab by settting it to the magic "/-" value +-TEST $CLI volume set $V0 nfs.mount-rmtab /- +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-# showmount should list one client +-EXPECT '1' count_lines showmount --no-headers $H0 +- +-# unmount +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-# after resetting the option, the rmtab should get updated again +-TEST $CLI volume reset $V0 nfs.mount-rmtab +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT '2' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-# removing a mount +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1210338.c b/tests/bugs/nfs/bug-1210338.c +deleted file mode 100644 +index d409924..0000000 +--- a/tests/bugs/nfs/bug-1210338.c ++++ /dev/null +@@ -1,31 +0,0 @@ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-int +-main(int argc, char *argv[]) +-{ +- int ret = -1; +- int fd = -1; +- +- fd = open(argv[1], O_CREAT | O_EXCL, 0644); +- +- if (fd == -1) { +- fprintf(stderr, "creation of the file %s failed (%s)\n", argv[1], +- strerror(errno)); +- goto out; +- } +- +- ret = 0; +- +-out: +- if (fd > 0) +- close(fd); +- +- return ret; +-} +diff --git 
a/tests/bugs/nfs/bug-1210338.t b/tests/bugs/nfs/bug-1210338.t +deleted file mode 100644 +index b5c9245..0000000 +--- a/tests/bugs/nfs/bug-1210338.t ++++ /dev/null +@@ -1,30 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-NFS_SOURCE=$(dirname $0)/bug-1210338.c +-NFS_EXEC=$(dirname $0)/excl_create +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-build_tester $NFS_SOURCE -o $NFS_EXEC +-TEST [ -e $NFS_EXEC ] +- +-TEST $NFS_EXEC $N0/my_file +- +-rm -f $NFS_EXEC; +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1302948.t b/tests/bugs/nfs/bug-1302948.t +deleted file mode 100755 +index a2fb0e6..0000000 +--- a/tests/bugs/nfs/bug-1302948.t ++++ /dev/null +@@ -1,13 +0,0 @@ +-#!/bin/bash +-# TEST the nfs.rdirplus option +-. $(dirname $0)/../../include.rc +- +-cleanup +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 nfs.rdirplus off +-TEST $CLI volume set $V0 nfs.rdirplus on +-cleanup +diff --git a/tests/bugs/nfs/bug-847622.t b/tests/bugs/nfs/bug-847622.t +deleted file mode 100755 +index 5ccee72..0000000 +--- a/tests/bugs/nfs/bug-847622.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. 
$(dirname $0)/../../volume.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-case $OSTYPE in +-NetBSD) +- echo "Skip test on ACL which are not available on NetBSD" >&2 +- SKIP_TESTS +- exit 0 +- ;; +-*) +- ;; +-esac +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 $H0:$B0/brick0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +-cd $N0 +- +-# simple getfacl setfacl commands +-TEST touch testfile +-TEST setfacl -m u:14:r testfile +-TEST getfacl testfile +- +-cd +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-cleanup +- +diff --git a/tests/bugs/nfs/bug-877885.t b/tests/bugs/nfs/bug-877885.t +deleted file mode 100755 +index dca315a..0000000 +--- a/tests/bugs/nfs/bug-877885.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +- +-## Mount FUSE with caching disabled +-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \ +-$M0; +- +-TEST touch $M0/file +-TEST mkdir $M0/dir +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +-cd $N0 +- +-rm -rf * & +- +-TEST mount_nfs $H0:/$V0 $N1 retry=0,nolock; +- +-cd; +- +-kill %1; +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1 +- +-cleanup +diff --git a/tests/bugs/nfs/bug-904065.t b/tests/bugs/nfs/bug-904065.t +deleted file mode 100755 +index 0eba86e..0000000 +--- a/tests/bugs/nfs/bug-904065.t ++++ /dev/null +@@ -1,100 +0,0 @@ +-#!/bin/bash +-# +-# This test does not use 'showmount' 
from the nfs-utils package, it would +-# require setting up a portmapper (either rpcbind or portmap, depending on the +-# Linux distribution used for testing). The persistancy of the rmtab should not +-# affect the current showmount outputs, so existing regression tests should be +-# sufficient. +-# +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-# count the lines of a file, return 0 if the file does not exist +-function count_lines() +-{ +- if [ -e "$1" ] +- then +- wc -l < $1 +- else +- echo 0 +- fi +-} +- +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-# the output would looks similar to: +-# +-# hostname-0=172.31.122.104 +-# mountpoint-0=/ufo +-# +-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# duplicate mounts should not be recorded (client could have crashed) +-TEST mount_nfs $H0:/$V0 $N1 nolock +-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# removing a mount should (even if there are two) should remove the entry +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1 +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# unmounting the other mount should work flawlessly +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $M0 +- +-# we'll create a fake rmtab here, similar to how an other storage 
server would do +-# using an invalid IP address to prevent (unlikely) collisions on the test-machine +-cat << EOF > $M0/rmtab +-hostname-0=127.0.0.256 +-mountpoint-0=/ufo +-EOF +-EXPECT '2' count_lines $M0/rmtab +- +-# reconfigure merges the rmtab with the one on the volume +-TEST gluster volume set $V0 nfs.mount-rmtab $M0/rmtab +- +-# glusterfs/nfs needs some time to restart +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# Apparently "is_nfs_export_available" might return even if the export is +-# not, in fact, available. (eyeroll) Give it a bit of extra time. +-# +-# TBD: fix the broken shell function instead of working around it here +-sleep 5 +- +-# a new mount should be added to the rmtab, not overwrite exiting ones +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT '4' count_lines $M0/rmtab +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '2' count_lines $M0/rmtab +- +-# TODO: nfs/reconfigure() is never called and is therefor disabled. When the +-# NFS-server supports reloading and does not get restarted anymore, we should +-# add a test that includes the merging of entries in the old rmtab with the new +-# rmtab. +- +-cleanup +diff --git a/tests/bugs/nfs/bug-915280.t b/tests/bugs/nfs/bug-915280.t +deleted file mode 100755 +index bd27915..0000000 +--- a/tests/bugs/nfs/bug-915280.t ++++ /dev/null +@@ -1,54 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +- +-function volinfo_field() +-{ +- local vol=$1; +- local field=$2; +- +- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; +-} +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2; +-EXPECT 'Created' volinfo_field $V0 'Status'; +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-MOUNTDIR=$N0; +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock,timeo=30,retrans=1 +-TEST touch $N0/testfile +- +-TEST $CLI volume set $V0 debug.error-gen client +-TEST $CLI volume set $V0 debug.error-fops stat +-TEST $CLI volume set $V0 debug.error-failure 100 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-pid_file=$(read_nfs_pidfile); +- +-getfacl $N0/testfile 2>/dev/null +- +-nfs_pid=$(get_nfs_pid); +-if [ ! $nfs_pid ] +-then +- nfs_pid=0; +-fi +- +-TEST [ $nfs_pid -eq $pid_file ] +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR +- +-cleanup; +diff --git a/tests/bugs/nfs/bug-970070.t b/tests/bugs/nfs/bug-970070.t +deleted file mode 100755 +index 61be484..0000000 +--- a/tests/bugs/nfs/bug-970070.t ++++ /dev/null +@@ -1,13 +0,0 @@ +-#!/bin/bash +-# TEST the nfs.acl option +-. $(dirname $0)/../../include.rc +- +-cleanup +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 nfs.acl off +-TEST $CLI volume set $V0 nfs.acl on +-cleanup +diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t +deleted file mode 100755 +index 975c46f..0000000 +--- a/tests/bugs/nfs/bug-974972.t ++++ /dev/null +@@ -1,41 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-#This script checks that nfs mount does not fail lookup on files with split-brain +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +-TEST $CLI volume set $V0 self-heal-daemon off +-TEST $CLI volume set $V0 cluster.eager-lock off +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 +-TEST touch $N0/1 +-TEST kill_brick ${V0} ${H0} ${B0}/${V0}1 +-echo abc > $N0/1 +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1 +- +-TEST kill_brick ${V0} ${H0} ${B0}/${V0}0 +-echo def > $N0/1 +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1 +- +-#Lookup should not fail +-TEST ls $N0/1 +-TEST ! cat $N0/1 +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-cleanup +diff --git a/tests/bugs/nfs/showmount-many-clients.t b/tests/bugs/nfs/showmount-many-clients.t +deleted file mode 100644 +index f1b6859..0000000 +--- a/tests/bugs/nfs/showmount-many-clients.t ++++ /dev/null +@@ -1,41 +0,0 @@ +-#!/bin/bash +-# +-# The nfs.rpc-auth-allow volume option is used to generate the list of clients +-# that are displayed as able to mount the export. The "group" in the export +-# should be a list of all clients, identified by "name". In previous versions, +-# the "name" was the copied string from nfs.rpc-auth-allow. This is not +-# correct, as the volume option should be parsed and split into different +-# groups. 
+-# +-# When the single string is passed, this testcase fails when the +-# nfs.rpc-auth-allow volume option is longer than 256 characters. By splitting +-# the groups into their own structures, this testcase passes. +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-CLIENTS=$(echo 127.0.0.{1..128} | tr ' ' ,) +-TEST $CLI volume set $V0 nfs.rpc-auth-allow ${CLIENTS} +-TEST $CLI volume set $V0 nfs.rpc-auth-reject all +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# showmount should not timeout (no reply is sent on error) +-TEST showmount -e $H0 +- +-cleanup +diff --git a/tests/bugs/nfs/socket-as-fifo.py b/tests/bugs/nfs/socket-as-fifo.py +deleted file mode 100755 +index eb507e1..0000000 +--- a/tests/bugs/nfs/socket-as-fifo.py ++++ /dev/null +@@ -1,33 +0,0 @@ +-# +-# Create a unix domain socket and test if it is a socket (and not a fifo/pipe). +-# +-# Author: Niels de Vos +-# +- +-from __future__ import print_function +-import os +-import stat +-import sys +-import socket +- +-ret = 1 +- +-if len(sys.argv) != 2: +- print('Usage: %s ' % (sys.argv[0])) +- sys.exit(ret) +- +-path = sys.argv[1] +- +-sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +-sock.bind(path) +- +-stbuf = os.stat(path) +-mode = stbuf.st_mode +- +-if stat.S_ISSOCK(mode): +- ret = 0 +- +-sock.close() +-os.unlink(path) +- +-sys.exit(ret) +diff --git a/tests/bugs/nfs/socket-as-fifo.t b/tests/bugs/nfs/socket-as-fifo.t +deleted file mode 100644 +index d9b9e95..0000000 +--- a/tests/bugs/nfs/socket-as-fifo.t ++++ /dev/null +@@ -1,25 +0,0 @@ +-#!/bin/bash +- +-. 
$(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-# this is the actual test +-TEST $PYTHON $(dirname $0)/socket-as-fifo.py $N0/not-a-fifo.socket +- +-TEST umount_nfs $N0 +- +-cleanup +diff --git a/tests/bugs/nfs/subdir-trailing-slash.t b/tests/bugs/nfs/subdir-trailing-slash.t +deleted file mode 100644 +index 6a11487..0000000 +--- a/tests/bugs/nfs/subdir-trailing-slash.t ++++ /dev/null +@@ -1,32 +0,0 @@ +-#!/bin/bash +-# +-# Verify that mounting a subdir over NFS works, even with a trailing / +-# +-# For example: +-# mount -t nfs server.example.com:/volume/subdir/ +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST mkdir -p $N0/subdir +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST mount_nfs $H0:/$V0/subdir/ $N0 nolock +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-cleanup +diff --git a/tests/bugs/nfs/zero-atime.t b/tests/bugs/nfs/zero-atime.t +deleted file mode 100755 +index 2a94009..0000000 +--- a/tests/bugs/nfs/zero-atime.t ++++ /dev/null +@@ -1,33 +0,0 @@ +-#!/bin/bash +-# +-# posix_do_utimes() sets atime and mtime to the values in the passed IATT. If +-# not set, these values are 0 and cause a atime/mtime set to the Epoch. +-# +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-# create a file for testing +-TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k +- +-# timezone in UTC results in atime=0 if not set correctly +-TEST TZ=UTC dd if=/dev/urandom of=$M0/small bs=64k count=1 conv=nocreat +-TEST [ "$(stat --format=%X $M0/small)" != "0" ] +- +-TEST rm $M0/small +- +-cleanup +diff --git a/tests/bugs/rpc/bug-954057.t b/tests/bugs/rpc/bug-954057.t +index 65af274..9ad0ab2 100755 +--- a/tests/bugs/rpc/bug-954057.t ++++ b/tests/bugs/rpc/bug-954057.t +@@ -25,7 +25,15 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + + TEST mkdir $M0/dir + TEST mkdir $M0/nobody +-TEST chown nfsnobody:nfsnobody $M0/nobody ++grep nfsnobody /etc/passwd > /dev/nul ++if [ $? -eq 1 ]; then ++usr=nobody ++grp=nobody ++else ++usr=nfsnobody ++grp=nfsnobody ++fi ++TEST chown $usr:$grp $M0/nobody + TEST `echo "file" >> $M0/file` + TEST cp $M0/file $M0/new + TEST chmod 700 $M0/new +diff --git a/tests/bugs/shard/bug-1272986.t b/tests/bugs/shard/bug-1272986.t +index 7628870..66e896a 100644 +--- a/tests/bugs/shard/bug-1272986.t ++++ b/tests/bugs/shard/bug-1272986.t +@@ -16,16 +16,16 @@ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 + + # Write some data into a file, such that its size crosses the shard block size. +-TEST dd if=/dev/zero of=$M1/file bs=1M count=5 conv=notrunc ++TEST dd if=/dev/urandom of=$M1/file bs=1M count=5 conv=notrunc oflag=direct + + md5sum1_reader=$(md5sum $M0/file | awk '{print $1}') + + EXPECT "$md5sum1_reader" echo `md5sum $M1/file | awk '{print $1}'` + + # Append some more data into the file. 
+-TEST `echo "abcdefg" >> $M1/file` ++TEST dd if=/dev/urandom of=$M1/file bs=256k count=1 conv=notrunc oflag=direct + +-md5sum2_reader=$(md5sum $M0/file | awk '{print $1}') ++md5sum2_reader=$(dd if=$M0/file iflag=direct bs=256k| md5sum | awk '{print $1}') + + # Test to see if the reader refreshes its cache correctly as part of the reads + # triggered through md5sum. If it does, then the md5sum on the reader and writer +diff --git a/tests/bugs/transport/bug-873367.t b/tests/bugs/transport/bug-873367.t +index d4c0702..8070bc1 100755 +--- a/tests/bugs/transport/bug-873367.t ++++ b/tests/bugs/transport/bug-873367.t +@@ -13,7 +13,7 @@ rm -f $SSL_BASE/glusterfs.* + mkdir -p $B0/1 + mkdir -p $M0 + +-TEST openssl genrsa -out $SSL_KEY 1024 ++TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index 3cb45b5..cae010c 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -41,7 +41,7 @@ function valid_ciphers { + -e '/:$/s///' + } + +-TEST openssl genrsa -out $SSL_KEY 1024 ++TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t +index 7e1e199..e4bcdf5 100644 +--- a/tests/features/ssl-ciphers.t ++++ b/tests/features/ssl-ciphers.t +@@ -33,18 +33,26 @@ wait_mount() { + openssl_connect() { + ssl_opt="-verify 3 -verify_return_error -CAfile $SSL_CA" + ssl_opt="$ssl_opt -crl_check_all -CApath $TMPDIR" +- #echo openssl s_client $ssl_opt $@ > /dev/tty +- #read -p "Continue? 
" nothing +- CIPHER=`echo "" | +- openssl s_client $ssl_opt $@ 2>/dev/null | +- awk '/^ Cipher/{print $3}'` +- if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" ] ; then ++ cmd="echo "" | openssl s_client $ssl_opt $@ 2>/dev/null" ++ CIPHER=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}') ++ if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" -o "x${CIPHER}" = "x(NONE)" ] ; then + echo "N" + else + echo "Y" + fi + } + ++#Validate the cipher to pass EXPECT test case before call openssl_connect ++check_cipher() { ++ cmd="echo "" | openssl s_client $@ 2> /dev/null" ++ cipher=$(eval $cmd |awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}') ++ if [ "x${cipher}" = "x" -o "x${cipher}" = "x0000" -o "x${cipher}" = "x(NONE)" ] ; then ++ echo "N" ++ else ++ echo "Y" ++ fi ++} ++ + cleanup; + mkdir -p $B0 + mkdir -p $M0 +@@ -65,7 +73,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume info; + +-TEST openssl genrsa -out $SSL_KEY 1024 2>/dev/null ++TEST openssl genrsa -out $SSL_KEY 2048 2>/dev/null + TEST openssl req -config $SSL_CFG -new -key $SSL_KEY -x509 \ + -subj /CN=CA -out $SSL_CA + TEST openssl req -config $SSL_CFG -new -key $SSL_KEY \ +@@ -106,28 +114,36 @@ EXPECT "N" openssl_connect -ssl3 -connect $H0:$BRICK_PORT + EXPECT "N" openssl_connect -tls1 -connect $H0:$BRICK_PORT + + # Test a HIGH CBC cipher +-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT + + # Test EECDH +-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + # test MD5 fails +-EXPECT "N" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT` ++EXPECT 
"$cph" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT + + # test RC4 fails +-EXPECT "N" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher RC4-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT + + # test eNULL fails +-EXPECT "N" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher NULL-SHA256 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT + + # test SHA2 +-EXPECT "Y" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA256 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT + + # test GCM +-EXPECT "Y" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT + + # Test DH fails without DH params +-EXPECT "N" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT + + # Test DH with DH params + TEST $CLI volume set $V0 ssl.dh-param `pwd`/`dirname $0`/dh1024.pem +@@ -145,8 +161,10 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +-EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES128-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT + + # Test the ec-curve option + TEST $CLI volume set 
$V0 ssl.cipher-list EECDH:EDH:!TLSv1 +@@ -155,8 +173,10 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + TEST $CLI volume set $V0 ssl.ec-curve invalid + EXPECT invalid volume_option $V0 ssl.ec-curve +@@ -164,7 +184,8 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + TEST $CLI volume set $V0 ssl.ec-curve secp521r1 + EXPECT secp521r1 volume_option $V0 ssl.ec-curve +diff --git a/tests/ssl.rc b/tests/ssl.rc +index 127f83f..b1ccc4c 100644 +--- a/tests/ssl.rc ++++ b/tests/ssl.rc +@@ -20,7 +20,7 @@ SSL_CA=$SSL_BASE/glusterfs.ca + + # Create self-signed certificates + function create_self_signed_certs (){ +- openssl genrsa -out $SSL_KEY 1024 ++ openssl genrsa -out $SSL_KEY 2048 + openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + return $? 
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index b248767..b224abd 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -10,6883 +10,6417 @@ + + #include + +-#include "shard.h" + #include "shard-mem-types.h" ++#include "shard.h" + #include + #include + #include + +-static gf_boolean_t +-__is_shard_dir(uuid_t gfid) +-{ +- shard_priv_t *priv = THIS->private; ++static gf_boolean_t __is_shard_dir(uuid_t gfid) { ++ shard_priv_t *priv = THIS->private; + +- if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) +- return _gf_true; ++ if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) ++ return _gf_true; + +- return _gf_false; ++ return _gf_false; + } + +-static gf_boolean_t +-__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) +-{ +- if (frame->root->pid == GF_CLIENT_PID_GSYNCD && +- (__is_shard_dir(loc->pargfid) || +- (loc->parent && __is_shard_dir(loc->parent->gfid)))) +- return _gf_true; ++static gf_boolean_t __is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) { ++ if (frame->root->pid == GF_CLIENT_PID_GSYNCD && ++ (__is_shard_dir(loc->pargfid) || ++ (loc->parent && __is_shard_dir(loc->parent->gfid)))) ++ return _gf_true; + +- return _gf_false; ++ return _gf_false; + } + +-void +-shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; ++void shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) { ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(buf, len, "%s.%d", gfid_str, block_num); ++ gf_uuid_unparse(gfid, gfid_str); ++ snprintf(buf, len, "%s.%d", gfid_str, block_num); + } + +-void +-shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; ++void shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, ++ size_t len) { ++ char 
gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); ++ gf_uuid_unparse(gfid, gfid_str); ++ snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); + } + +-int +-__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx_p = NULL; ++int __shard_inode_ctx_get(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t **ctx) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx_p = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret == 0) { +- *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; +- return ret; +- } ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret == 0) { ++ *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ return ret; ++ } + +- ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); +- if (!ctx_p) +- return ret; ++ ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); ++ if (!ctx_p) ++ return ret; + +- INIT_LIST_HEAD(&ctx_p->ilist); +- INIT_LIST_HEAD(&ctx_p->to_fsync_list); ++ INIT_LIST_HEAD(&ctx_p->ilist); ++ INIT_LIST_HEAD(&ctx_p->to_fsync_list); + +- ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); +- if (ret < 0) { +- GF_FREE(ctx_p); +- return ret; +- } ++ ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); ++ if (ret < 0) { ++ GF_FREE(ctx_p); ++ return ret; ++ } + +- *ctx = ctx_p; ++ *ctx = ctx_p; + +- return ret; ++ return ret; + } + +-int +-shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +-{ +- int ret = 0; ++int shard_inode_ctx_get(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t **ctx) { ++ int ret = 0; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get(inode, this, ctx); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get(inode, this, ctx); } ++ UNLOCK(&inode->lock); + +- return ret; ++ 
return ret; + } + +-int +-__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, +- uint64_t block_size, int32_t valid) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, ++ uint64_t block_size, int32_t valid) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- if (valid & SHARD_MASK_BLOCK_SIZE) +- ctx->block_size = block_size; ++ if (valid & SHARD_MASK_BLOCK_SIZE) ++ ctx->block_size = block_size; + +- if (valid & SHARD_MASK_PROT) +- ctx->stat.ia_prot = stbuf->ia_prot; ++ if (valid & SHARD_MASK_PROT) ++ ctx->stat.ia_prot = stbuf->ia_prot; + +- if (valid & SHARD_MASK_NLINK) +- ctx->stat.ia_nlink = stbuf->ia_nlink; ++ if (valid & SHARD_MASK_NLINK) ++ ctx->stat.ia_nlink = stbuf->ia_nlink; + +- if (valid & SHARD_MASK_UID) +- ctx->stat.ia_uid = stbuf->ia_uid; ++ if (valid & SHARD_MASK_UID) ++ ctx->stat.ia_uid = stbuf->ia_uid; + +- if (valid & SHARD_MASK_GID) +- ctx->stat.ia_gid = stbuf->ia_gid; ++ if (valid & SHARD_MASK_GID) ++ ctx->stat.ia_gid = stbuf->ia_gid; + +- if (valid & SHARD_MASK_SIZE) +- ctx->stat.ia_size = stbuf->ia_size; ++ if (valid & SHARD_MASK_SIZE) ++ ctx->stat.ia_size = stbuf->ia_size; + +- if (valid & SHARD_MASK_BLOCKS) +- ctx->stat.ia_blocks = stbuf->ia_blocks; ++ if (valid & SHARD_MASK_BLOCKS) ++ ctx->stat.ia_blocks = stbuf->ia_blocks; + +- if (valid & SHARD_MASK_TIMES) { +- SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, +- stbuf->ia_mtime, stbuf->ia_mtime_nsec); +- SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, +- stbuf->ia_ctime, stbuf->ia_ctime_nsec); +- SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, +- stbuf->ia_atime, stbuf->ia_atime_nsec); +- } ++ if (valid & SHARD_MASK_TIMES) { ++ SHARD_TIME_UPDATE(ctx->stat.ia_mtime, 
ctx->stat.ia_mtime_nsec, ++ stbuf->ia_mtime, stbuf->ia_mtime_nsec); ++ SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, ++ stbuf->ia_ctime, stbuf->ia_ctime_nsec); ++ SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, ++ stbuf->ia_atime, stbuf->ia_atime_nsec); ++ } + +- if (valid & SHARD_MASK_OTHERS) { +- ctx->stat.ia_ino = stbuf->ia_ino; +- gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); +- ctx->stat.ia_dev = stbuf->ia_dev; +- ctx->stat.ia_type = stbuf->ia_type; +- ctx->stat.ia_rdev = stbuf->ia_rdev; +- ctx->stat.ia_blksize = stbuf->ia_blksize; +- } ++ if (valid & SHARD_MASK_OTHERS) { ++ ctx->stat.ia_ino = stbuf->ia_ino; ++ gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); ++ ctx->stat.ia_dev = stbuf->ia_dev; ++ ctx->stat.ia_type = stbuf->ia_type; ++ ctx->stat.ia_rdev = stbuf->ia_rdev; ++ ctx->stat.ia_blksize = stbuf->ia_blksize; ++ } + +- if (valid & SHARD_MASK_REFRESH_RESET) +- ctx->refresh = _gf_false; ++ if (valid & SHARD_MASK_REFRESH_RESET) ++ ctx->refresh = _gf_false; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, +- uint64_t block_size, int32_t valid) +-{ +- int ret = -1; ++int shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, ++ uint64_t block_size, int32_t valid) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, 
&ctx); ++ if (ret) ++ return ret; + +- ctx->refresh = _gf_true; ++ ctx->refresh = _gf_true; + +- return 0; ++ return 0; + } +-int +-shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; ++int shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_set_refresh_flag(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_set_refresh_flag(inode, this); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- ctx->refreshed = _gf_true; +- return 0; ++ ctx->refreshed = _gf_true; ++ return 0; + } + +-int +-shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; ++int shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, +- inode_t *shard_inode) +-{ +- int ret = -1; +- shard_inode_ctx_t *base_ictx = NULL; +- shard_inode_ctx_t *shard_ictx = NULL; +- +- ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); +- if (ret) +- return ret; ++int __shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, ++ inode_t *shard_inode) { ++ int ret = -1; ++ shard_inode_ctx_t *base_ictx 
= NULL; ++ shard_inode_ctx_t *shard_ictx = NULL; + +- ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ if (ret) ++ return ret; + +- if (shard_ictx->fsync_needed) { +- shard_ictx->fsync_needed++; +- return 1; +- } ++ ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); ++ if (ret) ++ return ret; + +- list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); +- shard_ictx->inode = shard_inode; ++ if (shard_ictx->fsync_needed) { + shard_ictx->fsync_needed++; +- base_ictx->fsync_count++; +- shard_ictx->base_inode = base_inode; ++ return 1; ++ } + +- return 0; ++ list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); ++ shard_ictx->inode = shard_inode; ++ shard_ictx->fsync_needed++; ++ base_ictx->fsync_count++; ++ shard_ictx->base_inode = base_inode; ++ ++ return 0; + } + +-int +-shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, +- inode_t *shard_inode) +-{ +- int ret = -1; ++int shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, ++ inode_t *shard_inode) { ++ int ret = -1; + +- /* This ref acts as a refkeepr on the base inode. We +- * need to keep this inode alive as it holds the head +- * of the to_fsync_list. +- */ +- inode_ref(base_inode); +- inode_ref(shard_inode); ++ /* This ref acts as a refkeepr on the base inode. We ++ * need to keep this inode alive as it holds the head ++ * of the to_fsync_list. 
++ */ ++ inode_ref(base_inode); ++ inode_ref(shard_inode); + +- LOCK(&base_inode->lock); +- LOCK(&shard_inode->lock); +- { +- ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, +- shard_inode); +- } +- UNLOCK(&shard_inode->lock); +- UNLOCK(&base_inode->lock); ++ LOCK(&base_inode->lock); ++ LOCK(&shard_inode->lock); ++ { ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, shard_inode); } ++ UNLOCK(&shard_inode->lock); ++ UNLOCK(&base_inode->lock); + +- /* Unref the base inode corresponding to the ref above, if the shard is +- * found to be already part of the fsync list. +- */ +- if (ret != 0) { +- inode_unref(base_inode); +- inode_unref(shard_inode); +- } +- return ret; ++ /* Unref the base inode corresponding to the ref above, if the shard is ++ * found to be already part of the fsync list. ++ */ ++ if (ret != 0) { ++ inode_unref(base_inode); ++ inode_unref(shard_inode); ++ } ++ return ret; + } + +-gf_boolean_t +-__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++gf_boolean_t __shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- /* If inode ctx get fails, better to err on the side of caution and +- * try again? Unless the failure is due to mem-allocation. +- */ +- if (ret) +- return _gf_true; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ /* If inode ctx get fails, better to err on the side of caution and ++ * try again? Unless the failure is due to mem-allocation. 
++ */ ++ if (ret) ++ return _gf_true; + +- return !ctx->refreshed; ++ return !ctx->refreshed; + } + +-gf_boolean_t +-shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) +-{ +- gf_boolean_t flag = _gf_false; ++gf_boolean_t shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { ++ gf_boolean_t flag = _gf_false; + +- LOCK(&inode->lock); +- { +- flag = __shard_inode_ctx_needs_lookup(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { flag = __shard_inode_ctx_needs_lookup(inode, this); } ++ UNLOCK(&inode->lock); + +- return flag; ++ return flag; + } +-int +-__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, ++ struct iatt *stbuf) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- if ((stbuf->ia_size != ctx->stat.ia_size) || +- (stbuf->ia_blocks != ctx->stat.ia_blocks)) +- ctx->refresh = _gf_true; ++ if ((stbuf->ia_size != ctx->stat.ia_size) || ++ (stbuf->ia_blocks != ctx->stat.ia_blocks)) ++ ctx->refresh = _gf_true; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +-{ +- int ret = -1; ++int shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, ++ struct iatt *stbuf) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_invalidate(inode, this, stbuf); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_invalidate(inode, this, stbuf); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, +- uint64_t *block_size) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int 
__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, ++ uint64_t *block_size) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- *block_size = ctx->block_size; ++ *block_size = ctx->block_size; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, +- uint64_t *block_size) +-{ +- int ret = -1; ++int shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, ++ uint64_t *block_size) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_block_size(inode, this, block_size); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_block_size(inode, this, block_size); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, +- int *fsync_count) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, ++ int *fsync_count) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- *fsync_count = ctx->fsync_needed; ++ *fsync_count = ctx->fsync_needed; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, +- int *fsync_count) +-{ +- int ret = -1; ++int shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, ++ int *fsync_count) { ++ int 
ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } +-int +-__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t *ctx_out) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t *ctx_out) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); +- return 0; ++ memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); ++ return 0; + } + +-int +-shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t *ctx_out) +-{ +- int ret = -1; ++int shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t *ctx_out) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_all(inode, this, ctx_out); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_all(inode, this, ctx_out); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, +- struct iatt *buf, +- gf_boolean_t *need_refresh) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, ++ struct iatt *buf, ++ gf_boolean_t *need_refresh) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = 
__inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- if (ctx->refresh == _gf_false) +- *buf = ctx->stat; +- else +- *need_refresh = _gf_true; ++ if (ctx->refresh == _gf_false) ++ *buf = ctx->stat; ++ else ++ *need_refresh = _gf_true; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, +- struct iatt *buf, +- gf_boolean_t *need_refresh) +-{ +- int ret = -1; ++int shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, ++ struct iatt *buf, ++ gf_boolean_t *need_refresh) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, +- need_refresh); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = ++ __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, need_refresh); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-void +-shard_local_wipe(shard_local_t *local) +-{ +- int i = 0; +- int count = 0; +- +- count = local->num_blocks; +- +- syncbarrier_destroy(&local->barrier); +- loc_wipe(&local->loc); +- loc_wipe(&local->dot_shard_loc); +- loc_wipe(&local->dot_shard_rm_loc); +- loc_wipe(&local->loc2); +- loc_wipe(&local->tmp_loc); +- loc_wipe(&local->int_inodelk.loc); +- loc_wipe(&local->int_entrylk.loc); +- loc_wipe(&local->newloc); +- +- if (local->int_entrylk.basename) +- GF_FREE(local->int_entrylk.basename); +- if (local->fd) +- fd_unref(local->fd); +- +- if (local->xattr_req) +- dict_unref(local->xattr_req); +- if (local->xattr_rsp) +- dict_unref(local->xattr_rsp); +- +- for (i = 0; i < count; i++) { +- if (!local->inode_list) +- break; +- +- if (local->inode_list[i]) +- inode_unref(local->inode_list[i]); +- } +- +- GF_FREE(local->inode_list); +- +- GF_FREE(local->vector); +- if (local->iobref) +- 
iobref_unref(local->iobref); +- if (local->list_inited) +- gf_dirent_free(&local->entries_head); +- if (local->inodelk_frame) +- SHARD_STACK_DESTROY(local->inodelk_frame); +- if (local->entrylk_frame) +- SHARD_STACK_DESTROY(local->entrylk_frame); +-} +- +-int +-shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) +-{ +- int ret = -1; +- void *size_attr = NULL; +- uint64_t size_array[4]; +- +- ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); +- if (ret) { +- gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, +- SHARD_MSG_INTERNAL_XATTR_MISSING, +- "Failed to " +- "get " GF_XATTR_SHARD_FILE_SIZE " for %s", +- uuid_utoa(stbuf->ia_gfid)); +- return ret; +- } ++void shard_local_wipe(shard_local_t *local) { ++ int i = 0; ++ int count = 0; + +- memcpy(size_array, size_attr, sizeof(size_array)); ++ count = local->num_blocks; + +- stbuf->ia_size = ntoh64(size_array[0]); +- stbuf->ia_blocks = ntoh64(size_array[2]); ++ syncbarrier_destroy(&local->barrier); ++ loc_wipe(&local->loc); ++ loc_wipe(&local->dot_shard_loc); ++ loc_wipe(&local->dot_shard_rm_loc); ++ loc_wipe(&local->loc2); ++ loc_wipe(&local->tmp_loc); ++ loc_wipe(&local->int_inodelk.loc); ++ loc_wipe(&local->int_entrylk.loc); ++ loc_wipe(&local->newloc); + +- return 0; +-} ++ if (local->int_entrylk.basename) ++ GF_FREE(local->int_entrylk.basename); ++ if (local->fd) ++ fd_unref(local->fd); + +-int +-shard_call_count_return(call_frame_t *frame) +-{ +- int call_count = 0; +- shard_local_t *local = NULL; ++ if (local->xattr_req) ++ dict_unref(local->xattr_req); ++ if (local->xattr_rsp) ++ dict_unref(local->xattr_rsp); + +- local = frame->local; ++ for (i = 0; i < count; i++) { ++ if (!local->inode_list) ++ break; ++ ++ if (local->inode_list[i]) ++ inode_unref(local->inode_list[i]); ++ } ++ ++ GF_FREE(local->inode_list); ++ ++ GF_FREE(local->vector); ++ if (local->iobref) ++ iobref_unref(local->iobref); ++ if (local->list_inited) ++ gf_dirent_free(&local->entries_head); ++ if 
(local->inodelk_frame) ++ SHARD_STACK_DESTROY(local->inodelk_frame); ++ if (local->entrylk_frame) ++ SHARD_STACK_DESTROY(local->entrylk_frame); ++} ++ ++int shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) { ++ int ret = -1; ++ void *size_attr = NULL; ++ uint64_t size_array[4]; ++ ++ ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, ++ SHARD_MSG_INTERNAL_XATTR_MISSING, ++ "Failed to " ++ "get " GF_XATTR_SHARD_FILE_SIZE " for %s", ++ uuid_utoa(stbuf->ia_gfid)); ++ return ret; ++ } ++ ++ memcpy(size_array, size_attr, sizeof(size_array)); ++ ++ stbuf->ia_size = ntoh64(size_array[0]); ++ stbuf->ia_blocks = ntoh64(size_array[2]); ++ ++ return 0; ++} ++ ++int shard_call_count_return(call_frame_t *frame) { ++ int call_count = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ LOCK(&frame->lock); ++ { call_count = --local->call_count; } ++ UNLOCK(&frame->lock); ++ ++ return call_count; ++} ++ ++static char *shard_internal_dir_string(shard_internal_dir_type_t type) { ++ char *str = NULL; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ str = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ str = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ return str; ++} ++ ++static int shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ char *bname = NULL; ++ inode_t *parent = NULL; ++ loc_t *internal_dir_loc = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ if (!local) ++ return -1; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ internal_dir_loc = &local->dot_shard_loc; ++ bname = GF_SHARD_DIR; ++ parent = inode_ref(this->itable->root); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ internal_dir_loc = &local->dot_shard_rm_loc; ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ parent = 
inode_ref(priv->dot_shard_inode); ++ break; ++ default: ++ break; ++ } ++ ++ internal_dir_loc->inode = inode_new(this->itable); ++ internal_dir_loc->parent = parent; ++ ret = inode_path(internal_dir_loc->parent, bname, ++ (char **)&internal_dir_loc->path); ++ if (ret < 0 || !(internal_dir_loc->inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", bname); ++ goto out; ++ } ++ ++ internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); ++ if (internal_dir_loc->name) ++ internal_dir_loc->name++; ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++inode_t *__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, ++ inode_t *base_inode, int block_num, ++ uuid_t gfid) { ++ char block_bname[256] = { ++ 0, ++ }; ++ inode_t *lru_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *lru_inode_ctx = NULL; ++ shard_inode_ctx_t *lru_base_inode_ctx = NULL; ++ inode_t *fsync_inode = NULL; ++ inode_t *lru_base_inode = NULL; ++ gf_boolean_t do_fsync = _gf_false; ++ ++ priv = this->private; ++ ++ shard_inode_ctx_get(linked_inode, this, &ctx); ++ ++ if (list_empty(&ctx->ilist)) { ++ if (priv->inode_count + 1 <= priv->lru_limit) { ++ /* If this inode was linked here for the first time (indicated ++ * by empty list), and if there is still space in the priv list, ++ * add this ctx to the tail of the list. ++ */ ++ /* For as long as an inode is in lru list, we try to ++ * keep it alive by holding a ref on it. ++ */ ++ inode_ref(linked_inode); ++ if (base_inode) ++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); ++ ctx->block_num = block_num; ++ list_add_tail(&ctx->ilist, &priv->ilist_head); ++ priv->inode_count++; ++ ctx->base_inode = inode_ref(base_inode); ++ } else { ++ /*If on the other hand there is no available slot for this inode ++ * in the list, delete the lru inode from the head of the list, ++ * unlink it. 
And in its place add this new inode into the list. ++ */ ++ lru_inode_ctx = ++ list_first_entry(&priv->ilist_head, shard_inode_ctx_t, ilist); ++ GF_ASSERT(lru_inode_ctx->block_num > 0); ++ lru_base_inode = lru_inode_ctx->base_inode; ++ list_del_init(&lru_inode_ctx->ilist); ++ lru_inode = inode_find(linked_inode->table, lru_inode_ctx->stat.ia_gfid); ++ /* If the lru inode was part of the pending-fsync list, ++ * the base inode needs to be unref'd, the lru inode ++ * deleted from fsync list and fsync'd in a new frame, ++ * and then unlinked in memory and forgotten. ++ */ ++ if (!lru_base_inode) ++ goto after_fsync_check; ++ LOCK(&lru_base_inode->lock); ++ LOCK(&lru_inode->lock); ++ { ++ if (!list_empty(&lru_inode_ctx->to_fsync_list)) { ++ list_del_init(&lru_inode_ctx->to_fsync_list); ++ lru_inode_ctx->fsync_needed = 0; ++ do_fsync = _gf_true; ++ __shard_inode_ctx_get(lru_base_inode, this, &lru_base_inode_ctx); ++ lru_base_inode_ctx->fsync_count--; ++ } ++ } ++ UNLOCK(&lru_inode->lock); ++ UNLOCK(&lru_base_inode->lock); ++ ++ after_fsync_check: ++ if (!do_fsync) { ++ shard_make_block_bname(lru_inode_ctx->block_num, ++ lru_inode_ctx->base_gfid, block_bname, ++ sizeof(block_bname)); ++ /* The following unref corresponds to the ref held at ++ * the time the shard was added to the lru list. ++ */ ++ inode_unref(lru_inode); ++ inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); ++ inode_forget(lru_inode, 0); ++ } else { ++ /* The following unref corresponds to the ref ++ * held when the shard was added to fsync list. ++ */ ++ inode_unref(lru_inode); ++ fsync_inode = lru_inode; ++ if (lru_base_inode) ++ inode_unref(lru_base_inode); ++ } ++ /* The following unref corresponds to the ref ++ * held by inode_find() above. 
++ */ ++ inode_unref(lru_inode); ++ ++ /* The following unref corresponds to the ref held on the base shard ++ * at the time of adding shard inode to lru list ++ */ ++ if (lru_base_inode) ++ inode_unref(lru_base_inode); ++ ++ /* For as long as an inode is in lru list, we try to ++ * keep it alive by holding a ref on it. ++ */ ++ inode_ref(linked_inode); ++ if (base_inode) ++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); ++ ctx->block_num = block_num; ++ ctx->base_inode = inode_ref(base_inode); ++ list_add_tail(&ctx->ilist, &priv->ilist_head); ++ } ++ } else { ++ /* If this is not the first time this inode is being operated on, move ++ * it to the most recently used end of the list. ++ */ ++ list_move_tail(&ctx->ilist, &priv->ilist_head); ++ } ++ return fsync_inode; ++} ++ ++int shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, ++ int32_t op_ret, int32_t op_errno) { ++ switch (fop) { ++ case GF_FOP_LOOKUP: ++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, NULL); ++ break; ++ case GF_FOP_STAT: ++ SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSTAT: ++ SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_TRUNCATE: ++ SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FTRUNCATE: ++ SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_MKNOD: ++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_LINK: ++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_CREATE: ++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_UNLINK: ++ SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_RENAME: ++ 
SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_WRITE: ++ SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FALLOCATE: ++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_ZEROFILL: ++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_DISCARD: ++ SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_READ: ++ SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_FSYNC: ++ SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_REMOVEXATTR: ++ SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FREMOVEXATTR: ++ SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FGETXATTR: ++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_GETXATTR: ++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSETXATTR: ++ SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETXATTR: ++ SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETATTR: ++ SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FSETATTR: ++ SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_SEEK: ++ SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); ++ break; ++ default: ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_common_inode_write_success_unwind(glusterfs_fop_t fop, ++ call_frame_t *frame, ++ int32_t op_ret) { ++ shard_local_t *local = NULL; ++ ++ local 
= frame->local; ++ ++ switch (fop) { ++ case GF_FOP_WRITE: ++ SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_FALLOCATE: ++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_ZEROFILL: ++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_DISCARD: ++ SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ default: ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) { ++ char block_bname[256] = { ++ 0, ++ }; ++ fd_t *anon_fd = cookie; ++ inode_t *shard_inode = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ ++ if (anon_fd == NULL || op_ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, ++ "fsync failed on shard"); ++ goto out; ++ } ++ shard_inode = anon_fd->inode; ++ ++ LOCK(&priv->lock); ++ LOCK(&shard_inode->lock); ++ { ++ __shard_inode_ctx_get(shard_inode, this, &ctx); ++ if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { ++ shard_make_block_bname(ctx->block_num, shard_inode->gfid, block_bname, ++ sizeof(block_bname)); ++ inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); ++ /* The following unref corresponds to the ref held by ++ * inode_link() at the time the shard was created or ++ * looked up ++ */ ++ inode_unref(shard_inode); ++ inode_forget(shard_inode, 0); ++ } ++ } ++ UNLOCK(&shard_inode->lock); ++ UNLOCK(&priv->lock); + +- LOCK(&frame->lock); +- { +- call_count = --local->call_count; ++out: ++ 
if (anon_fd) ++ fd_unref(anon_fd); ++ STACK_DESTROY(frame->root); ++ return 0; ++} ++ ++int shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) { ++ fd_t *anon_fd = NULL; ++ call_frame_t *fsync_frame = NULL; ++ ++ fsync_frame = create_frame(this, this->ctx->pool); ++ if (!fsync_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to fsync shard"); ++ return -1; ++ } ++ ++ anon_fd = fd_anonymous(inode); ++ if (!anon_fd) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create anon fd to" ++ " fsync shard"); ++ STACK_DESTROY(fsync_frame->root); ++ return -1; ++ } ++ ++ STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd, ++ 1, NULL); ++ return 0; ++} ++ ++int shard_common_resolve_shards( ++ call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler) { ++ int i = -1; ++ uint32_t shard_idx_iter = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ inode_t *res_inode = NULL; ++ inode_t *fsync_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ local->call_count = 0; ++ shard_idx_iter = local->first_block; ++ res_inode = local->resolver_base_inode; ++ if (res_inode) ++ gf_uuid_copy(gfid, res_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ if ((local->op_ret < 0) || (local->resolve_not)) ++ goto out; ++ ++ while (shard_idx_iter <= local->last_block) { ++ i++; ++ if (shard_idx_iter == 0) { ++ local->inode_list[i] = inode_ref(res_inode); ++ shard_idx_iter++; ++ continue; ++ } ++ ++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); ++ ++ inode = NULL; ++ inode = inode_resolve(this->itable, path); ++ if (inode) { ++ gf_msg_debug(this->name, 0, "Shard %d already " ++ "present. 
gfid=%s. Saving inode for future.", ++ shard_idx_iter, uuid_utoa(inode->gfid)); ++ local->inode_list[i] = inode; ++ /* Let the ref on the inodes that are already present ++ * in inode table still be held so that they don't get ++ * forgotten by the time the fop reaches the actual ++ * write stage. ++ */ ++ LOCK(&priv->lock); ++ { ++ fsync_inode = __shard_update_shards_inode_list(inode, this, res_inode, ++ shard_idx_iter, gfid); ++ } ++ UNLOCK(&priv->lock); ++ shard_idx_iter++; ++ if (fsync_inode) ++ shard_initiate_evicted_inode_fsync(this, fsync_inode); ++ continue; ++ } else { ++ local->call_count++; ++ shard_idx_iter++; + } +- UNLOCK(&frame->lock); ++ } ++out: ++ post_res_handler(frame, this); ++ return 0; ++} ++ ++int shard_update_file_size_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ dict_t *dict, dict_t *xdata) { ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if ((local->fd) && (local->fd->inode)) ++ inode = local->fd->inode; ++ else if (local->loc.inode) ++ inode = local->loc.inode; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size" ++ " xattr failed on %s", ++ uuid_utoa(inode->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } + +- return call_count; ++ if (shard_modify_size_and_block_count(&local->postbuf, dict)) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++err: ++ local->post_update_size_handler(frame, this); ++ return 0; + } + +-static char * +-shard_internal_dir_string(shard_internal_dir_type_t type) +-{ +- char *str = NULL; ++int shard_set_size_attrs(int64_t size, int64_t block_count, ++ int64_t **size_attr_p) { ++ int ret = -1; ++ int64_t *size_attr = NULL; + +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- str = GF_SHARD_DIR; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- str = 
GF_SHARD_REMOVE_ME_DIR; +- break; +- default: +- break; +- } +- return str; +-} +- +-static int +-shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- char *bname = NULL; +- inode_t *parent = NULL; +- loc_t *internal_dir_loc = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- if (!local) +- return -1; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- internal_dir_loc = &local->dot_shard_loc; +- bname = GF_SHARD_DIR; +- parent = inode_ref(this->itable->root); +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- internal_dir_loc = &local->dot_shard_rm_loc; +- bname = GF_SHARD_REMOVE_ME_DIR; +- parent = inode_ref(priv->dot_shard_inode); +- break; +- default: +- break; +- } ++ if (!size_attr_p) ++ goto out; + +- internal_dir_loc->inode = inode_new(this->itable); +- internal_dir_loc->parent = parent; +- ret = inode_path(internal_dir_loc->parent, bname, +- (char **)&internal_dir_loc->path); +- if (ret < 0 || !(internal_dir_loc->inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", bname); +- goto out; +- } ++ size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); ++ if (!size_attr) ++ goto out; + +- internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); +- if (internal_dir_loc->name) +- internal_dir_loc->name++; ++ size_attr[0] = hton64(size); ++ /* As sharding evolves, it _may_ be necessary to embed more pieces of ++ * information within the same xattr. So allocating slots for them in ++ * advance. For now, only bytes 0-63 and 128-191 which would make up the ++ * current size and block count respectively of the file are valid. 
++ */ ++ size_attr[2] = hton64(block_count); + +- ret = 0; ++ *size_attr_p = size_attr; ++ ++ ret = 0; + out: +- return ret; ++ return ret; + } + +-inode_t * +-__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, +- inode_t *base_inode, int block_num, +- uuid_t gfid) +-{ +- char block_bname[256] = { +- 0, +- }; +- inode_t *lru_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *lru_inode_ctx = NULL; +- shard_inode_ctx_t *lru_base_inode_ctx = NULL; +- inode_t *fsync_inode = NULL; +- inode_t *lru_base_inode = NULL; +- gf_boolean_t do_fsync = _gf_false; +- +- priv = this->private; +- +- shard_inode_ctx_get(linked_inode, this, &ctx); +- +- if (list_empty(&ctx->ilist)) { +- if (priv->inode_count + 1 <= priv->lru_limit) { +- /* If this inode was linked here for the first time (indicated +- * by empty list), and if there is still space in the priv list, +- * add this ctx to the tail of the list. +- */ +- /* For as long as an inode is in lru list, we try to +- * keep it alive by holding a ref on it. +- */ +- inode_ref(linked_inode); +- if (base_inode) +- gf_uuid_copy(ctx->base_gfid, base_inode->gfid); +- else +- gf_uuid_copy(ctx->base_gfid, gfid); +- ctx->block_num = block_num; +- list_add_tail(&ctx->ilist, &priv->ilist_head); +- priv->inode_count++; +- ctx->base_inode = inode_ref(base_inode); +- } else { +- /*If on the other hand there is no available slot for this inode +- * in the list, delete the lru inode from the head of the list, +- * unlink it. And in its place add this new inode into the list. 
+- */ +- lru_inode_ctx = list_first_entry(&priv->ilist_head, +- shard_inode_ctx_t, ilist); +- GF_ASSERT(lru_inode_ctx->block_num > 0); +- lru_base_inode = lru_inode_ctx->base_inode; +- list_del_init(&lru_inode_ctx->ilist); +- lru_inode = inode_find(linked_inode->table, +- lru_inode_ctx->stat.ia_gfid); +- /* If the lru inode was part of the pending-fsync list, +- * the base inode needs to be unref'd, the lru inode +- * deleted from fsync list and fsync'd in a new frame, +- * and then unlinked in memory and forgotten. +- */ +- if (!lru_base_inode) +- goto after_fsync_check; +- LOCK(&lru_base_inode->lock); +- LOCK(&lru_inode->lock); +- { +- if (!list_empty(&lru_inode_ctx->to_fsync_list)) { +- list_del_init(&lru_inode_ctx->to_fsync_list); +- lru_inode_ctx->fsync_needed = 0; +- do_fsync = _gf_true; +- __shard_inode_ctx_get(lru_base_inode, this, +- &lru_base_inode_ctx); +- lru_base_inode_ctx->fsync_count--; +- } +- } +- UNLOCK(&lru_inode->lock); +- UNLOCK(&lru_base_inode->lock); +- +- after_fsync_check: +- if (!do_fsync) { +- shard_make_block_bname(lru_inode_ctx->block_num, +- lru_inode_ctx->base_gfid, block_bname, +- sizeof(block_bname)); +- /* The following unref corresponds to the ref held at +- * the time the shard was added to the lru list. +- */ +- inode_unref(lru_inode); +- inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); +- inode_forget(lru_inode, 0); +- } else { +- /* The following unref corresponds to the ref +- * held when the shard was added to fsync list. +- */ +- inode_unref(lru_inode); +- fsync_inode = lru_inode; +- if (lru_base_inode) +- inode_unref(lru_base_inode); +- } +- /* The following unref corresponds to the ref +- * held by inode_find() above. 
+- */ +- inode_unref(lru_inode); +- +- /* The following unref corresponds to the ref held on the base shard +- * at the time of adding shard inode to lru list +- */ +- if (lru_base_inode) +- inode_unref(lru_base_inode); +- +- /* For as long as an inode is in lru list, we try to +- * keep it alive by holding a ref on it. +- */ +- inode_ref(linked_inode); +- if (base_inode) +- gf_uuid_copy(ctx->base_gfid, base_inode->gfid); +- else +- gf_uuid_copy(ctx->base_gfid, gfid); +- ctx->block_num = block_num; +- ctx->base_inode = inode_ref(base_inode); +- list_add_tail(&ctx->ilist, &priv->ilist_head); +- } +- } else { +- /* If this is not the first time this inode is being operated on, move +- * it to the most recently used end of the list. +- */ +- list_move_tail(&ctx->ilist, &priv->ilist_head); +- } +- return fsync_inode; +-} ++int shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ loc_t *loc, ++ shard_post_update_size_fop_handler_t handler) { ++ int ret = -1; ++ int64_t *size_attr = NULL; ++ int64_t delta_blocks = 0; ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; ++ dict_t *xattr_req = NULL; + +-int +-shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, +- int32_t op_ret, int32_t op_errno) +-{ +- switch (fop) { +- case GF_FOP_LOOKUP: +- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_STAT: +- SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_FSTAT: +- SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_TRUNCATE: +- SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FTRUNCATE: +- SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_MKNOD: +- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_LINK: +- SHARD_STACK_UNWIND(link, frame, op_ret, 
op_errno, NULL, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_CREATE: +- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL, NULL, NULL); +- break; +- case GF_FOP_UNLINK: +- SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_RENAME: +- SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL, NULL, NULL); +- break; +- case GF_FOP_WRITE: +- SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FALLOCATE: +- SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_ZEROFILL: +- SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_DISCARD: +- SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_READ: +- SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, +- NULL, NULL); +- break; +- case GF_FOP_FSYNC: +- SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_REMOVEXATTR: +- SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_FREMOVEXATTR: +- SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_FGETXATTR: +- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_GETXATTR: +- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_FSETXATTR: +- SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_SETXATTR: +- SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_SETATTR: +- SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FSETATTR: +- SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_SEEK: +- SHARD_STACK_UNWIND(seek, 
frame, op_ret, op_errno, 0, NULL); +- break; +- default: +- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +-} ++ local = frame->local; ++ local->post_update_size_handler = handler; + +-int +-shard_common_inode_write_success_unwind(glusterfs_fop_t fop, +- call_frame_t *frame, int32_t op_ret) +-{ +- shard_local_t *local = NULL; ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ if (fd) ++ inode = fd->inode; ++ else ++ inode = loc->inode; ++ ++ /* If both size and block count have not changed, then skip the xattrop. ++ */ ++ delta_blocks = GF_ATOMIC_GET(local->delta_blocks); ++ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { ++ goto out; ++ } ++ ++ ret = shard_set_size_attrs(local->delta_size + local->hole_size, delta_blocks, ++ &size_attr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, ++ "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict. 
gfid=%s", GF_XATTR_SHARD_FILE_SIZE, ++ uuid_utoa(inode->gfid)); ++ GF_FREE(size_attr); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } + +- local = frame->local; ++ if (fd) ++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fxattrop, fd, GF_XATTROP_ADD_ARRAY64, ++ xattr_req, NULL); ++ else ++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->xattrop, loc, GF_XATTROP_ADD_ARRAY64, ++ xattr_req, NULL); + +- switch (fop) { +- case GF_FOP_WRITE: +- SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_FALLOCATE: +- SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_ZEROFILL: +- SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_DISCARD: +- SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- default: +- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +-} ++ dict_unref(xattr_req); ++ return 0; + +-int +-shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *prebuf, struct iatt *postbuf, +- dict_t *xdata) +-{ +- char block_bname[256] = { +- 0, +- }; +- fd_t *anon_fd = cookie; +- inode_t *shard_inode = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_priv_t *priv = NULL; ++out: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ handler(frame, this); ++ return 0; ++} ++ ++static inode_t *shard_link_internal_dir_inode(shard_local_t *local, ++ inode_t *inode, struct iatt *buf, ++ shard_internal_dir_type_t type) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ char *bname = NULL; ++ inode_t **priv_inode = NULL; ++ inode_t 
*parent = NULL; ++ ++ priv = THIS->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ bname = GF_SHARD_DIR; ++ priv_inode = &priv->dot_shard_inode; ++ parent = inode->table->root; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ priv_inode = &priv->dot_shard_rm_inode; ++ parent = priv->dot_shard_inode; ++ break; ++ default: ++ break; ++ } ++ ++ linked_inode = inode_link(inode, parent, bname, buf); ++ inode_lookup(linked_inode); ++ *priv_inode = linked_inode; ++ return linked_inode; ++} ++ ++int shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ shard_local_t *local = NULL; ++ inode_t *linked_inode = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++ ++ local = frame->local; ++ ++ if (op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto out; ++ } ++ ++ /* To-Do: Fix refcount increment per call to ++ * shard_link_internal_dir_inode(). 
++ */ ++ linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ shard_inode_ctx_mark_dir_refreshed(linked_inode, this); ++out: ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; ++} ++ ++int shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_internal_dir_type_t type) { ++ loc_t loc = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(gfid, priv->dot_shard_gfid); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); ++ break; ++ default: ++ break; ++ } ++ ++ inode = inode_find(this->itable, gfid); ++ ++ if (!shard_inode_ctx_needs_lookup(inode, this)) { ++ local->op_ret = 0; ++ goto out; ++ } + +- priv = this->private; ++ /* Plain assignment because the ref is already taken above through ++ * call to inode_find() ++ */ ++ loc.inode = inode; ++ gf_uuid_copy(loc.gfid, gfid); + +- if (anon_fd == NULL || op_ret < 0) { +- gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, +- "fsync failed on shard"); +- goto out; +- } +- shard_inode = anon_fd->inode; ++ STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, ++ NULL); ++ loc_wipe(&loc); + +- LOCK(&priv->lock); +- LOCK(&shard_inode->lock); +- { +- __shard_inode_ctx_get(shard_inode, this, &ctx); +- if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { +- shard_make_block_bname(ctx->block_num, shard_inode->gfid, +- block_bname, sizeof(block_bname)); +- inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); +- /* The following unref corresponds to the ref held by +- * inode_link() at the time the shard was created or +- * looked up +- */ +- inode_unref(shard_inode); +- 
inode_forget(shard_inode, 0); +- } +- } +- UNLOCK(&shard_inode->lock); +- UNLOCK(&priv->lock); ++ return 0; + + out: +- if (anon_fd) +- fd_unref(anon_fd); +- STACK_DESTROY(frame->root); +- return 0; ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; + } + +-int +-shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) +-{ +- fd_t *anon_fd = NULL; +- call_frame_t *fsync_frame = NULL; ++int shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; + +- fsync_frame = create_frame(this, this->ctx->pool); +- if (!fsync_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to fsync shard"); +- return -1; +- } ++ local = frame->local; + +- anon_fd = fd_anonymous(inode); +- if (!anon_fd) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create anon fd to" +- " fsync shard"); +- STACK_DESTROY(fsync_frame->root); +- return -1; +- } ++ if (op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } ++ ++ if (!IA_ISDIR(buf->ia_type)) { ++ gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, ++ "%s already exists and " ++ "is not a directory. 
Please remove it from all bricks " ++ "and try again", ++ shard_internal_dir_string(type)); ++ local->op_ret = -1; ++ local->op_errno = EIO; ++ goto unwind; ++ } ++ ++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ if (link_inode != inode) { ++ shard_refresh_internal_dir(frame, this, type); ++ } else { ++ shard_inode_ctx_mark_dir_refreshed(link_inode, this); ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ } ++ return 0; + +- STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, +- anon_fd, 1, NULL); +- return 0; +-} ++unwind: ++ local->post_res_handler(frame, this); ++ return 0; ++} ++ ++int shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; ++ ++ local = frame->local; ++ priv = this->private; ++ local->post_res_handler = post_res_handler; ++ ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; ++ default: ++ bzero(*gfid, sizeof(uuid_t)); ++ break; ++ } ++ ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set gfid of %s into dict", ++ shard_internal_dir_string(type)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; 
++ goto err; ++ } else { ++ free_gfid = _gf_false; ++ } + +-int +-shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler) +-{ +- int i = -1; +- uint32_t shard_idx_iter = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *inode = NULL; +- inode_t *res_inode = NULL; +- inode_t *fsync_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- local->call_count = 0; +- shard_idx_iter = local->first_block; +- res_inode = local->resolver_base_inode; +- if (res_inode) +- gf_uuid_copy(gfid, res_inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); ++ STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, ++ xattr_req); + +- if ((local->op_ret < 0) || (local->resolve_not)) +- goto out; ++ dict_unref(xattr_req); ++ return 0; + +- while (shard_idx_iter <= local->last_block) { +- i++; +- if (shard_idx_iter == 0) { +- local->inode_list[i] = inode_ref(res_inode); +- shard_idx_iter++; +- continue; +- } ++err: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ if (free_gfid) ++ GF_FREE(gfid); ++ post_res_handler(frame, this); ++ return 0; ++} ++ ++static void shard_inode_ctx_update(inode_t *inode, xlator_t *this, ++ dict_t *xdata, struct iatt *buf) { ++ int ret = 0; ++ uint64_t size = 0; ++ void *bsize = NULL; ++ ++ if (shard_inode_ctx_get_block_size(inode, this, &size)) { ++ /* Fresh lookup */ ++ ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (!ret) ++ size = ntoh64(*((uint64_t *)bsize)); ++ /* If the file is sharded, set its block size, otherwise just ++ * set 0. ++ */ + +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- inode = NULL; +- inode = inode_resolve(this->itable, path); +- if (inode) { +- gf_msg_debug(this->name, 0, +- "Shard %d already " +- "present. gfid=%s. 
Saving inode for future.", +- shard_idx_iter, uuid_utoa(inode->gfid)); +- local->inode_list[i] = inode; +- /* Let the ref on the inodes that are already present +- * in inode table still be held so that they don't get +- * forgotten by the time the fop reaches the actual +- * write stage. +- */ +- LOCK(&priv->lock); +- { +- fsync_inode = __shard_update_shards_inode_list( +- inode, this, res_inode, shard_idx_iter, gfid); +- } +- UNLOCK(&priv->lock); +- shard_idx_iter++; +- if (fsync_inode) +- shard_initiate_evicted_inode_fsync(this, fsync_inode); +- continue; +- } else { +- local->call_count++; +- shard_idx_iter++; +- } +- } +-out: +- post_res_handler(frame, this); +- return 0; ++ shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); ++ } ++ /* If the file is sharded, also set the remaining attributes, ++ * except for ia_size and ia_blocks. ++ */ ++ if (size) { ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); ++ (void)shard_inode_ctx_invalidate(inode, this, buf); ++ } ++} ++ ++int shard_delete_shards(void *opaque); ++ ++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); ++ ++int shard_start_background_deletion(xlator_t *this) { ++ int ret = 0; ++ gf_boolean_t i_cleanup = _gf_true; ++ shard_priv_t *priv = NULL; ++ call_frame_t *cleanup_frame = NULL; ++ ++ priv = this->private; ++ ++ LOCK(&priv->lock); ++ { ++ switch (priv->bg_del_state) { ++ case SHARD_BG_DELETION_NONE: ++ i_cleanup = _gf_true; ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ break; ++ case SHARD_BG_DELETION_LAUNCHING: ++ i_cleanup = _gf_false; ++ break; ++ case SHARD_BG_DELETION_IN_PROGRESS: ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ i_cleanup = _gf_false; ++ break; ++ default: ++ break; ++ } ++ } ++ UNLOCK(&priv->lock); ++ if (!i_cleanup) ++ return 0; ++ ++ cleanup_frame = create_frame(this, this->ctx->pool); ++ if (!cleanup_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create " ++ 
"new frame to delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); ++ ++ ret = synctask_new(this->ctx->env, shard_delete_shards, ++ shard_delete_shards_cbk, cleanup_frame, cleanup_frame); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "failed to create task to do background " ++ "cleanup of shards"); ++ STACK_DESTROY(cleanup_frame->root); ++ goto err; ++ } ++ return 0; ++ ++err: ++ LOCK(&priv->lock); ++ { priv->bg_del_state = SHARD_BG_DELETION_NONE; } ++ UNLOCK(&priv->lock); ++ return ret; + } + +-int +-shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- shard_local_t *local = NULL; ++int shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, struct iatt *postparent) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t i_start_cleanup = _gf_false; + +- local = frame->local; ++ priv = this->private; + +- if ((local->fd) && (local->fd->inode)) +- inode = local->fd->inode; +- else if (local->loc.inode) +- inode = local->loc.inode; ++ if (op_ret < 0) ++ goto unwind; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_UPDATE_FILE_SIZE_FAILED, +- "Update to file size" +- " xattr failed on %s", +- uuid_utoa(inode->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } ++ if (IA_ISDIR(buf->ia_type)) ++ goto unwind; + +- if (shard_modify_size_and_block_count(&local->postbuf, dict)) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +-err: +- local->post_update_size_handler(frame, this); +- return 0; +-} ++ /* Also, if the file is sharded, get the file size and block cnt xattr, ++ * and store them in the stbuf appropriately. 
++ */ + +-int +-shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p) +-{ +- int ret = -1; +- int64_t *size_attr = NULL; ++ if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && ++ frame->root->pid != GF_CLIENT_PID_GSYNCD) ++ shard_modify_size_and_block_count(buf, xdata); + +- if (!size_attr_p) +- goto out; ++ /* If this was a fresh lookup, there are two possibilities: ++ * 1) If the file is sharded (indicated by the presence of block size ++ * xattr), store this block size, along with rdev and mode in its ++ * inode ctx. ++ * 2) If the file is not sharded, store size along with rdev and mode ++ * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is ++ * already initialised to all zeroes, nothing more needs to be done. ++ */ + +- size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); +- if (!size_attr) +- goto out; ++ (void)shard_inode_ctx_update(inode, this, xdata, buf); + +- size_attr[0] = hton64(size); +- /* As sharding evolves, it _may_ be necessary to embed more pieces of +- * information within the same xattr. So allocating slots for them in +- * advance. For now, only bytes 0-63 and 128-191 which would make up the +- * current size and block count respectively of the file are valid. 
+- */ +- size_attr[2] = hton64(block_count); ++ LOCK(&priv->lock); ++ { ++ if (priv->first_lookup_done == _gf_false) { ++ priv->first_lookup_done = _gf_true; ++ i_start_cleanup = _gf_true; ++ } ++ } ++ UNLOCK(&priv->lock); + +- *size_attr_p = size_attr; ++ if (!i_start_cleanup) ++ goto unwind; + +- ret = 0; +-out: +- return ret; ++ ret = shard_start_background_deletion(this); ++ if (ret < 0) { ++ LOCK(&priv->lock); ++ { priv->first_lookup_done = _gf_false; } ++ UNLOCK(&priv->lock); ++ } ++ ++unwind: ++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, ++ postparent); ++ return 0; + } + +-int +-shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, +- loc_t *loc, shard_post_update_size_fop_handler_t handler) +-{ +- int ret = -1; +- int64_t *size_attr = NULL; +- int64_t delta_blocks = 0; +- inode_t *inode = NULL; +- shard_local_t *local = NULL; +- dict_t *xattr_req = NULL; ++int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ dict_t *xattr_req) { ++ int ret = -1; ++ int32_t op_errno = ENOMEM; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- local = frame->local; +- local->post_update_size_handler = handler; ++ this->itable = loc->inode->table; ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); ++ } + +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; +- } ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (fd) +- inode = fd->inode; +- else +- inode = loc->inode; ++ frame->local = local; + +- /* If both size and block count have not changed, then skip the xattrop. 
+- */ +- delta_blocks = GF_ATOMIC_GET(local->delta_blocks); +- if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { +- goto out; +- } ++ loc_copy(&local->loc, loc); + +- ret = shard_set_size_attrs(local->delta_size + local->hole_size, +- delta_blocks, &size_attr); ++ local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, +- "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict" ++ " value: key:%s for path %s", ++ GF_XATTR_SHARD_BLOCK_SIZE, loc->path); ++ goto err; + } ++ } + +- ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); + if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set key %s into dict. 
gfid=%s", +- GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid)); +- GF_FREE(size_attr); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s for path %s.", ++ GF_XATTR_SHARD_FILE_SIZE, loc->path); ++ goto err; + } ++ } + +- if (fd) +- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fxattrop, fd, +- GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); +- else +- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->xattrop, loc, +- GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); +- +- dict_unref(xattr_req); +- return 0; +- +-out: +- if (xattr_req) +- dict_unref(xattr_req); +- handler(frame, this); +- return 0; +-} +- +-static inode_t * +-shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode, +- struct iatt *buf, shard_internal_dir_type_t type) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- char *bname = NULL; +- inode_t **priv_inode = NULL; +- inode_t *parent = NULL; +- +- priv = THIS->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- bname = GF_SHARD_DIR; +- priv_inode = &priv->dot_shard_inode; +- parent = inode->table->root; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- bname = GF_SHARD_REMOVE_ME_DIR; +- priv_inode = &priv->dot_shard_rm_inode; +- parent = priv->dot_shard_inode; +- break; +- default: +- break; +- } ++ if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) ++ dict_del(xattr_req, GF_CONTENT_KEY); + +- linked_inode = inode_link(inode, parent, bname, buf); +- inode_lookup(linked_inode); +- *priv_inode = linked_inode; +- return linked_inode; ++ STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); ++ return 0; + } + +-int 
+-shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, ++int shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- shard_local_t *local = NULL; +- inode_t *linked_inode = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; +- +- local = frame->local; +- +- if (op_ret) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto out; +- } ++ struct iatt *postparent) { ++ int ret = -1; ++ int32_t mask = SHARD_INODE_WRITE_MASK; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t ctx = { ++ 0, ++ }; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" ++ " failed : %s", ++ loc_gfid_utoa(&(local->loc))); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } ++ ++ local->prebuf = *buf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ ++ if (shard_inode_ctx_get_all(inode, this, &ctx)) ++ mask = SHARD_ALL_MASK; ++ ++ ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, ++ (mask | SHARD_MASK_REFRESH_RESET)); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, ++ "Failed to set inode" ++ " write params into inode ctx for %s", ++ uuid_utoa(buf->ia_gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto unwind; ++ } ++ ++unwind: ++ local->handler(frame, this); ++ return 0; ++} ++ ++int shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ shard_post_fop_handler_t handler) { ++ int ret = -1; ++ shard_local_t *local = NULL; ++ dict_t *xattr_req = NULL; ++ gf_boolean_t need_refresh = _gf_false; ++ ++ local = frame->local; ++ local->handler = handler; ++ ++ ret = 
shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, ++ &need_refresh); ++ /* By this time, inode ctx should have been created either in create, ++ * mknod, readdirp or lookup. If not it is a bug! ++ */ ++ if ((ret == 0) && (need_refresh == _gf_false)) { ++ gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" ++ "Serving prebuf off the inode ctx cache", ++ uuid_utoa(loc->gfid)); ++ goto out; ++ } ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); ++ ++ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xattr_req); ++ ++ dict_unref(xattr_req); ++ return 0; + +- /* To-Do: Fix refcount increment per call to +- * shard_link_internal_dir_inode(). +- */ +- linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- shard_inode_ctx_mark_dir_refreshed(linked_inode, this); + out: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; ++ if (xattr_req) ++ dict_unref(xattr_req); ++ handler(frame, this); ++ return 0; + } + +-int +-shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_internal_dir_type_t type) +-{ +- loc_t loc = { +- 0, +- }; +- inode_t *inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; ++int shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; +- priv = this->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(gfid, priv->dot_shard_gfid); +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); +- break; +- default: +- break; +- } ++ local = frame->local; + +- inode = inode_find(this->itable, gfid); ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->fd->inode, 
this, &local->prebuf, 0, ++ SHARD_LOOKUP_MASK); + +- if (!shard_inode_ctx_needs_lookup(inode, this)) { +- local->op_ret = 0; +- goto out; +- } ++ SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, ++ &local->prebuf, local->xattr_rsp); ++ return 0; ++} + +- /* Plain assignment because the ref is already taken above through +- * call to inode_find() +- */ +- loc.inode = inode; +- gf_uuid_copy(loc.gfid, gfid); ++int shard_post_stat_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, +- NULL); +- loc_wipe(&loc); ++ local = frame->local; + +- return 0; ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, ++ SHARD_LOOKUP_MASK); + +-out: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; ++ SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, ++ &local->prebuf, local->xattr_rsp); ++ return 0; + } + +-int +-shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++int shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ dict_t *xdata) { ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, ++ "stat failed: %s", local->fd ? 
uuid_utoa(local->fd->inode->gfid) ++ : uuid_utoa((local->loc.inode)->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } + +- if (!IA_ISDIR(buf->ia_type)) { +- gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, +- "%s already exists and " +- "is not a directory. Please remove it from all bricks " +- "and try again", +- shard_internal_dir_string(type)); +- local->op_ret = -1; +- local->op_errno = EIO; +- goto unwind; +- } ++ local->prebuf = *buf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ local->xattr_rsp = dict_ref(xdata); + +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- if (link_inode != inode) { +- shard_refresh_internal_dir(frame, this, type); +- } else { +- shard_inode_ctx_mark_dir_refreshed(link_inode, this); +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- } +- return 0; ++ if (local->loc.inode) ++ inode = local->loc.inode; ++ else ++ inode = local->fd->inode; ++ ++ shard_inode_ctx_invalidate(inode, this, &local->prebuf); + + unwind: +- local->post_res_handler(frame, this); +- return 0; ++ local->handler(frame, this); ++ return 0; + } + +-int +-shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- dict_t *xattr_req = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- uuid_t *gfid = NULL; +- loc_t *loc = NULL; +- gf_boolean_t free_gfid = _gf_true; +- +- local = frame->local; +- priv = this->private; +- local->post_res_handler = post_res_handler; +- +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); +- if (!gfid) +- goto err; +- +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- 
gf_uuid_copy(*gfid, priv->dot_shard_gfid); +- loc = &local->dot_shard_loc; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); +- loc = &local->dot_shard_rm_loc; +- break; +- default: +- bzero(*gfid, sizeof(uuid_t)); +- break; +- } ++int shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set gfid of %s into dict", +- shard_internal_dir_string(type)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } else { +- free_gfid = _gf_false; +- } ++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { ++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, xdata); ++ return 0; ++ } + +- STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, +- xattr_req); ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- dict_unref(xattr_req); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + ++ frame->local = local; ++ ++ local->handler = shard_post_stat_handler; ++ loc_copy(&local->loc, loc); ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, ++ local, err); ++ ++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); ++ return 0; + err: +- if (xattr_req) +- dict_unref(xattr_req); +- if (free_gfid) +- GF_FREE(gfid); +- post_res_handler(frame, this); +- return 0; ++ shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); ++ return 0; + } + +-static void +-shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata, +- struct iatt *buf) +-{ +- int ret = 0; +- uint64_t size = 0; +- void *bsize = NULL; +- +- if (shard_inode_ctx_get_block_size(inode, this, &size)) { +- /* Fresh lookup */ +- ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); +- if (!ret) +- size = ntoh64(*((uint64_t *)bsize)); +- /* If the file is sharded, set its block size, otherwise just +- * set 0. +- */ ++int shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); +- } +- /* If the file is sharded, also set the remaining attributes, +- * except for ia_size and ia_blocks. 
+- */ +- if (size) { +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); +- (void)shard_inode_ctx_invalidate(inode, this, buf); +- } +-} ++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { ++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xdata); ++ return 0; ++ } + +-int +-shard_delete_shards(void *opaque); ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +-int +-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xdata); ++ return 0; ++ } + +-int +-shard_start_background_deletion(xlator_t *this) +-{ +- int ret = 0; +- gf_boolean_t i_cleanup = _gf_true; +- shard_priv_t *priv = NULL; +- call_frame_t *cleanup_frame = NULL; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- LOCK(&priv->lock); +- { +- switch (priv->bg_del_state) { +- case SHARD_BG_DELETION_NONE: +- i_cleanup = _gf_true; +- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; +- break; +- case SHARD_BG_DELETION_LAUNCHING: +- i_cleanup = _gf_false; +- break; +- case SHARD_BG_DELETION_IN_PROGRESS: +- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; +- i_cleanup = _gf_false; +- break; +- default: +- break; +- } +- } +- UNLOCK(&priv->lock); +- if (!i_cleanup) +- return 0; +- +- cleanup_frame = create_frame(this, this->ctx->pool); +- if (!cleanup_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create " +- "new frame to delete shards"); +- ret = -ENOMEM; +- goto err; +- } ++ frame->local = 
local; + +- set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); ++ local->handler = shard_post_fstat_handler; ++ local->fd = fd_ref(fd); ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- ret = synctask_new(this->ctx->env, shard_delete_shards, +- shard_delete_shards_cbk, cleanup_frame, cleanup_frame); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_WARNING, errno, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "failed to create task to do background " +- "cleanup of shards"); +- STACK_DESTROY(cleanup_frame->root); +- goto err; +- } +- return 0; ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); + ++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); ++ return 0; + err: +- LOCK(&priv->lock); +- { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- } +- UNLOCK(&priv->lock); +- return ret; ++ shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, struct iatt *postparent) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; +- gf_boolean_t i_start_cleanup = _gf_false; ++int shard_post_update_size_truncate_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- priv = this->private; ++ local = frame->local; + +- if (op_ret < 0) +- goto unwind; ++ if (local->fop == GF_FOP_TRUNCATE) ++ SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, NULL); ++ else ++ SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, NULL); ++ return 0; ++} + +- if (IA_ISDIR(buf->ia_type)) +- goto unwind; ++int shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t 
op_ret, ++ int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) { ++ inode_t *inode = NULL; ++ int64_t delta_blocks = 0; ++ shard_local_t *local = NULL; + +- /* Also, if the file is sharded, get the file size and block cnt xattr, +- * and store them in the stbuf appropriately. +- */ ++ local = frame->local; + +- if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && +- frame->root->pid != GF_CLIENT_PID_GSYNCD) +- shard_modify_size_and_block_count(buf, xdata); +- +- /* If this was a fresh lookup, there are two possibilities: +- * 1) If the file is sharded (indicated by the presence of block size +- * xattr), store this block size, along with rdev and mode in its +- * inode ctx. +- * 2) If the file is not sharded, store size along with rdev and mode +- * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is +- * already initialised to all zeroes, nothing more needs to be done. +- */ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); + +- (void)shard_inode_ctx_update(inode, this, xdata, buf); ++ inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last" ++ " shard failed : %s", ++ uuid_utoa(inode->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ ++ local->postbuf.ia_size = local->offset; ++ /* Let the delta be negative. 
We want xattrop to do subtraction */ ++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; ++ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, ++ postbuf->ia_blocks - prebuf->ia_blocks); ++ GF_ASSERT(delta_blocks <= 0); ++ local->postbuf.ia_blocks += delta_blocks; ++ local->hole_size = 0; ++ ++ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, ++ inode_t *inode) { ++ size_t last_shard_size_after = 0; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ /* A NULL inode could be due to the fact that the last shard which ++ * needs to be truncated does not exist due to it lying in a hole ++ * region. So the only thing left to do in that case would be an ++ * update to file size xattr. ++ */ ++ if (!inode) { ++ gf_msg_debug(this->name, 0, ++ "Last shard to be truncated absent" ++ " in backend: %s. 
Directly proceeding to update " ++ "file size", ++ uuid_utoa(inode->gfid)); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } + +- LOCK(&priv->lock); +- { +- if (priv->first_lookup_done == _gf_false) { +- priv->first_lookup_done = _gf_true; +- i_start_cleanup = _gf_true; +- } +- } +- UNLOCK(&priv->lock); ++ SHARD_SET_ROOT_FS_ID(frame, local); + +- if (!i_start_cleanup) +- goto unwind; ++ loc.inode = inode_ref(inode); ++ gf_uuid_copy(loc.gfid, inode->gfid); + +- ret = shard_start_background_deletion(this); +- if (ret < 0) { +- LOCK(&priv->lock); +- { +- priv->first_lookup_done = _gf_false; +- } +- UNLOCK(&priv->lock); +- } ++ last_shard_size_after = (local->offset % local->block_size); + +-unwind: +- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, +- postparent); +- return 0; ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, ++ NULL); ++ loc_wipe(&loc); ++ return 0; + } + +-int +-shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +-{ +- int ret = -1; +- int32_t op_errno = ENOMEM; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- this->itable = loc->inode->table; +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); +- } ++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num); + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++int shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int ret = 0; ++ int call_count = 0; ++ int shard_block_num = (long)cookie; ++ uint64_t block_count = 0; ++ shard_local_t *local = NULL; + +- frame->local = local; ++ local = frame->local; + +- loc_copy(&local->loc, loc); ++ if (op_ret < 0) { ++ 
local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } ++ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); ++ if (!ret) { ++ GF_ATOMIC_SUB(local->delta_blocks, block_count); ++ } else { ++ /* dict_get failed possibly due to a heterogeneous cluster? */ ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get key %s from dict during truncate of gfid %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ } ++ ++ shard_unlink_block_inode(local, shard_block_num); ++done: ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ shard_truncate_last_shard(frame, this, local->inode_list[0]); ++ } ++ return 0; ++} ++ ++int shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) { ++ int i = 1; ++ int ret = -1; ++ int call_count = 0; ++ uint32_t cur_block = 0; ++ uint32_t last_block = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ char *bname = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ dict_t *xdata_req = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ cur_block = local->first_block + 1; ++ last_block = local->last_block; ++ ++ /* Determine call count */ ++ for (i = 1; i < local->num_blocks; i++) { ++ if (!local->inode_list[i]) ++ continue; ++ call_count++; ++ } ++ ++ if (!call_count) { ++ /* Call count = 0 implies that all of the shards that need to be ++ * unlinked do not exist. So shard xlator would now proceed to ++ * do the final truncate + size updates. ++ */ ++ gf_msg_debug(this->name, 0, "Shards to be unlinked as part of " ++ "truncate absent in backend: %s. 
Directly " ++ "proceeding to update file size", ++ uuid_utoa(inode->gfid)); ++ local->postbuf.ia_size = local->offset; ++ local->postbuf.ia_blocks = local->prebuf.ia_blocks; ++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->hole_size = 0; ++ shard_update_file_size(frame, this, local->fd, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } + +- local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local->call_count = call_count; ++ i = 1; ++ xdata_req = dict_new(); ++ if (!xdata_req) { ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } ++ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict during truncate of %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ dict_unref(xdata_req); ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } + +- if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict" +- " value: key:%s for path %s", +- GF_XATTR_SHARD_BLOCK_SIZE, loc->path); +- goto err; +- } ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ while (cur_block <= last_block) { ++ if (!local->inode_list[i]) { ++ cur_block++; ++ i++; ++ continue; ++ } ++ if (wind_failed) { ++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, +- 8 * 4); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: 
key:%s for path %s.", +- GF_XATTR_SHARD_FILE_SIZE, loc->path); +- goto err; +- } ++ shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); ++ bname = strrchr(path, '/') + 1; ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s. Base file gfid = %s", ++ bname, uuid_utoa(inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ loc.inode = inode_ref(local->inode_list[i]); + +- if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) +- dict_del(xattr_req, GF_CONTENT_KEY); ++ STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, &loc, ++ 0, xdata_req); ++ loc_wipe(&loc); ++ next: ++ i++; ++ cur_block++; ++ if (!--call_count) ++ break; ++ } ++ dict_unref(xdata_req); ++ return 0; ++} + +- STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); ++int shard_truncate_do(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->num_blocks == 1) { ++ /* This means that there are no shards to be unlinked. ++ * The fop boils down to truncating the last shard, updating ++ * the size and unwinding. 
++ */ ++ shard_truncate_last_shard(frame, this, local->inode_list[0]); + return 0; ++ } else { ++ shard_truncate_htol(frame, this, local->loc.inode); ++ } ++ return 0; + } + +-int +-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- int ret = -1; +- int32_t mask = SHARD_INODE_WRITE_MASK; +- shard_local_t *local = NULL; +- shard_inode_ctx_t ctx = { +- 0, +- }; +- +- local = frame->local; ++int shard_post_lookup_shards_truncate_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_BASE_FILE_LOOKUP_FAILED, +- "Lookup on base file" +- " failed : %s", +- loc_gfid_utoa(&(local->loc))); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ local = frame->local; + +- local->prebuf = *buf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ shard_truncate_do(frame, this); ++ return 0; ++} ++ ++void shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, ++ struct iatt *buf) { ++ int list_index = 0; ++ char block_bname[256] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *linked_inode = NULL; ++ xlator_t *this = NULL; ++ inode_t *fsync_inode = NULL; ++ shard_priv_t *priv = NULL; ++ inode_t *base_inode = NULL; ++ ++ this = THIS; ++ priv = this->private; ++ if (local->loc.inode) { ++ gf_uuid_copy(gfid, local->loc.inode->gfid); ++ base_inode = local->loc.inode; ++ } else if (local->resolver_base_inode) { ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ base_inode = local->resolver_base_inode; ++ } else { ++ gf_uuid_copy(gfid, 
local->base_gfid); ++ } ++ ++ shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); ++ ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); ++ linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); ++ inode_lookup(linked_inode); ++ list_index = block_num - local->first_block; ++ local->inode_list[list_index] = linked_inode; ++ ++ LOCK(&priv->lock); ++ { ++ fsync_inode = __shard_update_shards_inode_list(linked_inode, this, ++ base_inode, block_num, gfid); ++ } ++ UNLOCK(&priv->lock); ++ if (fsync_inode) ++ shard_initiate_evicted_inode_fsync(this, fsync_inode); ++} ++ ++int shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ int call_count = 0; ++ int shard_block_num = (long)cookie; ++ uuid_t gfid = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ if (local->resolver_base_inode) ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ if (op_ret < 0) { ++ /* Ignore absence of shards in the backend in truncate fop. */ ++ switch (local->fop) { ++ case GF_FOP_TRUNCATE: ++ case GF_FOP_FTRUNCATE: ++ case GF_FOP_RENAME: ++ case GF_FOP_UNLINK: ++ if (op_errno == ENOENT) ++ goto done; ++ break; ++ case GF_FOP_WRITE: ++ case GF_FOP_READ: ++ case GF_FOP_ZEROFILL: ++ case GF_FOP_DISCARD: ++ case GF_FOP_FALLOCATE: ++ if ((!local->first_lookup_done) && (op_errno == ENOENT)) { ++ LOCK(&frame->lock); ++ { local->create_count++; } ++ UNLOCK(&frame->lock); ++ goto done; ++ } ++ break; ++ default: ++ break; + } + +- if (shard_inode_ctx_get_all(inode, this, &ctx)) +- mask = SHARD_ALL_MASK; ++ /* else */ ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_LOOKUP_SHARD_FAILED, ++ "Lookup on shard %d " ++ "failed. 
Base file gfid = %s", ++ shard_block_num, uuid_utoa(gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } + +- ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, +- (mask | SHARD_MASK_REFRESH_RESET)); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, +- "Failed to set inode" +- " write params into inode ctx for %s", +- uuid_utoa(buf->ia_gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unwind; +- } ++ shard_link_block_inode(local, shard_block_num, inode, buf); + +-unwind: +- local->handler(frame, this); ++done: ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wake(&local->barrier); + return 0; ++ } else { ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ if (!local->first_lookup_done) ++ local->first_lookup_done = _gf_true; ++ local->pls_fop_handler(frame, this); ++ } ++ } ++ return 0; + } + +-int +-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, +- shard_post_fop_handler_t handler) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- dict_t *xattr_req = NULL; +- gf_boolean_t need_refresh = _gf_false; ++dict_t *shard_create_gfid_dict(dict_t *dict) { ++ int ret = 0; ++ dict_t *new = NULL; ++ unsigned char *gfid = NULL; + +- local = frame->local; +- local->handler = handler; ++ new = dict_copy_with_ref(dict, NULL); ++ if (!new) ++ return NULL; + +- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, +- &need_refresh); +- /* By this time, inode ctx should have been created either in create, +- * mknod, readdirp or lookup. If not it is a bug! 
+- */ +- if ((ret == 0) && (need_refresh == _gf_false)) { +- gf_msg_debug(this->name, 0, +- "Skipping lookup on base file: %s" +- "Serving prebuf off the inode ctx cache", +- uuid_utoa(loc->gfid)); +- goto out; ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); ++ if (!gfid) { ++ ret = -1; ++ goto out; ++ } ++ ++ gf_uuid_generate(gfid); ++ ++ ret = dict_set_gfuuid(new, "gfid-req", gfid, false); ++ ++out: ++ if (ret) { ++ dict_unref(new); ++ new = NULL; ++ GF_FREE(gfid); ++ } ++ ++ return new; ++} ++ ++int shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, ++ inode_t *inode, ++ shard_post_lookup_shards_fop_handler_t handler) { ++ int i = 0; ++ int ret = 0; ++ int count = 0; ++ int call_count = 0; ++ int32_t shard_idx_iter = 0; ++ int last_block = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ char *bname = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ dict_t *xattr_req = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ count = call_count = local->call_count; ++ shard_idx_iter = local->first_block; ++ last_block = local->last_block; ++ local->pls_fop_handler = handler; ++ if (local->lookup_shards_barriered) ++ local->barrier.waitfor = local->call_count; ++ ++ if (inode) ++ gf_uuid_copy(gfid, inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ while (shard_idx_iter <= last_block) { ++ if (local->inode_list[i]) { ++ i++; ++ shard_idx_iter++; ++ continue; ++ } ++ ++ if (wind_failed) { ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); ++ ++ bname = strrchr(path, '/') + 1; ++ loc.inode = inode_new(this->itable); ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); ++ ret = inode_path(loc.parent, 
bname, (char **)&(loc.path)); ++ if (ret < 0 || !(loc.inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s, base file gfid = %s", ++ bname, uuid_utoa(gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; + } + +- xattr_req = dict_new(); ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ loc_wipe(&loc); ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, ++ (void *)(long)shard_idx_iter, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ next: ++ shard_idx_iter++; ++ i++; ++ ++ if (!--call_count) ++ break; ++ } ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wait(&local->barrier, count); ++ local->pls_fop_handler(frame, this); ++ } ++ return 0; ++} ++ ++int shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ if (local->op_errno == ENOENT) { ++ /* If lookup on /.shard fails with ENOENT, it means that ++ * the file was 0-byte in size but truncated sometime in ++ * the past to a higher size which is reflected in the ++ * size xattr, and now being truncated to a lower size. ++ * In this case, the only thing that needs to be done is ++ * to update the size xattr of the file and unwind. 
++ */ ++ local->first_block = local->last_block = 0; ++ local->num_blocks = 1; ++ local->call_count = 0; ++ local->op_ret = 0; ++ local->postbuf.ia_size = local->offset; ++ shard_update_file_size(frame, this, local->fd, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } else { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } ++ } + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); ++ if (!local->call_count) ++ shard_truncate_do(frame, this); ++ else ++ shard_common_lookup_shards(frame, this, local->loc.inode, ++ shard_post_lookup_shards_truncate_handler); ++ ++ return 0; ++} ++ ++int shard_truncate_begin(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ /* First participant block here is the lowest numbered block that would ++ * hold the last byte of the file post successful truncation. ++ * Last participant block is the block that contains the last byte in ++ * the current state of the file. ++ * If (first block == last_block): ++ * then that means that the file only needs truncation of the ++ * first (or last since both are same) block. ++ * Else ++ * if (new_size % block_size == 0) ++ * then that means there is no truncate to be done with ++ * only shards from first_block + 1 through the last ++ * block needing to be unlinked. ++ * else ++ * both truncate of the first block and unlink of the ++ * remaining shards until end of file is required. ++ */ ++ local->first_block = ++ (local->offset == 0) ? 0 : get_lowest_block(local->offset - 1, ++ local->block_size); ++ local->last_block = ++ get_highest_block(0, local->prebuf.ia_size, local->block_size); ++ ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->resolver_base_inode = ++ (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode : local->fd->inode; ++ ++ if ((local->first_block == 0) && (local->num_blocks == 1)) { ++ if (local->fop == GF_FOP_TRUNCATE) ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, &local->loc, local->offset, ++ local->xattr_req); ++ else ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, local->fd, local->offset, ++ local->xattr_req); ++ return 0; ++ } + +- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, xattr_req); ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ goto err; + +- dict_unref(xattr_req); +- return 0; ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ ret = ++ shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret) ++ goto err; ++ shard_lookup_internal_dir(frame, this, shard_post_resolve_truncate_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_post_resolve_truncate_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; + +-out: +- if (xattr_req) +- dict_unref(xattr_req); +- handler(frame, this); +- return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ struct iatt tmp_stbuf = { ++ 0, ++ }; + +- local = frame->local; +- +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, +- SHARD_LOOKUP_MASK); ++ local = frame->local; + +- SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, +- &local->prebuf, 
local->xattr_rsp); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ local->postbuf = tmp_stbuf = local->prebuf; ++ ++ if (local->prebuf.ia_size == local->offset) { ++ /* If the file size is same as requested size, unwind the call ++ * immediately. ++ */ ++ if (local->fop == GF_FOP_TRUNCATE) ++ SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, &local->postbuf, ++ NULL); ++ else ++ SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, ++ &local->postbuf, NULL); ++ } else if (local->offset > local->prebuf.ia_size) { ++ /* If the truncate is from a lower to a higher size, set the ++ * new size xattr and unwind. ++ */ ++ local->hole_size = local->offset - local->prebuf.ia_size; ++ local->delta_size = 0; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->postbuf.ia_size = local->offset; ++ tmp_stbuf.ia_size = local->offset; ++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, ++ SHARD_INODE_WRITE_MASK); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ } else { ++ /* ... else ++ * i. unlink all shards that need to be unlinked. ++ * ii. truncate the last of the shards. ++ * iii. update the new size using setxattr. ++ * and unwind the fop. ++ */ ++ local->hole_size = 0; ++ local->delta_size = (local->offset - local->prebuf.ia_size); ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ tmp_stbuf.ia_size = local->offset; ++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, ++ SHARD_INODE_WRITE_MASK); ++ shard_truncate_begin(frame, this); ++ } ++ return 0; + } + +-int +-shard_post_stat_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++/* TO-DO: ++ * Fix updates to size and block count with racing write(s) and truncate(s). 
++ */ + +- local = frame->local; ++int shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ off_t offset, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, +- SHARD_LOOKUP_MASK); ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, +- &local->prebuf, local->xattr_rsp); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; +-} ++ } + +-int +-shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- shard_local_t *local = NULL; ++ if (!this->itable) ++ this->itable = loc->inode->table; + +- local = frame->local; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ loc_copy(&local->loc, loc); ++ local->offset = offset; ++ local->block_size = block_size; ++ local->fop = GF_FOP_TRUNCATE; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->resolver_base_inode = loc->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_truncate_handler); ++ return 0; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, +- "stat failed: %s", +- local->fd ? 
uuid_utoa(local->fd->inode->gfid) +- : uuid_utoa((local->loc.inode)->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++err: ++ shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ local->fd = fd_ref(fd); ++ local->offset = offset; ++ local->block_size = block_size; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_FTRUNCATE; ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local->resolver_base_inode = fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_truncate_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); ++ return 0; ++} + +- local->prebuf = *buf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; +- } +- local->xattr_rsp = dict_ref(xdata); ++int shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int ret = -1; ++ shard_local_t *local = NULL; + +- if (local->loc.inode) +- inode = local->loc.inode; +- else +- inode = local->fd->inode; ++ local = frame->local; + +- shard_inode_ctx_invalidate(inode, this, &local->prebuf); ++ if (op_ret == -1) ++ goto unwind; ++ ++ ret = ++ shard_inode_ctx_set(inode, this, buf, local->block_size, SHARD_ALL_MASK); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, ++ "Failed to set inode " ++ "ctx for %s", ++ uuid_utoa(inode->gfid)); + + unwind: +- local->handler(frame, this); +- return 0; +-} ++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); + +-int +-shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++ return 0; ++} + +- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { +- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, xdata); +- return 0; +- } ++int shard_mknod(call_frame_t 
*frame, xlator_t *this, loc_t *loc, mode_t mode, ++ dev_t rdev, mode_t umask, dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } ++ priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, xdata); +- return 0; +- } ++ frame->local = local; ++ local->block_size = priv->block_size; ++ if (!__is_gsyncd_on_shard_dir(frame, loc)) { ++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); ++ return 0; ++} + +- frame->local = local; ++int32_t shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ shard_local_t *local = NULL; + +- local->handler = shard_post_stat_handler; +- loc_copy(&local->loc, loc); +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local = frame->local; ++ if (op_ret < 0) ++ goto err; + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, +- local, err); ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_MASK_NLINK | SHARD_MASK_TIMES); ++ buf->ia_size = local->prebuf.ia_size; ++ buf->ia_blocks = local->prebuf.ia_blocks; + +- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); +- return 0; ++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); ++ return 0; + } + +-int +-shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { +- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, xdata); +- return 0; +- } ++ local = frame->local; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ if (local->op_ret < 0) { ++ SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, NULL, ++ NULL, NULL, NULL); ++ return 0; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, xdata); +- return 0; +- } ++ STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), ++ 
FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, ++ local->xattr_req); ++ return 0; ++} + +- if (!this->itable) +- this->itable = fd->inode->table; ++int32_t shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(oldloc->inode->gfid)); ++ goto err; ++ } + +- frame->local = local; ++ if (!block_size) { ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, ++ oldloc, newloc, xdata); ++ return 0; ++ } + +- local->handler = shard_post_fstat_handler; +- local->fd = fd_ref(fd); +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ if (!this->itable) ++ this->itable = oldloc->inode->table; + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); +- return 0; ++ frame->local = local; ++ ++ loc_copy(&local->loc, oldloc); ++ loc_copy(&local->loc2, newloc); ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_link_handler); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); + +- local = frame->local; ++int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, NULL); +- else +- SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, NULL); ++ local = frame->local; ++ ++ if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { ++ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, ++ "failed to delete shards of %s", ++ uuid_utoa(local->resolver_base_inode->gfid)); + return 0; +-} ++ } ++ local->op_ret = 0; ++ local->op_errno = 0; + +-int +-shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *prebuf, struct iatt *postbuf, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- int64_t delta_blocks = 0; +- shard_local_t *local = NULL; ++ shard_unlink_shards_do(frame, this, local->resolver_base_inode); ++ return 0; ++} + +- local = frame->local; ++int shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- SHARD_UNSET_ROOT_FS_ID(frame, local); ++ local = frame->local; ++ local->lookup_shards_barriered = _gf_true; + +- inode = (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode +- : local->fd->inode; +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, +- "truncate on last" +- " shard failed : %s", +- uuid_utoa(inode->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } ++ if (!local->call_count) ++ shard_unlink_shards_do(frame, this, local->resolver_base_inode); ++ else ++ shard_common_lookup_shards(frame, this, local->resolver_base_inode, ++ shard_post_lookup_shards_unlink_handler); ++ return 0; ++} ++ ++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num) { ++ char block_bname[256] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ inode_t *base_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *base_ictx = NULL; ++ int unref_base_inode = 0; ++ int unref_shard_inode = 0; ++ ++ this = THIS; ++ priv = this->private; ++ ++ inode = local->inode_list[shard_block_num - local->first_block]; ++ shard_inode_ctx_get(inode, this, &ctx); ++ base_inode = ctx->base_inode; ++ if (base_inode) ++ gf_uuid_copy(gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, ctx->base_gfid); ++ shard_make_block_bname(shard_block_num, gfid, block_bname, ++ sizeof(block_bname)); ++ ++ LOCK(&priv->lock); ++ if (base_inode) ++ LOCK(&base_inode->lock); ++ LOCK(&inode->lock); ++ { ++ __shard_inode_ctx_get(inode, this, &ctx); ++ if (!list_empty(&ctx->ilist)) { ++ list_del_init(&ctx->ilist); ++ priv->inode_count--; ++ unref_base_inode++; ++ unref_shard_inode++; ++ GF_ASSERT(priv->inode_count >= 0); ++ } ++ if (ctx->fsync_needed) { ++ unref_base_inode++; ++ unref_shard_inode++; ++ list_del_init(&ctx->to_fsync_list); ++ if (base_inode) { ++ __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ base_ictx->fsync_count--; ++ } ++ } ++ } ++ UNLOCK(&inode->lock); ++ if (base_inode) ++ UNLOCK(&base_inode->lock); + +- local->postbuf.ia_size = local->offset; +- 
/* Let the delta be negative. We want xattrop to do subtraction */ +- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, +- postbuf->ia_blocks - prebuf->ia_blocks); +- GF_ASSERT(delta_blocks <= 0); +- local->postbuf.ia_blocks += delta_blocks; +- local->hole_size = 0; ++ inode_unlink(inode, priv->dot_shard_inode, block_bname); ++ inode_ref_reduce_by_n(inode, unref_shard_inode); ++ inode_forget(inode, 0); + +- shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ if (base_inode && unref_base_inode) ++ inode_ref_reduce_by_n(base_inode, unref_base_inode); ++ UNLOCK(&priv->lock); + } + +-int +-shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- size_t last_shard_size_after = 0; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; ++int shard_rename_cbk(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- /* A NULL inode could be due to the fact that the last shard which +- * needs to be truncated does not exist due to it lying in a hole +- * region. So the only thing left to do in that case would be an +- * update to file size xattr. +- */ +- if (!inode) { +- gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent" +- " in backend: %s. 
Directly proceeding to update " +- "file size", +- uuid_utoa(inode->gfid)); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } ++ SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->preoldparent, ++ &local->postoldparent, &local->prenewparent, ++ &local->postnewparent, local->xattr_rsp); ++ return 0; ++} + +- SHARD_SET_ROOT_FS_ID(frame, local); ++int32_t shard_unlink_cbk(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = frame->local; + +- loc.inode = inode_ref(inode); +- gf_uuid_copy(loc.gfid, inode->gfid); ++ SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preoldparent, &local->postoldparent, ++ local->xattr_rsp); ++ return 0; ++} + +- last_shard_size_after = (local->offset % local->block_size); ++int shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int shard_block_num = (long)cookie; ++ shard_local_t *local = NULL; + +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, +- NULL); +- loc_wipe(&loc); +- return 0; +-} ++ local = frame->local; + +-void +-shard_unlink_block_inode(shard_local_t *local, int shard_block_num); ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } + +-int +-shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int ret = 0; +- int call_count = 0; +- int shard_block_num = (long)cookie; +- uint64_t block_count = 0; +- shard_local_t *local = NULL; ++ shard_unlink_block_inode(local, shard_block_num); ++done: ++ syncbarrier_wake(&local->barrier); ++ return 0; ++} ++ ++int 
shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, ++ inode_t *inode) { ++ int i = 0; ++ int ret = -1; ++ int count = 0; ++ uint32_t cur_block = 0; ++ uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ ++ char *bname = NULL; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ if (inode) ++ gf_uuid_copy(gfid, inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (!local->inode_list[i]) ++ continue; ++ count++; ++ } ++ ++ if (!count) { ++ /* callcount = 0 implies that all of the shards that need to be ++ * unlinked are non-existent (in other words the file is full of ++ * holes). ++ */ ++ gf_msg_debug(this->name, 0, "All shards that need to be " ++ "unlinked are non-existent: %s", ++ uuid_utoa(gfid)); ++ return 0; ++ } + +- local = frame->local; ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ local->barrier.waitfor = count; ++ cur_block = cur_block_idx + local->first_block; + +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); +- if (!ret) { +- GF_ATOMIC_SUB(local->delta_blocks, block_count); +- } else { +- /* dict_get failed possibly due to a heterogeneous cluster? 
*/ +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get key %s from dict during truncate of gfid %s", +- GF_GET_FILE_BLOCK_COUNT, +- uuid_utoa(local->resolver_base_inode->gfid)); +- } +- +- shard_unlink_block_inode(local, shard_block_num); +-done: +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- shard_truncate_last_shard(frame, this, local->inode_list[0]); +- } +- return 0; +-} +- +-int +-shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- int i = 1; +- int ret = -1; +- int call_count = 0; +- uint32_t cur_block = 0; +- uint32_t last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- char *bname = NULL; +- loc_t loc = { +- 0, +- }; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- dict_t *xdata_req = NULL; +- +- local = frame->local; +- priv = this->private; +- +- cur_block = local->first_block + 1; +- last_block = local->last_block; +- +- /* Determine call count */ +- for (i = 1; i < local->num_blocks; i++) { +- if (!local->inode_list[i]) +- continue; +- call_count++; +- } ++ while (cur_block_idx < local->num_blocks) { ++ if (!local->inode_list[cur_block_idx]) ++ goto next; + +- if (!call_count) { +- /* Call count = 0 implies that all of the shards that need to be +- * unlinked do not exist. So shard xlator would now proceed to +- * do the final truncate + size updates. +- */ +- gf_msg_debug(this->name, 0, +- "Shards to be unlinked as part of " +- "truncate absent in backend: %s. 
Directly " +- "proceeding to update file size", +- uuid_utoa(inode->gfid)); +- local->postbuf.ia_size = local->offset; +- local->postbuf.ia_blocks = local->prebuf.ia_blocks; +- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- local->hole_size = 0; +- shard_update_file_size(frame, this, local->fd, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; ++ if (wind_failed) { ++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; + } + +- local->call_count = call_count; +- i = 1; +- xdata_req = dict_new(); +- if (!xdata_req) { +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +- } +- ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set key %s into dict during truncate of %s", +- GF_GET_FILE_BLOCK_COUNT, +- uuid_utoa(local->resolver_base_inode->gfid)); +- dict_unref(xdata_req); +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; ++ shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); ++ bname = strrchr(path, '/') + 1; ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s, base file gfid = %s", ++ bname, uuid_utoa(gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; + } + +- SHARD_SET_ROOT_FS_ID(frame, local); +- while (cur_block <= last_block) { +- if (!local->inode_list[i]) { +- cur_block++; +- i++; +- continue; +- } +- if (wind_failed) { +- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, 
NULL, NULL, NULL); +- goto next; +- } +- +- shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s. Base file gfid = %s", +- bname, uuid_utoa(inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- loc.inode = inode_ref(local->inode_list[i]); +- +- STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, +- (void *)(long)cur_block, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); +- loc_wipe(&loc); +- next: +- i++; +- cur_block++; +- if (!--call_count) +- break; +- } +- dict_unref(xdata_req); +- return 0; +-} ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ loc.inode = inode_ref(local->inode_list[cur_block_idx]); + +-int +-shard_truncate_do(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++ STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, ++ (void *)(long)cur_block, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, ++ local->xattr_req); ++ loc_wipe(&loc); ++ next: ++ cur_block++; ++ cur_block_idx++; ++ } ++ syncbarrier_wait(&local->barrier, count); ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ return 0; ++} ++ ++int shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, ++ int now, int first_block, ++ gf_dirent_t *entry) { ++ int i = 0; ++ int ret = 0; ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ ++ local = cleanup_frame->local; ++ ++ local->inode_list = GF_CALLOC(now, sizeof(inode_t *), 
gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ return -ENOMEM; ++ ++ local->first_block = first_block; ++ local->last_block = first_block + now - 1; ++ local->num_blocks = now; ++ gf_uuid_parse(entry->d_name, gfid); ++ gf_uuid_copy(local->base_gfid, gfid); ++ local->resolver_base_inode = inode_find(this->itable, gfid); ++ local->call_count = 0; ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) { ++ GF_FREE(local->inode_list); ++ local->inode_list = NULL; ++ inode_unref(local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ return -errno; ++ } ++ shard_common_resolve_shards(cleanup_frame, this, ++ shard_post_resolve_unlink_handler); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (local->inode_list[i]) ++ inode_unref(local->inode_list[i]); ++ } ++ GF_FREE(local->inode_list); ++ local->inode_list = NULL; ++ if (local->op_ret) ++ ret = -local->op_errno; ++ syncbarrier_destroy(&local->barrier); ++ inode_unref(local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ STACK_RESET(cleanup_frame->root); ++ return ret; ++} ++ ++int __shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) { ++ int ret = 0; ++ int shard_count = 0; ++ int first_block = 0; ++ int now = 0; ++ uint64_t size = 0; ++ uint64_t block_size = 0; ++ uint64_t size_array[4] = { ++ 0, ++ }; ++ void *bsize = NULL; ++ void *size_attr = NULL; ++ dict_t *xattr_rsp = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = cleanup_frame->local; ++ ret = dict_reset(local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", 
GF_XATTR_SHARD_BLOCK_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.inode = inode_ref(inode); ++ loc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, ++ &xattr_rsp); ++ if (ret) ++ goto err; ++ ++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); ++ goto err; ++ } ++ block_size = ntoh64(*((uint64_t *)bsize)); ++ ++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); ++ goto err; ++ } ++ ++ memcpy(size_array, size_attr, sizeof(size_array)); ++ size = ntoh64(size_array[0]); ++ ++ shard_count = (size / block_size) - 1; ++ if (shard_count < 0) { ++ gf_msg_debug(this->name, 0, "Size of %s hasn't grown beyond " ++ "its shard-block-size. Nothing to delete. " ++ "Returning", ++ entry->d_name); ++ /* File size < shard-block-size, so nothing to delete */ ++ ret = 0; ++ goto delete_marker; ++ } ++ if ((size % block_size) > 0) ++ shard_count++; ++ ++ if (shard_count == 0) { ++ gf_msg_debug(this->name, 0, "Size of %s is exactly equal to " ++ "its shard-block-size. Nothing to delete. 
" ++ "Returning", ++ entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } ++ gf_msg_debug(this->name, 0, ++ "base file = %s, " ++ "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ", " ++ "shard_count=%d", ++ entry->d_name, block_size, size, shard_count); ++ ++ /* Perform a gfid-based lookup to see if gfid corresponding to marker ++ * file's base name exists. ++ */ ++ loc_wipe(&loc); ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ gf_uuid_parse(entry->d_name, loc.gfid); ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (!ret) { ++ gf_msg_debug(this->name, 0, "Base shard corresponding to gfid " ++ "%s is present. Skipping shard deletion. " ++ "Returning", ++ entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } + +- local = frame->local; ++ first_block = 1; + +- if (local->num_blocks == 1) { +- /* This means that there are no shards to be unlinked. +- * The fop boils down to truncating the last shard, updating +- * the size and unwinding. 
+- */ +- shard_truncate_last_shard(frame, this, local->inode_list[0]); +- return 0; ++ while (shard_count) { ++ if (shard_count < local->deletion_rate) { ++ now = shard_count; ++ shard_count = 0; + } else { +- shard_truncate_htol(frame, this, local->loc.inode); +- } +- return 0; +-} +- +-int +-shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ now = local->deletion_rate; ++ shard_count -= local->deletion_rate; + } + +- shard_truncate_do(frame, this); +- return 0; +-} ++ gf_msg_debug(this->name, 0, "deleting %d shards starting from " ++ "block %d of gfid %s", ++ now, first_block, entry->d_name); ++ ret = shard_regulated_shards_deletion(cleanup_frame, this, now, first_block, ++ entry); ++ if (ret) ++ goto err; ++ first_block += now; ++ } + +-void +-shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, +- struct iatt *buf) +-{ +- int list_index = 0; +- char block_bname[256] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *linked_inode = NULL; +- xlator_t *this = NULL; +- inode_t *fsync_inode = NULL; +- shard_priv_t *priv = NULL; +- inode_t *base_inode = NULL; +- +- this = THIS; +- priv = this->private; +- if (local->loc.inode) { +- gf_uuid_copy(gfid, local->loc.inode->gfid); +- base_inode = local->loc.inode; +- } else if (local->resolver_base_inode) { +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); +- base_inode = local->resolver_base_inode; ++delete_marker: ++ loc_wipe(&loc); ++ loc.inode = inode_ref(inode); ++ loc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.name 
= strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); ++ if (ret) ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to delete %s " ++ "from /%s", ++ entry->d_name, GF_SHARD_REMOVE_ME_DIR); ++err: ++ if (xattr_rsp) ++ dict_unref(xattr_rsp); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) { ++ int ret = -1; ++ loc_t loc = { ++ 0, ++ }; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ loc.inode = inode_ref(priv->dot_shard_rm_inode); ++ ++ ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); ++ if (ret < 0) { ++ if (ret == -EAGAIN) { ++ ret = 0; ++ } ++ goto out; ++ } ++ { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); } ++ syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); ++out: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) { ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) { ++ int ret = 0; ++ char *bname = NULL; ++ loc_t *loc = NULL; ++ shard_priv_t *priv = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ struct iatt stbuf = { ++ 0, ++ }; ++ ++ priv = this->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ loc = &local->dot_shard_loc; ++ gf_uuid_copy(gfid, priv->dot_shard_gfid); ++ bname = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ loc = &local->dot_shard_rm_loc; ++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ ++ loc->inode = inode_find(this->itable, gfid); ++ if (!loc->inode) { ++ ret = 
shard_init_internal_dir_loc(this, local, type); ++ if (ret) ++ goto err; ++ ret = dict_reset(local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset " ++ "dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); ++ ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, local->xattr_req, ++ NULL); ++ if (ret < 0) { ++ if (ret != -ENOENT) ++ gf_msg(this->name, GF_LOG_ERROR, -ret, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Lookup on %s failed, exiting", bname); ++ goto err; + } else { +- gf_uuid_copy(gfid, local->base_gfid); ++ shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); + } ++ } ++ ret = 0; ++err: ++ return ret; ++} ++ ++int shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, ++ gf_dirent_t *entry) { ++ int ret = 0; ++ loc_t loc = { ++ 0, ++ }; ++ ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.parent = inode_ref(local->fd->inode); ++ ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (ret < 0) { ++ goto err; ++ } ++ entry->inode = inode_ref(loc.inode); ++ ret = 0; ++err: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards(void *opaque) { ++ int ret = 0; ++ off_t offset = 0; ++ loc_t loc = { ++ 0, ++ }; ++ inode_t *link_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ gf_dirent_t entries; ++ gf_dirent_t *entry = NULL; ++ call_frame_t *cleanup_frame = NULL; ++ gf_boolean_t done = _gf_false; ++ ++ this = THIS; ++ priv = this->private; ++ 
INIT_LIST_HEAD(&entries.list); ++ ++ cleanup_frame = opaque; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create local to " ++ "delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ cleanup_frame->local = local; ++ local->fop = GF_FOP_UNLINK; ++ ++ local->xattr_req = dict_new(); ++ if (!local->xattr_req) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ local->deletion_rate = priv->deletion_rate; ++ ++ ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret == -ENOENT) { ++ gf_msg_debug(this->name, 0, ".shard absent. Nothing to" ++ " delete. Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } + +- shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); +- +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); +- linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); +- inode_lookup(linked_inode); +- list_index = block_num - local->first_block; +- local->inode_list[list_index] = linked_inode; +- ++ ret = shard_resolve_internal_dir(this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ if (ret == -ENOENT) { ++ gf_msg_debug(this->name, 0, ".remove_me absent. " ++ "Nothing to delete. 
Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } ++ ++ local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); ++ if (!local->fd) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ for (;;) { ++ offset = 0; + LOCK(&priv->lock); + { +- fsync_inode = __shard_update_shards_inode_list( +- linked_inode, this, base_inode, block_num, gfid); ++ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { ++ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; ++ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ done = _gf_true; ++ } + } + UNLOCK(&priv->lock); +- if (fsync_inode) +- shard_initiate_evicted_inode_fsync(this, fsync_inode); +-} +- +-int +-shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, int32_t op_errno, +- inode_t *inode, struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- int call_count = 0; +- int shard_block_num = (long)cookie; +- uuid_t gfid = { +- 0, +- }; +- shard_local_t *local = NULL; +- +- local = frame->local; +- if (local->resolver_base_inode) +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- if (op_ret < 0) { +- /* Ignore absence of shards in the backend in truncate fop. */ +- switch (local->fop) { +- case GF_FOP_TRUNCATE: +- case GF_FOP_FTRUNCATE: +- case GF_FOP_RENAME: +- case GF_FOP_UNLINK: +- if (op_errno == ENOENT) +- goto done; +- break; +- case GF_FOP_WRITE: +- case GF_FOP_READ: +- case GF_FOP_ZEROFILL: +- case GF_FOP_DISCARD: +- case GF_FOP_FALLOCATE: +- if ((!local->first_lookup_done) && (op_errno == ENOENT)) { +- LOCK(&frame->lock); +- { +- local->create_count++; +- } +- UNLOCK(&frame->lock); +- goto done; +- } +- break; +- default: +- break; +- } +- +- /* else */ +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_LOOKUP_SHARD_FAILED, +- "Lookup on shard %d " +- "failed. 
Base file gfid = %s", +- shard_block_num, uuid_utoa(gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- +- shard_link_block_inode(local, shard_block_num, inode, buf); +- +-done: +- if (local->lookup_shards_barriered) { +- syncbarrier_wake(&local->barrier); +- return 0; +- } else { +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- if (!local->first_lookup_done) +- local->first_lookup_done = _gf_true; +- local->pls_fop_handler(frame, this); +- } +- } +- return 0; +-} +- +-dict_t * +-shard_create_gfid_dict(dict_t *dict) +-{ +- int ret = 0; +- dict_t *new = NULL; +- unsigned char *gfid = NULL; +- +- new = dict_copy_with_ref(dict, NULL); +- if (!new) +- return NULL; +- +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); +- if (!gfid) { +- ret = -1; +- goto out; +- } +- +- gf_uuid_generate(gfid); +- +- ret = dict_set_gfuuid(new, "gfid-req", gfid, false); +- +-out: +- if (ret) { +- dict_unref(new); +- new = NULL; +- GF_FREE(gfid); +- } +- +- return new; +-} ++ if (done) ++ break; ++ while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, ++ &entries, local->xattr_req, NULL))) { ++ if (ret > 0) ++ ret = 0; ++ list_for_each_entry(entry, &entries.list, list) { ++ offset = entry->d_off; + +-int +-shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, +- shard_post_lookup_shards_fop_handler_t handler) +-{ +- int i = 0; +- int ret = 0; +- int count = 0; +- int call_count = 0; +- int32_t shard_idx_iter = 0; +- int last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- char *bname = NULL; +- uuid_t gfid = { +- 0, +- }; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- gf_boolean_t wind_failed = _gf_false; +- dict_t *xattr_req = NULL; +- +- priv = this->private; +- local = frame->local; +- count = call_count = local->call_count; +- shard_idx_iter = local->first_block; +- last_block = local->last_block; +- 
local->pls_fop_handler = handler; +- if (local->lookup_shards_barriered) +- local->barrier.waitfor = local->call_count; +- +- if (inode) +- gf_uuid_copy(gfid, inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); ++ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) ++ continue; + +- while (shard_idx_iter <= last_block) { +- if (local->inode_list[i]) { +- i++; +- shard_idx_iter++; ++ if (!entry->inode) { ++ ret = shard_lookup_marker_entry(this, local, entry); ++ if (ret < 0) + continue; + } ++ link_inode = inode_link(entry->inode, local->fd->inode, entry->d_name, ++ &entry->d_stat); + +- if (wind_failed) { +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; +- } +- +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- bname = strrchr(path, '/') + 1; +- loc.inode = inode_new(this->itable); +- loc.parent = inode_ref(priv->dot_shard_inode); +- gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0 || !(loc.inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- loc_wipe(&loc); +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; ++ gf_msg_debug(this->name, 0, "Initiating deletion of " ++ "shards of gfid %s", ++ 
entry->d_name); ++ ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, ++ link_inode); ++ inode_unlink(link_inode, local->fd->inode, entry->d_name); ++ inode_unref(link_inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to clean up shards of gfid %s", entry->d_name); ++ continue; + } ++ gf_msg(this->name, GF_LOG_INFO, 0, SHARD_MSG_SHARD_DELETION_COMPLETED, ++ "Deleted " ++ "shards of gfid=%s from backend", ++ entry->d_name); ++ } ++ gf_dirent_free(&entries); ++ if (ret) ++ break; ++ } ++ } ++ ret = 0; ++ loc_wipe(&loc); ++ return ret; + +- STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, +- (void *)(long)shard_idx_iter, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); +- loc_wipe(&loc); +- dict_unref(xattr_req); +- next: +- shard_idx_iter++; +- i++; +- +- if (!--call_count) +- break; +- } +- if (local->lookup_shards_barriered) { +- syncbarrier_wait(&local->barrier, count); +- local->pls_fop_handler(frame, this); +- } +- return 0; ++err: ++ LOCK(&priv->lock); ++ { priv->bg_del_state = SHARD_BG_DELETION_NONE; } ++ UNLOCK(&priv->lock); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ if (op_ret) ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->inodelk_frame; ++ lk_local = lk_frame->local; ++ local->inodelk_frame = NULL; ++ loc = &local->int_inodelk.loc; ++ lock = &lk_local->int_inodelk; ++ lock->flock.l_type = F_UNLCK; ++ ++ STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, ++ &lock->flock, NULL); ++ local->int_inodelk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata); ++int shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ loc_t *dst_loc = NULL; ++ loc_t tmp_loc = { ++ 0, ++ }; ++ shard_local_t *local = frame->local; ++ ++ if (local->dst_block_size) { ++ tmp_loc.parent = inode_ref(local->loc2.parent); ++ ret = inode_path(tmp_loc.parent, local->loc2.name, (char **)&tmp_loc.path); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on pargfid=%s bname=%s", ++ uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ tmp_loc.name = strrchr(tmp_loc.path, '/'); ++ if (tmp_loc.name) ++ tmp_loc.name++; ++ dst_loc = &tmp_loc; ++ } else { ++ dst_loc = &local->loc2; ++ } ++ ++ /* To-Do: Request open-fd count on dst base file */ ++ STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, 
++ local->xattr_req); ++ loc_wipe(&tmp_loc); ++ return 0; ++err: ++ loc_wipe(&tmp_loc); ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this); ++ ++int shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, ++ dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Xattrop on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } ++ ++ inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, ++ local->newloc.name); ++ ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); ++ return 0; ++} ++ ++int shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) { ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ dict_t *xdata = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ xdata = dict_new(); ++ if (!xdata) ++ goto err; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->xattrop, &local->newloc, ++ GF_XATTROP_GET_AND_SET, xdata, NULL); ++ dict_unref(xdata); ++ return 0; ++err: ++ if (xdata) ++ dict_unref(xdata); ++ shard_common_failure_unwind(local->fop, frame, -1, op_errno); ++ return 0; + } + +-int 
+-shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- if (local->op_errno == ENOENT) { +- /* If lookup on /.shard fails with ENOENT, it means that +- * the file was 0-byte in size but truncated sometime in +- * the past to a higher size which is reflected in the +- * size xattr, and now being truncated to a lower size. +- * In this case, the only thing that needs to be done is +- * to update the size xattr of the file and unwind. +- */ +- local->first_block = local->last_block = 0; +- local->num_blocks = 1; +- local->call_count = 0; +- local->op_ret = 0; +- local->postbuf.ia_size = local->offset; +- shard_update_file_size(frame, this, local->fd, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } else { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- } +- +- if (!local->call_count) +- shard_truncate_do(frame, this); +- else +- shard_common_lookup_shards(frame, this, local->loc.inode, +- shard_post_lookup_shards_truncate_handler); +- +- return 0; ++int shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Lookup on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } ++ ++ linked_inode = ++ inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); ++ inode_unref(local->newloc.inode); ++ local->newloc.inode = linked_inode; ++ shard_set_size_attrs_on_marker_file(frame, this); ++ return 0; ++err: 
++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); ++ return 0; + } + +-int +-shard_truncate_begin(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- /* First participant block here is the lowest numbered block that would +- * hold the last byte of the file post successful truncation. +- * Last participant block is the block that contains the last byte in +- * the current state of the file. +- * If (first block == last_block): +- * then that means that the file only needs truncation of the +- * first (or last since both are same) block. +- * Else +- * if (new_size % block_size == 0) +- * then that means there is no truncate to be done with +- * only shards from first_block + 1 through the last +- * block needing to be unlinked. +- * else +- * both truncate of the first block and unlink of the +- * remaining shards until end of file is required. +- */ +- local->first_block = (local->offset == 0) +- ? 0 +- : get_lowest_block(local->offset - 1, +- local->block_size); +- local->last_block = get_highest_block(0, local->prebuf.ia_size, +- local->block_size); +- +- local->num_blocks = local->last_block - local->first_block + 1; +- local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) +- ? 
local->loc.inode +- : local->fd->inode; +- +- if ((local->first_block == 0) && (local->num_blocks == 1)) { +- if (local->fop == GF_FOP_TRUNCATE) +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, &local->loc, +- local->offset, local->xattr_req); +- else +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->ftruncate, local->fd, +- local->offset, local->xattr_req); +- return 0; +- } ++int shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) { ++ int op_errno = ENOMEM; ++ dict_t *xattr_req = NULL; ++ shard_local_t *local = NULL; + +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto err; ++ local = frame->local; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = shard_init_internal_dir_loc(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto err; +- shard_lookup_internal_dir(frame, this, +- shard_post_resolve_truncate_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_truncate_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) ++ goto err; + ++ STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, -1, op_errno); ++ return 0; + } + +-int +-shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- struct iatt tmp_stbuf = { +- 0, +- }; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- 
shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- local->postbuf = tmp_stbuf = local->prebuf; +- +- if (local->prebuf.ia_size == local->offset) { +- /* If the file size is same as requested size, unwind the call +- * immediately. +- */ +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, +- &local->postbuf, NULL); +- else +- SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, +- &local->postbuf, NULL); +- } else if (local->offset > local->prebuf.ia_size) { +- /* If the truncate is from a lower to a higher size, set the +- * new size xattr and unwind. +- */ +- local->hole_size = local->offset - local->prebuf.ia_size; +- local->delta_size = 0; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- local->postbuf.ia_size = local->offset; +- tmp_stbuf.ia_size = local->offset; +- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +- SHARD_INODE_WRITE_MASK); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); ++int shard_create_marker_file_under_remove_me_cbk( ++ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (op_ret < 0) { ++ if ((op_errno != EEXIST) && (op_errno != ENODATA)) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Marker file creation " ++ "failed while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; + } else { +- /* ... else +- * i. unlink all shards that need to be unlinked. +- * ii. truncate the last of the shards. +- * iii. 
update the new size using setxattr. +- * and unwind the fop. +- */ +- local->hole_size = 0; +- local->delta_size = (local->offset - local->prebuf.ia_size); +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- tmp_stbuf.ia_size = local->offset; +- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +- SHARD_INODE_WRITE_MASK); +- shard_truncate_begin(frame, this); +- } +- return 0; +-} +- +-/* TO-DO: +- * Fix updates to size and block count with racing write(s) and truncate(s). +- */ +- +-int +-shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; ++ shard_lookup_marker_file(frame, this); ++ return 0; + } ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = loc->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- loc_copy(&local->loc, loc); +- local->offset = offset; +- local->block_size = block_size; +- local->fop = GF_FOP_TRUNCATE; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->resolver_base_inode = loc->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); +- return 0; ++ linked_inode = ++ inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); ++ inode_unref(local->newloc.inode); ++ local->newloc.inode = linked_inode; + ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = fd->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- local->fd = fd_ref(fd); +- local->offset = offset; +- local->block_size = block_size; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_FTRUNCATE; ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++} ++ ++int shard_create_marker_file_under_remove_me(call_frame_t *frame, ++ xlator_t *this, loc_t *loc) { ++ int ret = 0; ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ char g1[64] = { ++ 0, ++ }; ++ char g2[64] = { ++ 0, ++ }; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) ++ goto err; ++ ++ local->newloc.inode = inode_new(this->itable); ++ local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), ++ (char **)&local->newloc.path); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on " ++ "pargfid=%s bname=%s", ++ uuid_utoa_r(priv->dot_shard_rm_gfid, g1), ++ uuid_utoa_r(loc->inode->gfid, g2)); ++ goto err; ++ } ++ local->newloc.name = strrchr(local->newloc.path, '/'); ++ if (local->newloc.name) ++ local->newloc.name++; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ ++ SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ ++ STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &local->newloc, ++ 0, 0, 0644, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); +- local->resolver_base_inode = fd->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); +- 
return 0; + err: +- shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); +- return 0; ++ if (xattr_req) ++ dict_unref(xattr_req); ++ shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, ++ NULL, NULL, NULL, NULL, NULL); ++ return 0; + } + +-int +-shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret == -1) +- goto unwind; +- +- ret = shard_inode_ctx_set(inode, this, buf, local->block_size, +- SHARD_ALL_MASK); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, +- "Failed to set inode " +- "ctx for %s", +- uuid_utoa(inode->gfid)); +- +-unwind: +- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, +- postparent, xdata); ++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); + +- return 0; +-} ++int shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; + +-int +-shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, +- dev_t rdev, mode_t umask, dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; ++ local = frame->local; + +- priv = this->private; +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } else { ++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); ++ local->preoldparent = *preparent; ++ local->postoldparent = *postparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ if (local->cleanup_required) ++ shard_start_background_deletion(this); ++ } + +- frame->local = 
local; +- local->block_size = priv->block_size; +- if (!__is_gsyncd_on_shard_dir(frame, loc)) { +- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; + } ++ } + +- STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); +- return 0; +-} +- +-int32_t +-shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- if (op_ret < 0) +- goto err; +- +- shard_inode_ctx_set(inode, this, buf, 0, +- SHARD_MASK_NLINK | SHARD_MASK_TIMES); +- buf->ia_size = local->prebuf.ia_size; +- buf->ia_blocks = local->prebuf.ia_blocks; +- +- SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, +- postparent, xdata); +- return 0; ++ ret = shard_unlock_inodelk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } ++ ++ shard_unlink_cbk(frame, this); ++ return 0; ++} ++ ++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = frame->local; ++ ++ /* To-Do: Request open-fd count on base file */ ++ STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, ++ local->xattr_req); ++ return 0; ++} ++ ++int shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ if (op_ret) ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_entrylk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->entrylk_frame; ++ lk_local = lk_frame->local; ++ local->entrylk_frame = NULL; ++ lock = &lk_local->int_entrylk; ++ loc = &lock->loc; ++ ++ STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->entrylk, this->name, loc, ++ lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, ++ NULL); ++ local->int_entrylk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_create_marker_file_under_remove_me(frame, this, ++ &local->int_inodelk.loc); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-entrylk handler not defined. 
This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); ++ return 0; ++ } ++ main_local->int_entrylk.acquired_lock = _gf_true; ++ shard_post_entrylk_fop_handler(main_frame, this); ++ return 0; ++} ++ ++int shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, ++ uuid_t gfid) { ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_local_t *entrylk_local = NULL; ++ shard_entrylk_t *int_entrylk = NULL; ++ call_frame_t *entrylk_frame = NULL; ++ ++ local = frame->local; ++ entrylk_frame = create_frame(this, this->ctx->pool); ++ if (!entrylk_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to lock marker file"); ++ goto err; ++ } ++ ++ entrylk_local = mem_get0(this->local_pool); ++ if (!entrylk_local) { ++ STACK_DESTROY(entrylk_frame->root); ++ goto err; ++ } ++ ++ entrylk_frame->local = entrylk_local; ++ entrylk_local->main_frame = frame; ++ int_entrylk = &entrylk_local->int_entrylk; ++ ++ int_entrylk->loc.inode = inode_ref(inode); ++ set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); ++ local->entrylk_frame = entrylk_frame; ++ gf_uuid_unparse(gfid, gfid_str); ++ int_entrylk->basename = gf_strdup(gfid_str); ++ ++ STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, ++ int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); ++ return 0; + err: +- 
shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); +- return 0; +-} +- +-int +-shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, +- NULL, NULL, NULL, NULL); +- return 0; +- } +- +- STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, +- local->xattr_req); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int32_t +-shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(oldloc->inode->gfid)); +- goto err; +- } +- +- if (!block_size) { +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, +- oldloc, newloc, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = oldloc->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- loc_copy(&local->loc, oldloc); +- loc_copy(&local->loc2, newloc); +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_link_handler); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); +- +-int +-shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { +- gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, +- "failed to delete shards of %s", +- uuid_utoa(local->resolver_base_inode->gfid)); +- return 0; +- } +- local->op_ret = 0; +- local->op_errno = 0; +- +- shard_unlink_shards_do(frame, this, local->resolver_base_inode); +- return 0; +-} +- +-int +-shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- local->lookup_shards_barriered = _gf_true; +- +- if (!local->call_count) +- shard_unlink_shards_do(frame, this, local->resolver_base_inode); +- else +- shard_common_lookup_shards(frame, this, local->resolver_base_inode, +- shard_post_lookup_shards_unlink_handler); +- return 0; +-} +- +-void +-shard_unlink_block_inode(shard_local_t *local, int shard_block_num) +-{ +- char block_bname[256] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *inode = NULL; +- inode_t *base_inode = NULL; +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *base_ictx = NULL; +- int unref_base_inode = 0; +- int unref_shard_inode = 0; +- +- this = THIS; +- priv = this->private; +- +- inode = local->inode_list[shard_block_num - local->first_block]; +- shard_inode_ctx_get(inode, this, &ctx); +- base_inode = ctx->base_inode; +- if (base_inode) +- gf_uuid_copy(gfid, base_inode->gfid); +- else +- gf_uuid_copy(gfid, 
ctx->base_gfid); +- shard_make_block_bname(shard_block_num, gfid, block_bname, +- sizeof(block_bname)); +- +- LOCK(&priv->lock); +- if (base_inode) +- LOCK(&base_inode->lock); +- LOCK(&inode->lock); +- { +- __shard_inode_ctx_get(inode, this, &ctx); +- if (!list_empty(&ctx->ilist)) { +- list_del_init(&ctx->ilist); +- priv->inode_count--; +- unref_base_inode++; +- unref_shard_inode++; +- GF_ASSERT(priv->inode_count >= 0); +- } +- if (ctx->fsync_needed) { +- unref_base_inode++; +- unref_shard_inode++; +- list_del_init(&ctx->to_fsync_list); +- if (base_inode) { +- __shard_inode_ctx_get(base_inode, this, &base_ictx); +- base_ictx->fsync_count--; +- } +- } +- } +- UNLOCK(&inode->lock); +- if (base_inode) +- UNLOCK(&base_inode->lock); +- +- inode_unlink(inode, priv->dot_shard_inode, block_bname); +- inode_ref_reduce_by_n(inode, unref_shard_inode); +- inode_forget(inode, 0); +- +- if (base_inode && unref_base_inode) +- inode_ref_reduce_by_n(base_inode, unref_base_inode); +- UNLOCK(&priv->lock); +-} +- +-int +-shard_rename_cbk(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->preoldparent, +- &local->postoldparent, &local->prenewparent, +- &local->postnewparent, local->xattr_rsp); +- return 0; +-} +- +-int32_t +-shard_unlink_cbk(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- +- SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, +- &local->preoldparent, &local->postoldparent, +- local->xattr_rsp); +- return 0; +-} +- +-int +-shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int shard_block_num = (long)cookie; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno 
= op_errno; +- goto done; +- } +- +- shard_unlink_block_inode(local, shard_block_num); +-done: +- syncbarrier_wake(&local->barrier); +- return 0; +-} +- +-int +-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- int i = 0; +- int ret = -1; +- int count = 0; +- uint32_t cur_block = 0; +- uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ +- char *bname = NULL; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- loc_t loc = { +- 0, +- }; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- if (inode) +- gf_uuid_copy(gfid, inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- for (i = 0; i < local->num_blocks; i++) { +- if (!local->inode_list[i]) +- continue; +- count++; +- } +- +- if (!count) { +- /* callcount = 0 implies that all of the shards that need to be +- * unlinked are non-existent (in other words the file is full of +- * holes). 
+- */ +- gf_msg_debug(this->name, 0, +- "All shards that need to be " +- "unlinked are non-existent: %s", +- uuid_utoa(gfid)); +- return 0; +- } +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- local->barrier.waitfor = count; +- cur_block = cur_block_idx + local->first_block; +- +- while (cur_block_idx < local->num_blocks) { +- if (!local->inode_list[cur_block_idx]) +- goto next; +- +- if (wind_failed) { +- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- +- shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- loc.inode = inode_ref(local->inode_list[cur_block_idx]); +- +- STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, +- (void *)(long)cur_block, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, +- local->xattr_req); +- loc_wipe(&loc); +- next: +- cur_block++; +- cur_block_idx++; +- } +- syncbarrier_wait(&local->barrier, count); +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- return 0; +-} +- +-int +-shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, +- int now, int first_block, gf_dirent_t *entry) +-{ +- int i = 0; +- int ret = 0; +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; +- +- local = cleanup_frame->local; +- +- local->inode_list = GF_CALLOC(now, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if 
(!local->inode_list) +- return -ENOMEM; +- +- local->first_block = first_block; +- local->last_block = first_block + now - 1; +- local->num_blocks = now; +- gf_uuid_parse(entry->d_name, gfid); +- gf_uuid_copy(local->base_gfid, gfid); +- local->resolver_base_inode = inode_find(this->itable, gfid); +- local->call_count = 0; +- ret = syncbarrier_init(&local->barrier); +- if (ret) { +- GF_FREE(local->inode_list); +- local->inode_list = NULL; +- inode_unref(local->resolver_base_inode); +- local->resolver_base_inode = NULL; +- return -errno; +- } +- shard_common_resolve_shards(cleanup_frame, this, +- shard_post_resolve_unlink_handler); +- +- for (i = 0; i < local->num_blocks; i++) { +- if (local->inode_list[i]) +- inode_unref(local->inode_list[i]); +- } +- GF_FREE(local->inode_list); +- local->inode_list = NULL; +- if (local->op_ret) +- ret = -local->op_errno; +- syncbarrier_destroy(&local->barrier); +- inode_unref(local->resolver_base_inode); +- local->resolver_base_inode = NULL; +- STACK_RESET(cleanup_frame->root); +- return ret; +-} +- +-int +-__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, +- gf_dirent_t *entry, inode_t *inode) +-{ +- int ret = 0; +- int shard_count = 0; +- int first_block = 0; +- int now = 0; +- uint64_t size = 0; +- uint64_t block_size = 0; +- uint64_t size_array[4] = { +- 0, +- }; +- void *bsize = NULL; +- void *size_attr = NULL; +- dict_t *xattr_rsp = NULL; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = cleanup_frame->local; +- ret = dict_reset(local->xattr_req); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to reset dict"); +- ret = -ENOMEM; +- goto err; +- } +- +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); +- ret = 
-ENOMEM; +- goto err; +- } +- +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.inode = inode_ref(inode); +- loc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, +- &xattr_rsp); +- if (ret) +- goto err; +- +- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); +- goto err; +- } +- block_size = ntoh64(*((uint64_t *)bsize)); +- +- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); +- goto err; +- } +- +- memcpy(size_array, size_attr, sizeof(size_array)); +- size = ntoh64(size_array[0]); +- +- shard_count = (size / block_size) - 1; +- if (shard_count < 0) { +- gf_msg_debug(this->name, 0, +- "Size of %s hasn't grown beyond " +- "its shard-block-size. Nothing to delete. " +- "Returning", +- entry->d_name); +- /* File size < shard-block-size, so nothing to delete */ +- ret = 0; +- goto delete_marker; +- } +- if ((size % block_size) > 0) +- shard_count++; +- +- if (shard_count == 0) { +- gf_msg_debug(this->name, 0, +- "Size of %s is exactly equal to " +- "its shard-block-size. Nothing to delete. 
" +- "Returning", +- entry->d_name); +- ret = 0; +- goto delete_marker; +- } +- gf_msg_debug(this->name, 0, +- "base file = %s, " +- "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 +- ", " +- "shard_count=%d", +- entry->d_name, block_size, size, shard_count); +- +- /* Perform a gfid-based lookup to see if gfid corresponding to marker +- * file's base name exists. +- */ +- loc_wipe(&loc); +- loc.inode = inode_new(this->itable); +- if (!loc.inode) { +- ret = -ENOMEM; +- goto err; +- } +- gf_uuid_parse(entry->d_name, loc.gfid); +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); +- if (!ret) { +- gf_msg_debug(this->name, 0, +- "Base shard corresponding to gfid " +- "%s is present. Skipping shard deletion. " +- "Returning", +- entry->d_name); +- ret = 0; +- goto delete_marker; +- } +- +- first_block = 1; +- +- while (shard_count) { +- if (shard_count < local->deletion_rate) { +- now = shard_count; +- shard_count = 0; +- } else { +- now = local->deletion_rate; +- shard_count -= local->deletion_rate; +- } +- +- gf_msg_debug(this->name, 0, +- "deleting %d shards starting from " +- "block %d of gfid %s", +- now, first_block, entry->d_name); +- ret = shard_regulated_shards_deletion(cleanup_frame, this, now, +- first_block, entry); +- if (ret) +- goto err; +- first_block += now; +- } +- +-delete_marker: +- loc_wipe(&loc); +- loc.inode = inode_ref(inode); +- loc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to delete %s " +- "from /%s", +- entry->d_name, GF_SHARD_REMOVE_ME_DIR); +-err: +- if 
(xattr_rsp) +- dict_unref(xattr_rsp); +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, +- gf_dirent_t *entry, inode_t *inode) +-{ +- int ret = -1; +- loc_t loc = { +- 0, +- }; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- loc.inode = inode_ref(priv->dot_shard_rm_inode); +- +- ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, +- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); +- if (ret < 0) { +- if (ret == -EAGAIN) { +- ret = 0; +- } +- goto out; +- } +- { +- ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); +- } +- syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, +- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); +-out: +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) +-{ +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, +- shard_internal_dir_type_t type) +-{ +- int ret = 0; +- char *bname = NULL; +- loc_t *loc = NULL; +- shard_priv_t *priv = NULL; +- uuid_t gfid = { +- 0, +- }; +- struct iatt stbuf = { +- 0, +- }; +- +- priv = this->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- loc = &local->dot_shard_loc; +- gf_uuid_copy(gfid, priv->dot_shard_gfid); +- bname = GF_SHARD_DIR; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- loc = &local->dot_shard_rm_loc; +- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); +- bname = GF_SHARD_REMOVE_ME_DIR; +- break; +- default: +- break; +- } +- +- loc->inode = inode_find(this->itable, gfid); +- if (!loc->inode) { +- ret = shard_init_internal_dir_loc(this, local, type); +- if (ret) +- goto err; +- ret = dict_reset(local->xattr_req); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to reset " +- "dict"); +- ret = -ENOMEM; +- goto err; +- } +- ret = 
dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); +- ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, +- local->xattr_req, NULL); +- if (ret < 0) { +- if (ret != -ENOENT) +- gf_msg(this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Lookup on %s failed, exiting", bname); +- goto err; +- } else { +- shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); +- } +- } +- ret = 0; +-err: +- return ret; +-} +- +-int +-shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, +- gf_dirent_t *entry) +-{ +- int ret = 0; +- loc_t loc = { +- 0, +- }; +- +- loc.inode = inode_new(this->itable); +- if (!loc.inode) { +- ret = -ENOMEM; +- goto err; +- } +- loc.parent = inode_ref(local->fd->inode); +- +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); +- if (ret < 0) { +- goto err; +- } +- entry->inode = inode_ref(loc.inode); +- ret = 0; +-err: +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards(void *opaque) +-{ +- int ret = 0; +- off_t offset = 0; +- loc_t loc = { +- 0, +- }; +- inode_t *link_inode = NULL; +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- gf_dirent_t entries; +- gf_dirent_t *entry = NULL; +- call_frame_t *cleanup_frame = NULL; +- gf_boolean_t done = _gf_false; +- +- this = THIS; +- priv = this->private; +- INIT_LIST_HEAD(&entries.list); +- +- cleanup_frame = opaque; +- +- local = mem_get0(this->local_pool); +- if (!local) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create local to " +- "delete shards"); +- ret = -ENOMEM; +- goto err; +- } +- cleanup_frame->local = local; +- 
local->fop = GF_FOP_UNLINK; +- +- local->xattr_req = dict_new(); +- if (!local->xattr_req) { +- ret = -ENOMEM; +- goto err; +- } +- local->deletion_rate = priv->deletion_rate; +- +- ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret == -ENOENT) { +- gf_msg_debug(this->name, 0, +- ".shard absent. Nothing to" +- " delete. Exiting"); +- ret = 0; +- goto err; +- } else if (ret < 0) { +- goto err; +- } +- +- ret = shard_resolve_internal_dir(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- if (ret == -ENOENT) { +- gf_msg_debug(this->name, 0, +- ".remove_me absent. " +- "Nothing to delete. Exiting"); +- ret = 0; +- goto err; +- } else if (ret < 0) { +- goto err; +- } +- +- local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); +- if (!local->fd) { +- ret = -ENOMEM; +- goto err; +- } +- +- for (;;) { +- offset = 0; +- LOCK(&priv->lock); +- { +- if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { +- priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; +- } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- done = _gf_true; +- } +- } +- UNLOCK(&priv->lock); +- if (done) +- break; +- while ( +- (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, +- &entries, local->xattr_req, NULL))) { +- if (ret > 0) +- ret = 0; +- list_for_each_entry(entry, &entries.list, list) +- { +- offset = entry->d_off; +- +- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) +- continue; +- +- if (!entry->inode) { +- ret = shard_lookup_marker_entry(this, local, entry); +- if (ret < 0) +- continue; +- } +- link_inode = inode_link(entry->inode, local->fd->inode, +- entry->d_name, &entry->d_stat); +- +- gf_msg_debug(this->name, 0, +- "Initiating deletion of " +- "shards of gfid %s", +- entry->d_name); +- ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, +- link_inode); +- inode_unlink(link_inode, local->fd->inode, entry->d_name); +- 
inode_unref(link_inode); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to clean up shards of gfid %s", +- entry->d_name); +- continue; +- } +- gf_msg(this->name, GF_LOG_INFO, 0, +- SHARD_MSG_SHARD_DELETION_COMPLETED, +- "Deleted " +- "shards of gfid=%s from backend", +- entry->d_name); +- } +- gf_dirent_free(&entries); +- if (ret) +- break; +- } +- } +- ret = 0; +- loc_wipe(&loc); +- return ret; +- +-err: +- LOCK(&priv->lock); +- { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- } +- UNLOCK(&priv->lock); +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- if (op_ret) +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Unlock failed. Please check brick logs for " +- "more details"); +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_inodelk_t *lock = NULL; +- +- local = frame->local; +- lk_frame = local->inodelk_frame; +- lk_local = lk_frame->local; +- local->inodelk_frame = NULL; +- loc = &local->int_inodelk.loc; +- lock = &lk_local->int_inodelk; +- lock->flock.l_type = F_UNLCK; +- +- STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, +- &lock->flock, NULL); +- local->int_inodelk.acquired_lock = _gf_false; +- return 0; +-} +- +-int +-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- struct iatt *preoldparent, struct iatt *postoldparent, +- struct iatt *prenewparent, struct iatt *postnewparent, +- dict_t *xdata); +-int +-shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 
0; +- loc_t *dst_loc = NULL; +- loc_t tmp_loc = { +- 0, +- }; +- shard_local_t *local = frame->local; +- +- if (local->dst_block_size) { +- tmp_loc.parent = inode_ref(local->loc2.parent); +- ret = inode_path(tmp_loc.parent, local->loc2.name, +- (char **)&tmp_loc.path); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on pargfid=%s bname=%s", +- uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- +- tmp_loc.name = strrchr(tmp_loc.path, '/'); +- if (tmp_loc.name) +- tmp_loc.name++; +- dst_loc = &tmp_loc; +- } else { +- dst_loc = &local->loc2; +- } +- +- /* To-Do: Request open-fd count on dst base file */ +- STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, +- local->xattr_req); +- loc_wipe(&tmp_loc); +- return 0; +-err: +- loc_wipe(&tmp_loc); +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int +-shard_unlink_base_file(call_frame_t *frame, xlator_t *this); +- +-int +-shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Xattrop on marker file failed " +- "while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } +- +- inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, +- local->newloc.name); +- +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, op_ret, 
op_errno); +- return 0; +-} +- +-int +-shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) +-{ +- int op_errno = ENOMEM; +- uint64_t bs = 0; +- dict_t *xdata = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- xdata = dict_new(); +- if (!xdata) +- goto err; +- +- if (local->fop == GF_FOP_UNLINK) +- bs = local->block_size; +- else if (local->fop == GF_FOP_RENAME) +- bs = local->dst_block_size; +- SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, +- local->prebuf.ia_size, 0, err); +- STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, +- &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL); +- dict_unref(xdata); +- return 0; +-err: +- if (xdata) +- dict_unref(xdata); +- shard_common_failure_unwind(local->fop, frame, -1, op_errno); +- return 0; +-} +- +-int +-shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- priv = this->private; +- +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Lookup on marker file failed " +- "while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } +- +- linked_inode = inode_link(inode, priv->dot_shard_rm_inode, +- local->newloc.name, buf); +- inode_unref(local->newloc.inode); +- local->newloc.inode = linked_inode; +- shard_set_size_attrs_on_marker_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); +- return 0; +-} +- +-int +-shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) +-{ +- int op_errno = ENOMEM; +- dict_t *xattr_req = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- xattr_req 
= shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) +- goto err; +- +- STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); +- dict_unref(xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, op_errno); +- return 0; +-} +- +-int +-shard_create_marker_file_under_remove_me_cbk( +- call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- priv = this->private; +- +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (op_ret < 0) { +- if ((op_errno != EEXIST) && (op_errno != ENODATA)) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Marker file creation " +- "failed while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } else { +- shard_lookup_marker_file(frame, this); +- return 0; +- } +- } +- +- linked_inode = inode_link(inode, priv->dot_shard_rm_inode, +- local->newloc.name, buf); +- inode_unref(local->newloc.inode); +- local->newloc.inode = linked_inode; +- +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +-} +- +-int +-shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this, +- loc_t *loc) +-{ +- int ret = 0; +- int op_errno = ENOMEM; +- uint64_t bs = 0; +- char g1[64] = { +- 0, +- }; +- char g2[64] = { +- 0, +- }; +- dict_t *xattr_req = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; 
+- local = frame->local; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) +- goto err; +- +- local->newloc.inode = inode_new(this->itable); +- local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), +- (char **)&local->newloc.path); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on " +- "pargfid=%s bname=%s", +- uuid_utoa_r(priv->dot_shard_rm_gfid, g1), +- uuid_utoa_r(loc->inode->gfid, g2)); +- goto err; +- } +- local->newloc.name = strrchr(local->newloc.path, '/'); +- if (local->newloc.name) +- local->newloc.name++; +- +- if (local->fop == GF_FOP_UNLINK) +- bs = local->block_size; +- else if (local->fop == GF_FOP_RENAME) +- bs = local->dst_block_size; +- +- SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, +- local->prebuf.ia_size, 0, err); +- +- STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, +- &local->newloc, 0, 0, 0644, xattr_req); +- dict_unref(xattr_req); +- return 0; +- +-err: +- if (xattr_req) +- dict_unref(xattr_req); +- shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, +- NULL, NULL, NULL, NULL, NULL); +- return 0; +-} +- +-int +-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); +- +-int +-shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } else { +- local->preoldparent = *preparent; +- local->postoldparent = *postparent; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- if (local->cleanup_required) +- 
shard_start_background_deletion(this); +- } +- +- if (local->entrylk_frame) { +- ret = shard_unlock_entrylk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- } +- +- ret = shard_unlock_inodelk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- +- shard_unlink_cbk(frame, this); +- return 0; +-} +- +-int +-shard_unlink_base_file(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- +- /* To-Do: Request open-fd count on base file */ +- STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, +- local->xattr_req); +- return 0; +-} +- +-int +-shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- if (op_ret) +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Unlock failed. Please check brick logs for " +- "more details"); +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_entrylk_t *lock = NULL; +- +- local = frame->local; +- lk_frame = local->entrylk_frame; +- lk_local = lk_frame->local; +- local->entrylk_frame = NULL; +- lock = &lk_local->int_entrylk; +- loc = &lock->loc; +- +- STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->entrylk, this->name, loc, +- lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, +- NULL); +- local->int_entrylk.acquired_lock = _gf_false; +- return 0; +-} +- +-int +-shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (local->fop) { +- case GF_FOP_UNLINK: +- case GF_FOP_RENAME: +- 
shard_create_marker_file_under_remove_me(frame, this, +- &local->int_inodelk.loc); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "post-entrylk handler not defined. This case should not" +- " be hit"); +- break; +- } +- return 0; +-} +- +-int +-shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- call_frame_t *main_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *main_local = NULL; +- +- local = frame->local; +- main_frame = local->main_frame; +- main_local = main_frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(main_local->fop, main_frame, op_ret, +- op_errno); +- return 0; +- } +- main_local->int_entrylk.acquired_lock = _gf_true; +- shard_post_entrylk_fop_handler(main_frame, this); +- return 0; +-} +- +-int +-shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, +- uuid_t gfid) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_local_t *entrylk_local = NULL; +- shard_entrylk_t *int_entrylk = NULL; +- call_frame_t *entrylk_frame = NULL; +- +- local = frame->local; +- entrylk_frame = create_frame(this, this->ctx->pool); +- if (!entrylk_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to lock marker file"); +- goto err; +- } +- +- entrylk_local = mem_get0(this->local_pool); +- if (!entrylk_local) { +- STACK_DESTROY(entrylk_frame->root); +- goto err; +- } +- +- entrylk_frame->local = entrylk_local; +- entrylk_local->main_frame = frame; +- int_entrylk = &entrylk_local->int_entrylk; +- +- int_entrylk->loc.inode = inode_ref(inode); +- set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); +- local->entrylk_frame = entrylk_frame; +- gf_uuid_unparse(gfid, gfid_str); +- int_entrylk->basename = gf_strdup(gfid_str); +- +- STACK_WIND(entrylk_frame, 
shard_acquire_entrylk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, +- int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- +- if (local->prebuf.ia_nlink > 1) { +- gf_msg_debug(this->name, 0, +- "link count on %s > 1:%d, " +- "performing rename()/unlink()", +- local->int_inodelk.loc.path, local->prebuf.ia_nlink); +- if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- else if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- } else { +- gf_msg_debug(this->name, 0, +- "link count on %s = 1, creating " +- "file under .remove_me", +- local->int_inodelk.loc.path); +- local->cleanup_required = _gf_true; +- shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, +- local->prebuf.ia_gfid); +- } +- return 0; +-} +- +-int +-shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (local->fop) { +- case GF_FOP_UNLINK: +- case GF_FOP_RENAME: +- shard_lookup_base_file(frame, this, &local->int_inodelk.loc, +- shard_post_lookup_base_shard_rm_handler); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "post-inodelk handler not defined. 
This case should not" +- " be hit"); +- break; +- } +- return 0; +-} +- +-int +-shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- call_frame_t *main_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *main_local = NULL; +- +- local = frame->local; +- main_frame = local->main_frame; +- main_local = main_frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(main_local->fop, main_frame, op_ret, +- op_errno); +- return 0; +- } +- main_local->int_inodelk.acquired_lock = _gf_true; +- shard_post_inodelk_fop_handler(main_frame, this); +- return 0; +-} +- +-int +-shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) +-{ +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_inodelk_t *int_inodelk = NULL; +- +- local = frame->local; +- lk_frame = create_frame(this, this->ctx->pool); +- if (!lk_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to lock base shard"); +- goto err; +- } +- lk_local = mem_get0(this->local_pool); +- if (!lk_local) { +- STACK_DESTROY(lk_frame->root); +- goto err; +- } +- +- lk_frame->local = lk_local; +- lk_local->main_frame = frame; +- int_inodelk = &lk_local->int_inodelk; +- +- int_inodelk->flock.l_len = 0; +- int_inodelk->flock.l_start = 0; +- int_inodelk->domain = this->name; +- int_inodelk->flock.l_type = F_WRLCK; +- loc_copy(&local->int_inodelk.loc, loc); +- set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); +- local->inodelk_frame = lk_frame; +- +- STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, +- &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +-} +- +-int 
+-shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- if (local->fop == GF_FOP_UNLINK) +- loc = &local->loc; +- else if (local->fop == GF_FOP_RENAME) +- loc = &local->loc2; +- shard_acquire_inodelk(frame, this, loc); +- return 0; +-} +- +-int +-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler, +- shard_internal_dir_type_t type); +-int +-shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- return 0; +-} +- +-void +-shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- +- local->dot_shard_rm_loc.inode = inode_find(this->itable, +- priv->dot_shard_rm_gfid); +- if (!local->dot_shard_rm_loc.inode) { +- local->dot_shard_loc.inode = inode_find(this->itable, +- priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_pre_mkdir_rm_handler; +- shard_refresh_internal_dir(frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- } else { +- local->post_res_handler = shard_post_mkdir_rm_handler; +- shard_refresh_internal_dir(frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- } +-} +- +-int +-shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, +- dict_t *xdata) +-{ +- int ret = -1; +- 
uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- loc_copy(&local->loc, loc); +- local->xflag = xflag; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- local->block_size = block_size; +- local->resolver_base_inode = loc->inode; +- local->fop = GF_FOP_UNLINK; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- +- local->resolve_not = _gf_true; +- shard_begin_rm_resolution(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_rename_cbk(frame, this); +- return 0; +-} +- +-int +-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- struct iatt *preoldparent, struct iatt *postoldparent, +- struct iatt *prenewparent, struct iatt *postnewparent, +- dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } +- /* Set ctx->refresh to TRUE to force a lookup on disk when +- * shard_lookup_base_file() is called next to refresh the hard link +- * count in ctx. Note that this is applicable only to the case where +- * the rename dst is already existent and sharded. 
+- */ +- if ((local->dst_block_size) && (!local->cleanup_required)) +- shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); +- +- local->prebuf = *buf; +- local->preoldparent = *preoldparent; +- local->postoldparent = *postoldparent; +- local->prenewparent = *prenewparent; +- local->postnewparent = *postnewparent; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- +- if (local->dst_block_size) { +- if (local->entrylk_frame) { +- ret = shard_unlock_entrylk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- } +- +- ret = shard_unlock_inodelk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- goto err; +- } +- if (local->cleanup_required) +- shard_start_background_deletion(this); +- } +- +- /* Now the base file of src, if sharded, is looked up to gather ia_size +- * and ia_blocks.*/ +- if (local->block_size) { +- local->tmp_loc.inode = inode_new(this->itable); +- gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); +- shard_lookup_base_file(frame, this, &local->tmp_loc, +- shard_post_rename_lookup_handler); +- } else { +- shard_rename_cbk(frame, this); +- } +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int +-shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- /* Save dst base file attributes into postbuf so the information is not +- * lost when it is overwritten after lookup on base file of src in +- * shard_lookup_base_file_cbk(). 
+- */ +- local->postbuf = local->prebuf; +- shard_rename_src_base_file(frame, this); +- return 0; +-} +- +-int +-shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- uint64_t dst_block_size = 0; +- shard_local_t *local = NULL; +- +- if (IA_ISDIR(oldloc->inode->ia_type)) { +- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +- return 0; +- } +- +- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(oldloc->inode->gfid)); +- goto err; +- } +- +- if (newloc->inode) +- ret = shard_inode_ctx_get_block_size(newloc->inode, this, +- &dst_block_size); +- +- /* The following stack_wind covers the case where: +- * a. the src file is not sharded and dst doesn't exist, OR +- * b. the src and dst both exist but are not sharded. +- */ +- if (((!block_size) && (!dst_block_size)) || +- frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- loc_copy(&local->loc, oldloc); +- loc_copy(&local->loc2, newloc); +- local->resolver_base_inode = newloc->inode; +- local->fop = GF_FOP_RENAME; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- +- local->block_size = block_size; +- local->dst_block_size = dst_block_size; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- local->resolve_not = _gf_true; +- +- /* The following if-block covers the case where the dst file exists +- * and is sharded. 
+- */ +- if (local->dst_block_size) { +- shard_begin_rm_resolution(frame, this); +- } else { +- /* The following block covers the case where the dst either doesn't +- * exist or is NOT sharded but the src is sharded. In this case, shard +- * xlator would go ahead and rename src to dst. Once done, it would also +- * lookup the base shard of src to get the ia_size and ia_blocks xattr +- * values. +- */ +- shard_rename_src_base_file(frame, this); +- } +- return 0; +- +-err: +- shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, +- struct iatt *stbuf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret == -1) +- goto unwind; +- +- ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, +- SHARD_ALL_MASK); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, +- "Failed to set inode " +- "ctx for %s", +- uuid_utoa(inode->gfid)); +- +-unwind: +- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, +- preparent, postparent, xdata); +- return 0; +-} +- +-int +-shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, +- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- local->block_size = priv->block_size; +- +- if (!__is_gsyncd_on_shard_dir(frame, loc)) { +- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); +- } +- +- STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, +- xdata); +- return 0; +-err: +- 
shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +-{ +- /* To-Do: Handle open with O_TRUNC under locks */ +- SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); +- return 0; +-} +- +-int +-shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, +- fd_t *fd, dict_t *xdata) +-{ +- STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- return 0; +-} +- +-int +-shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iovec *vector, +- int32_t count, struct iatt *stbuf, struct iobref *iobref, +- dict_t *xdata) +-{ +- int i = 0; +- int call_count = 0; +- void *address = NULL; +- uint64_t block_num = 0; +- off_t off = 0; +- struct iovec vec = { +- 0, +- }; +- shard_local_t *local = NULL; +- fd_t *anon_fd = cookie; +- shard_inode_ctx_t *ctx = NULL; +- +- local = frame->local; +- +- /* If shard has already seen a failure here before, there is no point +- * in aggregating subsequent reads, so just go to out. 
+- */ +- if (local->op_ret < 0) +- goto out; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto out; +- } +- +- if (local->op_ret >= 0) +- local->op_ret += op_ret; +- +- shard_inode_ctx_get(anon_fd->inode, this, &ctx); +- block_num = ctx->block_num; +- +- if (block_num == local->first_block) { +- address = local->iobuf->ptr; +- } else { +- /* else +- * address to start writing to = beginning of buffer + +- * number of bytes until end of first block + +- * + block_size times number of blocks +- * between the current block and the first +- */ +- address = (char *)local->iobuf->ptr + +- (local->block_size - (local->offset % local->block_size)) + +- ((block_num - local->first_block - 1) * local->block_size); +- } +- +- for (i = 0; i < count; i++) { +- address = (char *)address + off; +- memcpy(address, vector[i].iov_base, vector[i].iov_len); +- off += vector[i].iov_len; +- } +- +-out: +- if (anon_fd) +- fd_unref(anon_fd); +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- } else { +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- vec.iov_base = local->iobuf->ptr; +- vec.iov_len = local->total_size; +- local->op_ret = local->total_size; +- SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, +- &vec, 1, &local->prebuf, local->iobref, +- local->xattr_rsp); +- return 0; +- } +- } +- +- return 0; +-} +- +-int +-shard_readv_do(call_frame_t *frame, xlator_t *this) +-{ +- int i = 0; +- int call_count = 0; +- int last_block = 0; +- int cur_block = 0; +- off_t orig_offset = 0; +- off_t shard_offset = 0; +- size_t read_size = 0; +- size_t remaining_size = 0; +- fd_t *fd = NULL; +- fd_t *anon_fd = NULL; +- shard_local_t *local = NULL; +- gf_boolean_t wind_failed = _gf_false; +- +- local = frame->local; +- fd = local->fd; +- +- orig_offset = 
local->offset; +- cur_block = local->first_block; +- last_block = local->last_block; +- remaining_size = local->total_size; +- local->call_count = call_count = local->num_blocks; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- if (fd->flags & O_DIRECT) +- local->flags = O_DIRECT; +- +- while (cur_block <= last_block) { +- if (wind_failed) { +- shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, +- 0, NULL, NULL, NULL); +- goto next; +- } +- +- shard_offset = orig_offset % local->block_size; +- read_size = local->block_size - shard_offset; +- if (read_size > remaining_size) +- read_size = remaining_size; +- +- remaining_size -= read_size; +- +- if (cur_block == 0) { +- anon_fd = fd_ref(fd); +- } else { +- anon_fd = fd_anonymous(local->inode_list[i]); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, 0, NULL, NULL, NULL); +- goto next; +- } +- } ++int shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; + +- STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readv, anon_fd, read_size, +- shard_offset, local->flags, local->xattr_req); ++ priv = this->private; ++ local = frame->local; + +- orig_offset += read_size; +- next: +- cur_block++; +- i++; +- call_count--; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; ++ } ++ ++ if (local->prebuf.ia_nlink > 1) { ++ gf_msg_debug(this->name, 0, "link count on %s > 1:%d, " ++ "performing rename()/unlink()", ++ local->int_inodelk.loc.path, local->prebuf.ia_nlink); ++ if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ else if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ } else { ++ gf_msg_debug(this->name, 0, "link count on %s = 
1, creating " ++ "file under .remove_me", ++ local->int_inodelk.loc.path); ++ local->cleanup_required = _gf_true; ++ shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, ++ local->prebuf.ia_gfid); ++ } ++ return 0; ++} ++ ++int shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_lookup_base_file(frame, this, &local->int_inodelk.loc, ++ shard_post_lookup_base_shard_rm_handler); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-inodelk handler not defined. This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); ++ return 0; ++ } ++ main_local->int_inodelk.acquired_lock = _gf_true; ++ shard_post_inodelk_fop_handler(main_frame, this); ++ return 0; ++} ++ ++int shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) { ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *int_inodelk = NULL; ++ ++ local = frame->local; ++ lk_frame = create_frame(this, this->ctx->pool); ++ if (!lk_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to lock base shard"); ++ goto err; ++ } ++ lk_local = mem_get0(this->local_pool); ++ if (!lk_local) { ++ STACK_DESTROY(lk_frame->root); ++ goto err; ++ } ++ ++ lk_frame->local = lk_local; ++ lk_local->main_frame = frame; ++ 
int_inodelk = &lk_local->int_inodelk; ++ ++ int_inodelk->flock.l_len = 0; ++ int_inodelk->flock.l_start = 0; ++ int_inodelk->domain = this->name; ++ int_inodelk->flock.l_type = F_WRLCK; ++ loc_copy(&local->int_inodelk.loc, loc); ++ set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); ++ local->inodelk_frame = lk_frame; ++ ++ STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, ++ &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int shard_block_num = (long)cookie; +- int call_count = 0; +- shard_local_t *local = NULL; ++int shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret < 0) { +- if (op_errno == EEXIST) { +- LOCK(&frame->lock); +- { +- local->eexist_count++; +- } +- UNLOCK(&frame->lock); +- } else { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } +- gf_msg_debug(this->name, 0, +- "mknod of shard %d " +- "failed: %s", +- shard_block_num, strerror(op_errno)); +- goto done; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++ } ++ if (local->fop == GF_FOP_UNLINK) ++ loc = &local->loc; ++ else if (local->fop == GF_FOP_RENAME) ++ loc = &local->loc2; ++ shard_acquire_inodelk(frame, this, loc); ++ return 0; ++} + +- shard_link_block_inode(local, shard_block_num, inode, buf); ++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type); ++int 
shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +-done: +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- local->create_count = 0; +- local->post_mknod_handler(frame, this); +- } ++ local = frame->local; + ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; ++ } ++ shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ return 0; + } + +-int +-shard_common_resume_mknod(call_frame_t *frame, xlator_t *this, +- shard_post_mknod_fop_handler_t post_mknod_handler) +-{ +- int i = 0; +- int shard_idx_iter = 0; +- int last_block = 0; +- int ret = 0; +- int call_count = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- mode_t mode = 0; +- char *bname = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t ctx_tmp = { +- 0, +- }; +- shard_local_t *local = NULL; +- gf_boolean_t wind_failed = _gf_false; +- fd_t *fd = NULL; +- loc_t loc = { +- 0, +- }; +- dict_t *xattr_req = NULL; +- +- local = frame->local; +- priv = this->private; +- fd = local->fd; +- shard_idx_iter = local->first_block; +- last_block = local->last_block; +- call_count = local->call_count = local->create_count; +- local->post_mknod_handler = post_mknod_handler; ++void shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- SHARD_SET_ROOT_FS_ID(frame, local); ++ priv = this->private; ++ local = frame->local; + +- ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get inode " +- "ctx for %s", +- uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); ++ local->dot_shard_rm_loc.inode = ++ 
inode_find(this->itable, priv->dot_shard_rm_gfid); ++ if (!local->dot_shard_rm_loc.inode) { ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_pre_mkdir_rm_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ } else { ++ local->post_res_handler = shard_post_mkdir_rm_handler; ++ shard_refresh_internal_dir(frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ } ++} ++ ++int shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ loc_copy(&local->loc, loc); ++ local->xflag = xflag; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ local->block_size = block_size; ++ local->resolver_base_inode = loc->inode; ++ local->fop = GF_FOP_UNLINK; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ ++ local->resolve_not = _gf_true; ++ shard_begin_rm_resolution(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); ++ return 0; ++} + +- while (shard_idx_iter <= last_block) { +- if (local->inode_list[i]) { +- shard_idx_iter++; +- i++; +- continue; +- } ++int shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) { ++ shard_rename_cbk(frame, this); ++ return 0; ++} + +- if (wind_failed) { +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; + +- shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, +- sizeof(path)); +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++ local = frame->local; + +- bname = strrchr(path, '/') + 1; +- loc.inode = inode_new(this->itable); +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0 || !(loc.inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- "on %s, base file gfid = %s", +- bname, uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- 
loc_wipe(&loc); +- dict_unref(xattr_req); +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ /* Set ctx->refresh to TRUE to force a lookup on disk when ++ * shard_lookup_base_file() is called next to refresh the hard link ++ * count in ctx. Note that this is applicable only to the case where ++ * the rename dst is already existent and sharded. ++ */ ++ if ((local->dst_block_size) && (!local->cleanup_required)) ++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); ++ ++ local->prebuf = *buf; ++ local->preoldparent = *preoldparent; ++ local->postoldparent = *postoldparent; ++ local->prenewparent = *prenewparent; ++ local->postnewparent = *postnewparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); + +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, +- (void *)(long)shard_idx_iter, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->mknod, &loc, mode, +- ctx_tmp.stat.ia_rdev, 0, xattr_req); +- loc_wipe(&loc); +- dict_unref(xattr_req); +- +- next: +- shard_idx_iter++; +- i++; +- if (!--call_count) +- break; ++ if (local->dst_block_size) { ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } + } + +- return 0; ++ ret = shard_unlock_inodelk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ goto err; ++ } ++ if (local->cleanup_required) ++ shard_start_background_deletion(this); ++ } ++ ++ /* Now the base file of src, if sharded, is looked up to gather ia_size ++ * and ia_blocks.*/ ++ if (local->block_size) { ++ local->tmp_loc.inode = inode_new(this->itable); ++ gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); ++ shard_lookup_base_file(frame, this, 
&local->tmp_loc, ++ shard_post_rename_lookup_handler); ++ } else { ++ shard_rename_cbk(frame, this); ++ } ++ return 0; + err: +- /* +- * This block is for handling failure in shard_inode_ctx_get_all(). +- * Failures in the while-loop are handled within the loop. +- */ +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- post_mknod_handler(frame, this); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } + +-int +-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); +- +-int +-shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++int shard_post_lookup_dst_base_file_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (local->create_count) { +- shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); +- } else { +- shard_readv_do(frame, this); +- } ++ local = frame->local; + ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ /* Save dst base file attributes into postbuf so the information is not ++ * lost when it is overwritten after lookup on base file of src in ++ * shard_lookup_base_file_cbk(). 
++ */ ++ local->postbuf = local->prebuf; ++ shard_rename_src_base_file(frame, this); ++ return 0; ++} ++ ++int shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ uint64_t dst_block_size = 0; ++ shard_local_t *local = NULL; ++ ++ if (IA_ISDIR(oldloc->inode->ia_type)) { ++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(oldloc->inode->gfid)); ++ goto err; ++ } ++ ++ if (newloc->inode) ++ ret = shard_inode_ctx_get_block_size(newloc->inode, this, &dst_block_size); ++ ++ /* The following stack_wind covers the case where: ++ * a. the src file is not sharded and dst doesn't exist, OR ++ * b. the src and dst both exist but are not sharded. ++ */ ++ if (((!block_size) && (!dst_block_size)) || ++ frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ loc_copy(&local->loc, oldloc); ++ loc_copy(&local->loc2, newloc); ++ local->resolver_base_inode = newloc->inode; ++ local->fop = GF_FOP_RENAME; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ local->block_size = block_size; ++ local->dst_block_size = dst_block_size; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ local->resolve_not = _gf_true; ++ ++ /* The following if-block covers the case where the dst file exists ++ * and is sharded. 
++ */ ++ if (local->dst_block_size) { ++ shard_begin_rm_resolution(frame, this); ++ } else { ++ /* The following block covers the case where the dst either doesn't ++ * exist or is NOT sharded but the src is sharded. In this case, shard ++ * xlator would go ahead and rename src to dst. Once done, it would also ++ * lookup the base shard of src to get the ia_size and ia_blocks xattr ++ * values. ++ */ ++ shard_rename_src_base_file(frame, this); ++ } ++ return 0; ++ ++err: ++ shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, ++ struct iatt *stbuf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int ret = -1; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ if (op_ret == -1) ++ goto unwind; + +- if (!local->eexist_count) { +- shard_readv_do(frame, this); +- } else { +- local->call_count = local->eexist_count; +- shard_common_lookup_shards(frame, this, local->loc.inode, +- shard_post_lookup_shards_readv_handler); +- } +- return 0; ++ ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, ++ SHARD_ALL_MASK); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, ++ "Failed to set inode " ++ "ctx for %s", ++ uuid_utoa(inode->gfid)); ++ ++unwind: ++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, ++ preparent, postparent, xdata); ++ return 0; + } + +-int +-shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, 
++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (local->op_ret < 0) { +- if (local->op_errno != ENOENT) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } else { +- struct iovec vec = { +- 0, +- }; +- +- vec.iov_base = local->iobuf->ptr; +- vec.iov_len = local->total_size; +- local->op_ret = local->total_size; +- SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, +- &local->prebuf, local->iobref, NULL); +- return 0; +- } +- } ++ frame->local = local; ++ local->block_size = priv->block_size; + +- if (local->call_count) { +- shard_common_lookup_shards(frame, this, local->resolver_base_inode, +- shard_post_lookup_shards_readv_handler); +- } else { +- shard_readv_do(frame, this); +- } ++ if (!__is_gsyncd_on_shard_dir(frame, loc)) { ++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ } + +- return 0; +-} ++ STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, ++ xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { ++ /* To-Do: Handle open with O_TRUNC under locks */ ++ SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); ++ return 0; ++} ++ ++int shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ fd_t *fd, dict_t *xdata) { ++ STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); ++ return 0; ++} ++ ++int shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iovec *vector, ++ 
int32_t count, struct iatt *stbuf, struct iobref *iobref, ++ dict_t *xdata) { ++ int i = 0; ++ int call_count = 0; ++ void *address = NULL; ++ uint64_t block_num = 0; ++ off_t off = 0; ++ struct iovec vec = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ fd_t *anon_fd = cookie; ++ shard_inode_ctx_t *ctx = NULL; ++ ++ local = frame->local; ++ ++ /* If shard has already seen a failure here before, there is no point ++ * in aggregating subsequent reads, so just go to out. ++ */ ++ if (local->op_ret < 0) ++ goto out; ++ ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto out; ++ } ++ ++ if (local->op_ret >= 0) ++ local->op_ret += op_ret; + +-int +-shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- struct iobuf *iobuf = NULL; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; ++ shard_inode_ctx_get(anon_fd->inode, this, &ctx); ++ block_num = ctx->block_num; ++ ++ if (block_num == local->first_block) { ++ address = local->iobuf->ptr; ++ } else { ++ /* else ++ * address to start writing to = beginning of buffer + ++ * number of bytes until end of first block + ++ * + block_size times number of blocks ++ * between the current block and the first ++ */ ++ address = (char *)local->iobuf->ptr + ++ (local->block_size - (local->offset % local->block_size)) + ++ ((block_num - local->first_block - 1) * local->block_size); ++ } + +- priv = this->private; +- local = frame->local; ++ for (i = 0; i < count; i++) { ++ address = (char *)address + off; ++ memcpy(address, vector[i].iov_base, vector[i].iov_len); ++ off += vector[i].iov_len; ++ } + ++out: ++ if (anon_fd) ++ fd_unref(anon_fd); ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); + if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ 
local->op_errno); ++ } else { ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ vec.iov_base = local->iobuf->ptr; ++ vec.iov_len = local->total_size; ++ local->op_ret = local->total_size; ++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1, ++ &local->prebuf, local->iobref, local->xattr_rsp); ++ return 0; ++ } ++ } ++ ++ return 0; ++} ++ ++int shard_readv_do(call_frame_t *frame, xlator_t *this) { ++ int i = 0; ++ int call_count = 0; ++ int last_block = 0; ++ int cur_block = 0; ++ off_t orig_offset = 0; ++ off_t shard_offset = 0; ++ size_t read_size = 0; ++ size_t remaining_size = 0; ++ fd_t *fd = NULL; ++ fd_t *anon_fd = NULL; ++ shard_local_t *local = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ ++ local = frame->local; ++ fd = local->fd; ++ ++ orig_offset = local->offset; ++ cur_block = local->first_block; ++ last_block = local->last_block; ++ remaining_size = local->total_size; ++ local->call_count = call_count = local->num_blocks; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ if (fd->flags & O_DIRECT) ++ local->flags = O_DIRECT; ++ ++ while (cur_block <= last_block) { ++ if (wind_failed) { ++ shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, 0, ++ NULL, NULL, NULL); ++ goto next; ++ } ++ ++ shard_offset = orig_offset % local->block_size; ++ read_size = local->block_size - shard_offset; ++ if (read_size > remaining_size) ++ read_size = remaining_size; ++ ++ remaining_size -= read_size; ++ ++ if (cur_block == 0) { ++ anon_fd = fd_ref(fd); ++ } else { ++ anon_fd = fd_anonymous(local->inode_list[i]); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, NULL, ++ 0, NULL, NULL, NULL); ++ goto next; ++ } + } + +- if (local->offset >= local->prebuf.ia_size) { +- /* If the read is being performed past the end of the file, +- * unwind the FOP with 0 bytes read as status. 
+- */ +- struct iovec vec = { +- 0, +- }; +- +- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); +- if (!iobuf) +- goto err; +- +- vec.iov_base = iobuf->ptr; +- vec.iov_len = 0; +- local->iobref = iobref_new(); +- iobref_add(local->iobref, iobuf); +- iobuf_unref(iobuf); +- +- SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, +- local->iobref, NULL); +- return 0; +- } ++ STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, anon_fd, read_size, ++ shard_offset, local->flags, local->xattr_req); ++ ++ orig_offset += read_size; ++ next: ++ cur_block++; ++ i++; ++ call_count--; ++ } ++ return 0; ++} + +- local->first_block = get_lowest_block(local->offset, local->block_size); ++int shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int shard_block_num = (long)cookie; ++ int call_count = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ if (op_errno == EEXIST) { ++ LOCK(&frame->lock); ++ { local->eexist_count++; } ++ UNLOCK(&frame->lock); ++ } else { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } ++ gf_msg_debug(this->name, 0, "mknod of shard %d " ++ "failed: %s", ++ shard_block_num, strerror(op_errno)); ++ goto done; ++ } + +- local->total_size = local->req_size; ++ shard_link_block_inode(local, shard_block_num, inode, buf); + +- local->last_block = get_highest_block(local->offset, local->total_size, +- local->block_size); ++done: ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ local->create_count = 0; ++ local->post_mknod_handler(frame, this); ++ } ++ ++ return 0; ++} ++ ++int shard_common_resume_mknod( ++ call_frame_t *frame, xlator_t *this, ++ shard_post_mknod_fop_handler_t post_mknod_handler) { ++ 
int i = 0; ++ int shard_idx_iter = 0; ++ int last_block = 0; ++ int ret = 0; ++ int call_count = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ mode_t mode = 0; ++ char *bname = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t ctx_tmp = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ fd_t *fd = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ dict_t *xattr_req = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ fd = local->fd; ++ shard_idx_iter = local->first_block; ++ last_block = local->last_block; ++ call_count = local->call_count = local->create_count; ++ local->post_mknod_handler = post_mknod_handler; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get inode " ++ "ctx for %s", ++ uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); + +- local->num_blocks = local->last_block - local->first_block + 1; +- local->resolver_base_inode = local->loc.inode; ++ while (shard_idx_iter <= last_block) { ++ if (local->inode_list[i]) { ++ shard_idx_iter++; ++ i++; ++ continue; ++ } + +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto err; ++ if (wind_failed) { ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; ++ } + +- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); +- if (!iobuf) +- goto err; ++ shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, ++ sizeof(path)); + +- local->iobref = iobref_new(); +- if (!local->iobref) { +- iobuf_unref(iobuf); +- goto err; ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ 
local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; + } + +- if (iobref_add(local->iobref, iobuf) != 0) { +- iobuf_unref(iobuf); +- goto err; ++ bname = strrchr(path, '/') + 1; ++ loc.inode = inode_new(this->itable); ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0 || !(loc.inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ "on %s, base file gfid = %s", ++ bname, uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; + } + +- memset(iobuf->ptr, 0, local->total_size); +- iobuf_unref(iobuf); +- local->iobuf = iobuf; ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = shard_init_internal_dir_loc(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto err; +- shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_readv_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; ++ STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, ++ (void *)(long)shard_idx_iter, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, &loc, mode, ++ ctx_tmp.stat.ia_rdev, 0, xattr_req); ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ ++ next: ++ shard_idx_iter++; ++ i++; ++ if (!--call_count) ++ break; ++ } ++ ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); +- return 0; 
++ /* ++ * This block is for handling failure in shard_inode_ctx_get_all(). ++ * Failures in the while-loop are handled within the loop. ++ */ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ post_mknod_handler(frame, this); ++ return 0; + } + +-int +-shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, uint32_t flags, dict_t *xdata) +-{ +- int ret = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++int shard_post_lookup_shards_readv_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- /* block_size = 0 means that the file was created before +- * sharding was enabled on the volume. +- */ +- STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, +- xdata); +- return 0; +- } ++ local = frame->local; + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ if (local->create_count) { ++ shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); ++ } else { ++ shard_readv_do(frame, this); ++ } + +- frame->local = local; ++ return 0; ++} + +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- local->fd = fd_ref(fd); +- local->block_size = block_size; +- local->offset = offset; +- local->req_size = size; +- local->flags = flags; +- local->fop = GF_FOP_READ; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local = frame->local; + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_readv_handler); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ if (!local->eexist_count) { ++ shard_readv_do(frame, this); ++ } else { ++ local->call_count = local->eexist_count; ++ shard_common_lookup_shards(frame, this, local->loc.inode, ++ shard_post_lookup_shards_readv_handler); ++ } ++ return 0; + } + +-int +-shard_common_inode_write_post_update_size_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); ++ if (local->op_ret < 0) { ++ if (local->op_errno != ENOENT) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } else { +- shard_common_inode_write_success_unwind(local->fop, frame, +- local->written_size); ++ struct iovec vec = { ++ 0, ++ }; ++ ++ vec.iov_base = local->iobuf->ptr; ++ vec.iov_len = local->total_size; ++ local->op_ret = local->total_size; ++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, ++ &local->prebuf, local->iobref, NULL); ++ return 0; + } +- return 0; +-} ++ } + +-static gf_boolean_t +-shard_is_appending_write(shard_local_t *local) +-{ +- if (local->fop != GF_FOP_WRITE) +- return _gf_false; +- if (local->flags & O_APPEND) +- return 
_gf_true; +- if (local->fd->flags & O_APPEND) +- return _gf_true; +- return _gf_false; ++ if (local->call_count) { ++ shard_common_lookup_shards(frame, this, local->resolver_base_inode, ++ shard_post_lookup_shards_readv_handler); ++ } else { ++ shard_readv_do(frame, this); ++ } ++ ++ return 0; + } + +-int +-__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ struct iobuf *iobuf = NULL; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ if (local->offset >= local->prebuf.ia_size) { ++ /* If the read is being performed past the end of the file, ++ * unwind the FOP with 0 bytes read as status. 
++ */ ++ struct iovec vec = { ++ 0, ++ }; + +- if (shard_is_appending_write(local)) { +- local->delta_size = local->total_size; +- } else if (local->offset + local->total_size > ctx->stat.ia_size) { +- local->delta_size = (local->offset + local->total_size) - +- ctx->stat.ia_size; +- } else { +- local->delta_size = 0; +- } +- ctx->stat.ia_size += (local->delta_size); +- local->postbuf = ctx->stat; ++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); ++ if (!iobuf) ++ goto err; ++ ++ vec.iov_base = iobuf->ptr; ++ vec.iov_len = 0; ++ local->iobref = iobref_new(); ++ iobref_add(local->iobref, iobuf); ++ iobuf_unref(iobuf); + ++ SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, ++ local->iobref, NULL); + return 0; +-} ++ } + +-int +-shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; ++ local->first_block = get_lowest_block(local->offset, local->block_size); + +- LOCK(&inode->lock); +- { +- ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); +- } +- UNLOCK(&inode->lock); ++ local->total_size = local->req_size; + +- return ret; +-} ++ local->last_block = ++ get_highest_block(local->offset, local->total_size, local->block_size); + +-int +-shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, struct iatt *pre, +- struct iatt *post, dict_t *xdata) +-{ +- int call_count = 0; +- fd_t *anon_fd = cookie; +- shard_local_t *local = NULL; +- glusterfs_fop_t fop = 0; ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->resolver_base_inode = local->loc.inode; + +- local = frame->local; +- fop = local->fop; ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ goto err; + +- LOCK(&frame->lock); +- { +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } else { +- local->written_size += 
op_ret; +- GF_ATOMIC_ADD(local->delta_blocks, +- post->ia_blocks - pre->ia_blocks); +- local->delta_size += (post->ia_size - pre->ia_size); +- shard_inode_ctx_set(local->fd->inode, this, post, 0, +- SHARD_MASK_TIMES); +- if (local->fd->inode != anon_fd->inode) +- shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, +- anon_fd->inode); +- } +- } +- UNLOCK(&frame->lock); ++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); ++ if (!iobuf) ++ goto err; + +- if (anon_fd) +- fd_unref(anon_fd); ++ local->iobref = iobref_new(); ++ if (!local->iobref) { ++ iobuf_unref(iobuf); ++ goto err; ++ } + +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (local->op_ret < 0) { +- shard_common_failure_unwind(fop, frame, local->op_ret, +- local->op_errno); +- } else { +- shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); +- local->hole_size = 0; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- shard_update_file_size( +- frame, this, local->fd, NULL, +- shard_common_inode_write_post_update_size_handler); +- } +- } ++ if (iobref_add(local->iobref, iobuf) != 0) { ++ iobuf_unref(iobuf); ++ goto err; ++ } + +- return 0; ++ memset(iobuf->ptr, 0, local->total_size); ++ iobuf_unref(iobuf); ++ local->iobuf = iobuf; ++ ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ ret = ++ shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret) ++ goto err; ++ shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_post_resolve_readv_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ 
off_t offset, uint32_t flags, dict_t *xdata) { ++ int ret = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ /* block_size = 0 means that the file was created before ++ * sharding was enabled on the volume. ++ */ ++ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ local->fd = fd_ref(fd); ++ local->block_size = block_size; ++ local->offset = offset; ++ local->req_size = size; ++ local->flags = flags; ++ local->fop = GF_FOP_READ; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_readv_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iovec *vec, int count, off_t shard_offset, +- size_t size) +-{ +- shard_local_t *local = NULL; ++int shard_common_inode_write_post_update_size_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- switch (local->fop) { +- case GF_FOP_WRITE: +- STACK_WIND_COOKIE( +- frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset, +- local->flags, local->iobref, local->xattr_req); +- break; +- case GF_FOP_FALLOCATE: +- STACK_WIND_COOKIE( +- frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fallocate, fd, local->flags, +- shard_offset, size, local->xattr_req); +- break; +- case GF_FOP_ZEROFILL: +- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, +- FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->zerofill, fd, +- shard_offset, size, local->xattr_req); +- break; +- case GF_FOP_DISCARD: +- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, +- FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->discard, fd, +- shard_offset, size, local->xattr_req); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", local->fop); +- break; +- } +- return 0; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ shard_common_inode_write_success_unwind(local->fop, frame, ++ local->written_size); ++ } ++ 
return 0; + } + +-int +-shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) +-{ +- int i = 0; +- int count = 0; +- int call_count = 0; +- int last_block = 0; +- uint32_t cur_block = 0; +- fd_t *fd = NULL; +- fd_t *anon_fd = NULL; +- shard_local_t *local = NULL; +- struct iovec *vec = NULL; +- gf_boolean_t wind_failed = _gf_false; +- gf_boolean_t odirect = _gf_false; +- off_t orig_offset = 0; +- off_t shard_offset = 0; +- off_t vec_offset = 0; +- size_t remaining_size = 0; +- size_t shard_write_size = 0; +- +- local = frame->local; +- fd = local->fd; +- +- orig_offset = local->offset; +- remaining_size = local->total_size; +- cur_block = local->first_block; +- local->call_count = call_count = local->num_blocks; +- last_block = local->last_block; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC +- " into " +- "dict: %s", +- uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- local->call_count = 1; +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, +- ENOMEM, NULL, NULL, NULL); +- return 0; +- } ++static gf_boolean_t shard_is_appending_write(shard_local_t *local) { ++ if (local->fop != GF_FOP_WRITE) ++ return _gf_false; ++ if (local->flags & O_APPEND) ++ return _gf_true; ++ if (local->fd->flags & O_APPEND) ++ return _gf_true; ++ return _gf_false; ++} + +- if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) +- odirect = _gf_true; ++int __shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- while (cur_block <= last_block) { +- if (wind_failed) { +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } ++ ret = __inode_ctx_get(inode, 
this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- shard_offset = orig_offset % local->block_size; +- shard_write_size = local->block_size - shard_offset; +- if (shard_write_size > remaining_size) +- shard_write_size = remaining_size; +- +- remaining_size -= shard_write_size; +- +- if (local->fop == GF_FOP_WRITE) { +- count = iov_subset(local->vector, local->count, vec_offset, +- vec_offset + shard_write_size, NULL); +- +- vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); +- if (!vec) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- GF_FREE(vec); +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, +- -1, ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- count = iov_subset(local->vector, local->count, vec_offset, +- vec_offset + shard_write_size, vec); +- } ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- if (cur_block == 0) { +- anon_fd = fd_ref(fd); +- } else { +- anon_fd = fd_anonymous(local->inode_list[i]); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- GF_FREE(vec); +- shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, +- this, -1, ENOMEM, NULL, NULL, +- NULL); +- goto next; +- } +- +- if (local->fop == GF_FOP_WRITE) { +- if (odirect) +- local->flags = O_DIRECT; +- else +- local->flags = GF_ANON_FD_FLAGS; +- } +- } ++ if (shard_is_appending_write(local)) { ++ local->delta_size = local->total_size; ++ } else if (local->offset + local->total_size > ctx->stat.ia_size) { ++ local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size; ++ } else { ++ local->delta_size = 0; ++ } ++ ctx->stat.ia_size += (local->delta_size); ++ local->postbuf = ctx->stat; + +- shard_common_inode_write_wind(frame, this, anon_fd, vec, count, +- shard_offset, shard_write_size); +- if (vec) +- vec_offset += shard_write_size; +- orig_offset += shard_write_size; +- GF_FREE(vec); +- vec = NULL; +- next: +- cur_block++; +- i++; +- call_count--; 
+- } +- return 0; ++ return 0; + } + +-int +-shard_common_inode_write_post_mknod_handler(call_frame_t *frame, +- xlator_t *this); ++int shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = -1; ++ ++ LOCK(&inode->lock); ++ { ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); } ++ UNLOCK(&inode->lock); + +-int +-shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = NULL; ++ return ret; ++} + +- local = frame->local; ++int shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *pre, ++ struct iatt *post, dict_t *xdata) { ++ int call_count = 0; ++ fd_t *anon_fd = cookie; ++ shard_local_t *local = NULL; ++ glusterfs_fop_t fop = 0; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ local = frame->local; ++ fop = local->fop; + +- if (local->create_count) { +- shard_common_resume_mknod(frame, this, +- shard_common_inode_write_post_mknod_handler); ++ LOCK(&frame->lock); ++ { ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; + } else { +- shard_common_inode_write_do(frame, this); ++ local->written_size += op_ret; ++ GF_ATOMIC_ADD(local->delta_blocks, post->ia_blocks - pre->ia_blocks); ++ local->delta_size += (post->ia_size - pre->ia_size); ++ shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES); ++ if (local->fd->inode != anon_fd->inode) ++ shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, ++ anon_fd->inode); ++ } ++ } ++ UNLOCK(&frame->lock); ++ ++ if (anon_fd) ++ fd_unref(anon_fd); ++ ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(fop, frame, local->op_ret, local->op_errno); ++ } else { ++ 
shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); ++ local->hole_size = 0; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ shard_update_file_size(frame, this, local->fd, NULL, ++ shard_common_inode_write_post_update_size_handler); + } ++ } + +- return 0; ++ return 0; + } + +-int +-shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iovec *vec, int count, ++ off_t shard_offset, size_t size) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ switch (local->fop) { ++ case GF_FOP_WRITE: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, ++ vec, count, shard_offset, local->flags, local->iobref, ++ local->xattr_req); ++ break; ++ case GF_FOP_FALLOCATE: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd, ++ local->flags, shard_offset, size, local->xattr_req); ++ break; ++ case GF_FOP_ZEROFILL: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd, ++ shard_offset, size, local->xattr_req); ++ break; ++ case GF_FOP_DISCARD: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd, ++ shard_offset, size, local->xattr_req); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", local->fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) { ++ int i = 0; ++ int count = 0; ++ int call_count = 0; ++ int last_block = 0; ++ uint32_t 
cur_block = 0; ++ fd_t *fd = NULL; ++ fd_t *anon_fd = NULL; ++ shard_local_t *local = NULL; ++ struct iovec *vec = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ gf_boolean_t odirect = _gf_false; ++ off_t orig_offset = 0; ++ off_t shard_offset = 0; ++ off_t vec_offset = 0; ++ size_t remaining_size = 0; ++ size_t shard_write_size = 0; ++ ++ local = frame->local; ++ fd = local->fd; ++ ++ orig_offset = local->offset; ++ remaining_size = local->total_size; ++ cur_block = local->first_block; ++ local->call_count = call_count = local->num_blocks; ++ last_block = local->last_block; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC " into " ++ "dict: %s", ++ uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ local->call_count = 1; ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ return 0; ++ } + +- if (!local->eexist_count) { +- shard_common_inode_write_do(frame, this); +- } else { +- local->call_count = local->eexist_count; +- shard_common_lookup_shards( +- frame, this, local->loc.inode, +- shard_common_inode_write_post_lookup_shards_handler); ++ if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) ++ odirect = _gf_true; ++ ++ while (cur_block <= last_block) { ++ if (wind_failed) { ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } + +- return 0; +-} ++ shard_offset = orig_offset % local->block_size; ++ shard_write_size = local->block_size - shard_offset; ++ if (shard_write_size > remaining_size) ++ shard_write_size = remaining_size; + +-int +-shard_common_inode_write_post_resolve_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = NULL; ++ remaining_size -= shard_write_size; + +- local = frame->local; 
++ if (local->fop == GF_FOP_WRITE) { ++ count = iov_subset(local->vector, local->count, vec_offset, ++ vec_offset + shard_write_size, NULL); + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); ++ if (!vec) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ GF_FREE(vec); ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } ++ count = iov_subset(local->vector, local->count, vec_offset, ++ vec_offset + shard_write_size, vec); + } + +- if (local->call_count) { +- shard_common_lookup_shards( +- frame, this, local->resolver_base_inode, +- shard_common_inode_write_post_lookup_shards_handler); ++ if (cur_block == 0) { ++ anon_fd = fd_ref(fd); + } else { +- shard_common_inode_write_do(frame, this); +- } ++ anon_fd = fd_anonymous(local->inode_list[i]); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ GF_FREE(vec); ++ shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } + +- return 0; ++ if (local->fop == GF_FOP_WRITE) { ++ if (odirect) ++ local->flags = O_DIRECT; ++ else ++ local->flags = GF_ANON_FD_FLAGS; ++ } ++ } ++ ++ shard_common_inode_write_wind(frame, this, anon_fd, vec, count, ++ shard_offset, shard_write_size); ++ if (vec) ++ vec_offset += shard_write_size; ++ orig_offset += shard_write_size; ++ GF_FREE(vec); ++ vec = NULL; ++ next: ++ cur_block++; ++ i++; ++ call_count--; ++ } ++ return 0; + } + +-int +-shard_common_inode_write_post_lookup_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- shard_priv_t *priv = this->private; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- 
local->postbuf = local->prebuf; +- +- /*Adjust offset to EOF so that correct shard is chosen for append*/ +- if (shard_is_appending_write(local)) +- local->offset = local->prebuf.ia_size; +- +- local->first_block = get_lowest_block(local->offset, local->block_size); +- local->last_block = get_highest_block(local->offset, local->total_size, +- local->block_size); +- local->num_blocks = local->last_block - local->first_block + 1; +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) { +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +- } ++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, ++ xlator_t *this); + +- gf_msg_trace(this->name, 0, +- "%s: gfid=%s first_block=%" PRIu32 +- " " +- "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64 +- " total_size=%zu flags=%" PRId32 "", +- gf_fop_list[local->fop], +- uuid_utoa(local->resolver_base_inode->gfid), +- local->first_block, local->last_block, local->num_blocks, +- local->offset, local->total_size, local->flags); ++int shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ local = frame->local; + +- if (!local->dot_shard_loc.inode) { +- /*change handler*/ +- shard_mkdir_internal_dir(frame, this, +- shard_common_inode_write_post_resolve_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- /*change handler*/ +- local->post_res_handler = shard_common_inode_write_post_resolve_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; +-} +- +-int +-shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- 
struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++ } + +- local = frame->local; ++ if (local->create_count) { ++ shard_common_resume_mknod(frame, this, ++ shard_common_inode_write_post_mknod_handler); ++ } else { ++ shard_common_inode_write_do(frame, this); ++ } + +- SHARD_UNSET_ROOT_FS_ID(frame, local); ++ return 0; ++} + +- if (op_ret == -1) { +- if (op_errno != EEXIST) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } else { +- gf_msg_debug(this->name, 0, +- "mkdir on %s failed " +- "with EEXIST. Attempting lookup now", +- shard_internal_dir_string(type)); +- shard_lookup_internal_dir(frame, this, local->post_res_handler, +- type); +- return 0; +- } +- } ++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- if (link_inode != inode) { +- shard_refresh_internal_dir(frame, this, type); +- } else { +- shard_inode_ctx_mark_dir_refreshed(link_inode, this); +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- } +- return 0; +-unwind: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; +-} ++ local = frame->local; + +-int +-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- dict_t *xattr_req = NULL; +- uuid_t *gfid = NULL; +- loc_t *loc = NULL; +- gf_boolean_t free_gfid = _gf_true; +- +- local = frame->local; +- priv = this->private; +- +- local->post_res_handler = handler; +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); +- if (!gfid) +- goto err; +- +- switch (type) { +- case 
SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(*gfid, priv->dot_shard_gfid); +- loc = &local->dot_shard_loc; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); +- loc = &local->dot_shard_rm_loc; +- break; +- default: +- bzero(*gfid, sizeof(uuid_t)); +- break; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- xattr_req = dict_new(); +- if (!xattr_req) +- goto err; ++ if (!local->eexist_count) { ++ shard_common_inode_write_do(frame, this); ++ } else { ++ local->call_count = local->eexist_count; ++ shard_common_lookup_shards( ++ frame, this, local->loc.inode, ++ shard_common_inode_write_post_lookup_shards_handler); ++ } + +- ret = shard_init_internal_dir_loc(this, local, type); +- if (ret) +- goto err; ++ return 0; ++} + +- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set gfid-req for %s", +- shard_internal_dir_string(type)); +- goto err; +- } else { +- free_gfid = _gf_false; +- } ++int shard_common_inode_write_post_resolve_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- SHARD_SET_ROOT_FS_ID(frame, local); ++ local = frame->local; + +- STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, +- 0755, 0, xattr_req); +- dict_unref(xattr_req); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } + +-err: +- if (xattr_req) +- dict_unref(xattr_req); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- if (free_gfid) +- GF_FREE(gfid); +- handler(frame, this); +- return 0; +-} ++ if (local->call_count) { ++ shard_common_lookup_shards( ++ frame, this, local->resolver_base_inode, ++ 
shard_common_inode_write_post_lookup_shards_handler); ++ } else { ++ shard_common_inode_write_do(frame, this); ++ } + +-int +-shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- /* To-Do: Wind flush on all shards of the file */ +- SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); +- return 0; ++ return 0; + } + +-int +-shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +-{ +- STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->flush, fd, xdata); ++int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = frame->local; ++ shard_priv_t *priv = this->private; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; +-} ++ } + +-int +-__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++ local->postbuf = local->prebuf; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ /*Adjust offset to EOF so that correct shard is chosen for append*/ ++ if (shard_is_appending_write(local)) ++ local->offset = local->prebuf.ia_size; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ local->first_block = get_lowest_block(local->offset, local->block_size); ++ local->last_block = ++ get_highest_block(local->offset, local->total_size, local->block_size); ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) { ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } + +- local->postbuf.ia_ctime = ctx->stat.ia_ctime; +- local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; +- local->postbuf.ia_atime = 
ctx->stat.ia_atime; +- local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; +- local->postbuf.ia_mtime = ctx->stat.ia_mtime; +- local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; ++ gf_msg_trace( ++ this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " " ++ "last_block=%" PRIu32 " num_blocks=%" PRIu32 ++ " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", ++ gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), ++ local->first_block, local->last_block, local->num_blocks, local->offset, ++ local->total_size, local->flags); + +- return 0; +-} ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + +-int +-shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = 0; ++ if (!local->dot_shard_loc.inode) { ++ /*change handler*/ ++ shard_mkdir_internal_dir(frame, this, ++ shard_common_inode_write_post_resolve_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ /*change handler*/ ++ local->post_res_handler = shard_common_inode_write_post_resolve_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; ++} + +- LOCK(&inode->lock); +- { +- ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); +- } +- UNLOCK(&inode->lock); ++int shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++ ++ local = frame->local; ++ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ ++ if (op_ret == -1) { ++ if (op_errno != EEXIST) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } else { ++ gf_msg_debug(this->name, 0, "mkdir on %s failed " ++ "with EEXIST. 
Attempting lookup now", ++ shard_internal_dir_string(type)); ++ shard_lookup_internal_dir(frame, this, local->post_res_handler, type); ++ return 0; ++ } ++ } ++ ++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ if (link_inode != inode) { ++ shard_refresh_internal_dir(frame, this, type); ++ } else { ++ shard_inode_ctx_mark_dir_refreshed(link_inode, this); ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ } ++ return 0; ++unwind: ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; ++} ++ ++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ dict_t *xattr_req = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ local->post_res_handler = handler; ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; ++ default: ++ bzero(*gfid, sizeof(uuid_t)); ++ break; ++ } ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) ++ goto err; ++ ++ ret = shard_init_internal_dir_loc(this, local, type); ++ if (ret) ++ goto err; ++ ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set gfid-req for %s", shard_internal_dir_string(type)); ++ goto err; ++ } else { ++ free_gfid = _gf_false; ++ } ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, ++ 
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, ++ 0755, 0, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + +- return ret; ++err: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ if (free_gfid) ++ GF_FREE(gfid); ++ handler(frame, this); ++ return 0; + } + +-int +-shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +- struct iatt *postbuf, dict_t *xdata) +-{ +- int call_count = 0; +- uint64_t fsync_count = 0; +- fd_t *anon_fd = cookie; +- shard_local_t *local = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *base_ictx = NULL; +- inode_t *base_inode = NULL; +- gf_boolean_t unref_shard_inode = _gf_false; ++int shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ /* To-Do: Wind flush on all shards of the file */ ++ SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); ++ return 0; ++} + +- local = frame->local; +- base_inode = local->fd->inode; ++int shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ++ STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->flush, fd, xdata); ++ return 0; ++} + +- if (local->op_ret < 0) +- goto out; ++int __shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- LOCK(&frame->lock); +- { +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- UNLOCK(&frame->lock); +- goto out; +- } +- shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, +- SHARD_MASK_TIMES); +- } +- UNLOCK(&frame->lock); +- fd_ctx_get(anon_fd, this, &fsync_count); +-out: +- if (anon_fd && (base_inode != anon_fd->inode)) { +- LOCK(&base_inode->lock); +- LOCK(&anon_fd->inode->lock); +- { +- __shard_inode_ctx_get(anon_fd->inode, this, &ctx); +- 
__shard_inode_ctx_get(base_inode, this, &base_ictx); +- if (op_ret == 0) +- ctx->fsync_needed -= fsync_count; +- GF_ASSERT(ctx->fsync_needed >= 0); +- if (ctx->fsync_needed != 0) { +- list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); +- base_ictx->fsync_count++; +- } else { +- unref_shard_inode = _gf_true; +- } +- } +- UNLOCK(&anon_fd->inode->lock); +- UNLOCK(&base_inode->lock); +- } ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- if (unref_shard_inode) +- inode_unref(anon_fd->inode); +- if (anon_fd) +- fd_unref(anon_fd); ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- call_count = shard_call_count_return(frame); +- if (call_count != 0) +- return 0; ++ local->postbuf.ia_ctime = ctx->stat.ia_ctime; ++ local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; ++ local->postbuf.ia_atime = ctx->stat.ia_atime; ++ local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; ++ local->postbuf.ia_mtime = ctx->stat.ia_mtime; ++ local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, +- local->op_errno); +- } else { +- shard_get_timestamps_from_inode_ctx(local, base_inode, this); +- SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } +- return 0; ++ return 0; + } + +-int +-shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- int call_count = 0; +- int fsync_count = 0; +- fd_t *anon_fd = NULL; +- inode_t *base_inode = NULL; +- shard_local_t *local = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *iter = NULL; +- struct list_head copy = { +- 0, +- }; +- shard_inode_ctx_t *tmp = NULL; ++int shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = 0; + +- local = frame->local; +- base_inode = local->fd->inode; +- local->postbuf = local->prebuf; +- 
INIT_LIST_HEAD(©); ++ LOCK(&inode->lock); ++ { ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); } ++ UNLOCK(&inode->lock); + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ return ret; ++} + ++int shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *prebuf, struct iatt *postbuf, ++ dict_t *xdata) { ++ int call_count = 0; ++ uint64_t fsync_count = 0; ++ fd_t *anon_fd = cookie; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *base_ictx = NULL; ++ inode_t *base_inode = NULL; ++ gf_boolean_t unref_shard_inode = _gf_false; ++ ++ local = frame->local; ++ base_inode = local->fd->inode; ++ ++ if (local->op_ret < 0) ++ goto out; ++ ++ LOCK(&frame->lock); ++ { ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ UNLOCK(&frame->lock); ++ goto out; ++ } ++ shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, SHARD_MASK_TIMES); ++ } ++ UNLOCK(&frame->lock); ++ fd_ctx_get(anon_fd, this, &fsync_count); ++out: ++ if (anon_fd && (base_inode != anon_fd->inode)) { + LOCK(&base_inode->lock); ++ LOCK(&anon_fd->inode->lock); + { +- __shard_inode_ctx_get(base_inode, this, &ctx); +- list_splice_init(&ctx->to_fsync_list, ©); +- call_count = ctx->fsync_count; +- ctx->fsync_count = 0; +- } ++ __shard_inode_ctx_get(anon_fd->inode, this, &ctx); ++ __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ if (op_ret == 0) ++ ctx->fsync_needed -= fsync_count; ++ GF_ASSERT(ctx->fsync_needed >= 0); ++ if (ctx->fsync_needed != 0) { ++ list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); ++ base_ictx->fsync_count++; ++ } else { ++ unref_shard_inode = _gf_true; ++ } ++ } ++ UNLOCK(&anon_fd->inode->lock); + UNLOCK(&base_inode->lock); ++ } ++ ++ if (unref_shard_inode) ++ inode_unref(anon_fd->inode); ++ if (anon_fd) ++ fd_unref(anon_fd); ++ ++ 
call_count = shard_call_count_return(frame); ++ if (call_count != 0) ++ return 0; + +- local->call_count = ++call_count; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ shard_get_timestamps_from_inode_ctx(local, base_inode, this); ++ SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } ++ return 0; ++} ++ ++int shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ int call_count = 0; ++ int fsync_count = 0; ++ fd_t *anon_fd = NULL; ++ inode_t *base_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *iter = NULL; ++ struct list_head copy = { ++ 0, ++ }; ++ shard_inode_ctx_t *tmp = NULL; ++ ++ local = frame->local; ++ base_inode = local->fd->inode; ++ local->postbuf = local->prebuf; ++ INIT_LIST_HEAD(©); ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ LOCK(&base_inode->lock); ++ { ++ __shard_inode_ctx_get(base_inode, this, &ctx); ++ list_splice_init(&ctx->to_fsync_list, ©); ++ call_count = ctx->fsync_count; ++ ctx->fsync_count = 0; ++ } ++ UNLOCK(&base_inode->lock); ++ ++ local->call_count = ++call_count; ++ ++ /* Send fsync() on the base shard first */ ++ anon_fd = fd_ref(local->fd); ++ STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, ++ local->xattr_req); ++ call_count--; ++ anon_fd = NULL; ++ ++ list_for_each_entry_safe(iter, tmp, ©, to_fsync_list) { ++ list_del_init(&iter->to_fsync_list); ++ fsync_count = 0; ++ shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); ++ GF_ASSERT(fsync_count > 0); ++ anon_fd = fd_anonymous(iter->inode); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ gf_msg(this->name, 
GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create " ++ "anon fd to fsync shard"); ++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ continue; ++ } + +- /* Send fsync() on the base shard first */ +- anon_fd = fd_ref(local->fd); ++ ret = fd_ctx_set(anon_fd, this, fsync_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, ++ "Failed to set fd " ++ "ctx for shard inode gfid=%s", ++ uuid_utoa(iter->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ continue; ++ } + STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, + local->xattr_req); + call_count--; +- anon_fd = NULL; +- +- list_for_each_entry_safe(iter, tmp, ©, to_fsync_list) +- { +- list_del_init(&iter->to_fsync_list); +- fsync_count = 0; +- shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); +- GF_ASSERT(fsync_count > 0); +- anon_fd = fd_anonymous(iter->inode); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, +- SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create " +- "anon fd to fsync shard"); +- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, NULL, NULL); +- continue; +- } +- +- ret = fd_ctx_set(anon_fd, this, fsync_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, +- "Failed to set fd " +- "ctx for shard inode gfid=%s", +- uuid_utoa(iter->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, NULL, NULL); +- continue; +- } +- STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, +- anon_fd, local->datasync, 
local->xattr_req); +- call_count--; +- } ++ } + +- return 0; ++ return 0; + } + +-int +-shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, +- dict_t *xdata) +-{ +- int ret = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, ++ dict_t *xdata) { ++ int ret = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); +- return 0; +- } ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); ++ return 0; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- frame->local = local; ++ frame->local = local; + +- local->fd = fd_ref(fd); +- local->fop = GF_FOP_FSYNC; +- local->datasync = datasync; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local->fd = fd_ref(fd); ++ local->fop = GF_FOP_FSYNC; ++ local->datasync = datasync; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_fsync_handler); +- return 0; ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_fsync_handler); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, gf_dirent_t *orig_entries, +- dict_t *xdata) +-{ +- gf_dirent_t *entry = NULL; +- gf_dirent_t *tmp = NULL; +- shard_local_t *local = NULL; ++int shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, ++ gf_dirent_t *orig_entries, dict_t *xdata) { ++ gf_dirent_t *entry = NULL; ++ gf_dirent_t *tmp = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret < 0) +- goto unwind; ++ if (op_ret < 0) ++ goto unwind; + +- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) +- { +- list_del_init(&entry->list); +- list_add_tail(&entry->list, &local->entries_head.list); ++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { ++ list_del_init(&entry->list); ++ list_add_tail(&entry->list, &local->entries_head.list); + +- if (!entry->dict) +- continue; ++ if (!entry->dict) ++ continue; + +- if (IA_ISDIR(entry->d_stat.ia_type)) +- continue; ++ if (IA_ISDIR(entry->d_stat.ia_type)) ++ continue; + +- 
if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) +- shard_modify_size_and_block_count(&entry->d_stat, entry->dict); +- if (!entry->inode) +- continue; ++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) ++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict); ++ if (!entry->inode) ++ continue; + +- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); +- } +- local->op_ret += op_ret; ++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); ++ } ++ local->op_ret += op_ret; + + unwind: +- if (local->fop == GF_FOP_READDIR) +- SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, +- &local->entries_head, xdata); +- else +- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, +- &local->entries_head, xdata); +- return 0; ++ if (local->fop == GF_FOP_READDIR) ++ SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, ++ &local->entries_head, xdata); ++ else ++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, ++ xdata); ++ return 0; + } + +-int32_t +-shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries, +- dict_t *xdata) +-{ +- fd_t *fd = NULL; +- gf_dirent_t *entry = NULL; +- gf_dirent_t *tmp = NULL; +- shard_local_t *local = NULL; +- gf_boolean_t last_entry = _gf_false; ++int32_t shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ gf_dirent_t *orig_entries, dict_t *xdata) { ++ fd_t *fd = NULL; ++ gf_dirent_t *entry = NULL; ++ gf_dirent_t *tmp = NULL; ++ shard_local_t *local = NULL; ++ gf_boolean_t last_entry = _gf_false; + +- local = frame->local; +- fd = local->fd; ++ local = frame->local; ++ fd = local->fd; + +- if (op_ret < 0) +- goto unwind; ++ if (op_ret < 0) ++ goto unwind; + +- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) +- { +- if (last_entry) +- last_entry = _gf_false; +- +- if 
(__is_root_gfid(fd->inode->gfid) && +- !(strcmp(entry->d_name, GF_SHARD_DIR))) { +- local->offset = entry->d_off; +- op_ret--; +- last_entry = _gf_true; +- continue; +- } ++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { ++ if (last_entry) ++ last_entry = _gf_false; + +- list_del_init(&entry->list); +- list_add_tail(&entry->list, &local->entries_head.list); ++ if (__is_root_gfid(fd->inode->gfid) && ++ !(strcmp(entry->d_name, GF_SHARD_DIR))) { ++ local->offset = entry->d_off; ++ op_ret--; ++ last_entry = _gf_true; ++ continue; ++ } + +- if (!entry->dict) +- continue; ++ list_del_init(&entry->list); ++ list_add_tail(&entry->list, &local->entries_head.list); + +- if (IA_ISDIR(entry->d_stat.ia_type)) +- continue; ++ if (!entry->dict) ++ continue; + +- if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && +- frame->root->pid != GF_CLIENT_PID_GSYNCD) +- shard_modify_size_and_block_count(&entry->d_stat, entry->dict); ++ if (IA_ISDIR(entry->d_stat.ia_type)) ++ continue; + +- if (!entry->inode) +- continue; ++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && ++ frame->root->pid != GF_CLIENT_PID_GSYNCD) ++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict); + +- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); +- } ++ if (!entry->inode) ++ continue; + +- local->op_ret = op_ret; ++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); ++ } + +- if (last_entry) { +- if (local->fop == GF_FOP_READDIR) +- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, +- local->fd, local->readdir_size, local->offset, +- local->xattr_req); +- else +- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, +- local->fd, local->readdir_size, local->offset, +- local->xattr_req); +- return 0; +- } ++ local->op_ret = op_ret; + +-unwind: ++ if (last_entry) { + if (local->fop == GF_FOP_READDIR) +- 
SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, +- &local->entries_head, xdata); ++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdir, local->fd, ++ local->readdir_size, local->offset, local->xattr_req); + else +- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, +- &local->entries_head, xdata); ++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdirp, local->fd, ++ local->readdir_size, local->offset, local->xattr_req); + return 0; +-} ++ } + +-int +-shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, int whichop, dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = mem_get0(this->local_pool); +- if (!local) { +- goto err; ++unwind: ++ if (local->fop == GF_FOP_READDIR) ++ SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, &local->entries_head, ++ xdata); ++ else ++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, ++ xdata); ++ return 0; ++} ++ ++int shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, int whichop, dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) { ++ goto err; ++ } ++ ++ frame->local = local; ++ ++ local->fd = fd_ref(fd); ++ local->fop = whichop; ++ local->readdir_size = size; ++ INIT_LIST_HEAD(&local->entries_head.list); ++ local->list_inited = _gf_true; ++ ++ if (whichop == GF_FOP_READDIR) { ++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); ++ } else { ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_log(this->name, GF_LOG_WARNING, ++ "Failed to set " ++ "dict value: key:%s, directory gfid=%s", ++ GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); ++ goto err; + } + +- frame->local = local; +- +- local->fd = fd_ref(fd); +- local->fop = whichop; +- local->readdir_size = size; +- INIT_LIST_HEAD(&local->entries_head.list); +- local->list_inited = _gf_true; +- +- if (whichop == GF_FOP_READDIR) { +- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); +- } else { +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_log(this->name, GF_LOG_WARNING, +- "Failed to set " +- "dict value: key:%s, directory gfid=%s", +- GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); +- goto err; +- } +- +- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdirp, fd, size, offset, +- local->xattr_req); +- } ++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, ++ local->xattr_req); ++ } + +- return 0; ++ return 0; + + err: +- STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); +- return 0; ++ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); ++ return 0; + } + +-int32_t +-shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, dict_t *xdata) +-{ +- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); +- return 0; ++int32_t shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ size_t size, off_t offset, dict_t *xdata) { ++ 
shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); ++ return 0; + } + +-int32_t +-shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, dict_t *xdata) +-{ +- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); +- return 0; ++int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ size_t size, off_t offset, dict_t *xdata) { ++ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); ++ return 0; + } + +-int32_t +-shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- const char *name, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); ++ } + +- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- const char *name, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_fremovexattr(call_frame_t 
*frame, xlator_t *this, fd_t *fd, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); ++ } + +- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- if (op_ret < 0) +- goto unwind; ++int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) { ++ if (op_ret < 0) ++ goto unwind; + +- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); ++ } + + unwind: +- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); +- return 0; ++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); ++ return 0; + } + +-int32_t 
+-shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, +- dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && +- (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { +- op_errno = ENODATA; +- goto out; +- } ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && ++ (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { ++ op_errno = ENODATA; ++ goto out; ++ } + +- STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); +- return 0; ++ STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- if (op_ret < 0) +- goto unwind; ++int32_t shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) { ++ if (op_ret < 0) ++ goto unwind; + +- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); ++ } + + unwind: +- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); +- return 0; ++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); ++ return 0; + } + +-int32_t +-shard_getxattr(call_frame_t *frame, xlator_t 
*this, loc_t *loc, +- const char *name, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && +- (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { +- op_errno = ENODATA; +- goto out; +- } ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && ++ (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { ++ op_errno = ENODATA; ++ goto out; ++ } + +- STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); +- return 0; ++ STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, +- int32_t flags, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, ++ fd, dict, flags, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); ++ 
return 0; + } + +-int32_t +-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, +- int32_t flags, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, +- loc, dict, flags, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, ++ loc, dict, flags, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); ++ return 0; + } + +-int +-shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->fop == GF_FOP_SETATTR) { +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, +- SHARD_LOOKUP_MASK); +- SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } else if (local->fop == GF_FOP_FSETATTR) { +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, +- SHARD_LOOKUP_MASK); +- SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } +- +- return 0; +-} ++int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +-int +-shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +- struct 
iatt *postbuf, dict_t *xdata) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ local = frame->local; + +- local->prebuf = *prebuf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; +- } +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- local->postbuf = *postbuf; +- local->postbuf.ia_size = local->prebuf.ia_size; +- local->postbuf.ia_blocks = local->prebuf.ia_blocks; ++ if (local->fop == GF_FOP_SETATTR) { ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, ++ SHARD_LOOKUP_MASK); ++ SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } else if (local->fop == GF_FOP_FSETATTR) { ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, ++ SHARD_LOOKUP_MASK); ++ SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } + +-unwind: +- local->handler(frame, this); +- return 0; ++ return 0; + } + +-int +-shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- struct iatt *stbuf, int32_t valid, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { +- STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); +- return 0; +- } +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- 
STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++int shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *prebuf, struct iatt *postbuf, ++ dict_t *xdata) { ++ shard_local_t *local = NULL; + +- frame->local = local; ++ local = frame->local; + +- local->handler = shard_post_setattr_handler; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_SETATTR; +- loc_copy(&local->loc, loc); ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, +- local, err); ++ local->prebuf = *prebuf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ local->postbuf = *postbuf; ++ local->postbuf.ia_size = local->prebuf.ia_size; ++ local->postbuf.ia_blocks = local->prebuf.ia_blocks; + +- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, +- local->xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); +- return 0; ++unwind: ++ local->handler(frame, this); ++ return 0; + } + +-int +-shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iatt *stbuf, int32_t valid, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { +- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); +- return 0; +- } ++int 
shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ struct iatt *stbuf, int32_t valid, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { ++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); ++ return 0; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); +- return 0; +- } ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- frame->local = local; ++ frame->local = local; + +- local->handler = shard_post_setattr_handler; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_FSETATTR; +- local->fd = fd_ref(fd); ++ local->handler = shard_post_setattr_handler; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_SETATTR; ++ loc_copy(&local->loc, loc); + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, ++ local, err); + +- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, +- local->xattr_req); +- return 0; ++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, ++ local->xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, +- glusterfs_fop_t fop, fd_t *fd, +- struct iovec *vector, int32_t count, +- off_t offset, uint32_t flags, size_t len, +- struct iobref *iobref, dict_t *xdata) +-{ +- int ret = 0; +- int i = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iatt *stbuf, int32_t valid, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto out; +- } ++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { ++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); ++ return 0; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- /* block_size = 0 means that the file was created before +- * sharding was enabled on the volume. 
+- */ +- switch (fop) { +- case GF_FOP_WRITE: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->writev, fd, vector, +- count, offset, flags, iobref, xdata); +- break; +- case GF_FOP_FALLOCATE: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fallocate, fd, flags, +- offset, len, xdata); +- break; +- case GF_FOP_ZEROFILL: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->zerofill, fd, offset, +- len, xdata); +- break; +- case GF_FOP_DISCARD: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->discard, fd, offset, +- len, xdata); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +- } ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto out; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- frame->local = local; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto out; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto out; +- +- if (vector) { +- local->vector = iov_dup(vector, count); +- if (!local->vector) +- goto out; +- for (i = 0; i < count; i++) +- local->total_size += vector[i].iov_len; +- local->count = count; +- } else { +- local->total_size = len; +- } ++ frame->local = local; + +- local->fop = fop; +- local->offset = offset; +- local->flags = flags; +- if (iobref) +- local->iobref = iobref_ref(iobref); +- local->fd = fd_ref(fd); +- local->block_size = block_size; +- local->resolver_base_inode = local->fd->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->handler = shard_post_setattr_handler; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_FSETATTR; ++ local->fd = fd_ref(fd); + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_common_inode_write_post_lookup_handler); +- return 0; ++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, ++ local->xattr_req); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, ++ glusterfs_fop_t fop, fd_t *fd, ++ struct iovec *vector, int32_t count, ++ off_t offset, uint32_t flags, size_t len, ++ struct iobref *iobref, dict_t *xdata) { ++ int ret = 0; ++ int i = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto out; ++ } ++ ++ if 
(!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ /* block_size = 0 means that the file was created before ++ * sharding was enabled on the volume. ++ */ ++ switch (fop) { ++ case GF_FOP_WRITE: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, ++ fd, vector, count, offset, flags, iobref, xdata); ++ break; ++ case GF_FOP_FALLOCATE: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fallocate, fd, flags, offset, ++ len, xdata); ++ break; ++ case GF_FOP_ZEROFILL: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, ++ xdata); ++ break; ++ case GF_FOP_DISCARD: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto out; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto out; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto out; ++ ++ if (vector) { ++ local->vector = iov_dup(vector, count); ++ if (!local->vector) ++ goto out; ++ for (i = 0; i < count; i++) ++ local->total_size += vector[i].iov_len; ++ local->count = count; ++ } else { ++ local->total_size = len; ++ } ++ ++ local->fop = fop; ++ local->offset = offset; ++ local->flags = flags; ++ if (iobref) ++ local->iobref = iobref_ref(iobref); ++ local->fd = fd_ref(fd); ++ local->block_size = block_size; ++ local->resolver_base_inode = local->fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_common_inode_write_post_lookup_handler); ++ return 0; + out: +- shard_common_failure_unwind(fop, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iovec *vector, int32_t count, off_t offset, uint32_t flags, +- struct iobref *iobref, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, +- offset, flags, 0, iobref, xdata); +- return 0; ++int shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iovec *vector, int32_t count, off_t offset, ++ uint32_t flags, struct iobref *iobref, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, ++ offset, flags, 0, iobref, xdata); ++ return 0; + } + +-int +-shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, +- int32_t keep_size, off_t offset, size_t len, dict_t *xdata) +-{ +- if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && +- (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) +- goto out; ++int shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ int32_t keep_size, off_t offset, size_t len, ++ dict_t 
*xdata) { ++ if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && ++ (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) ++ goto out; + +- shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, +- offset, keep_size, len, NULL, xdata); +- return 0; ++ shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, ++ offset, keep_size, len, NULL, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); ++ return 0; + } + +-int +-shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- off_t len, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, +- offset, 0, len, NULL, xdata); +- return 0; ++int shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ off_t len, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, ++ offset, 0, len, NULL, xdata); ++ return 0; + } + +-int +-shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- size_t len, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, +- offset, 0, len, NULL, xdata); +- return 0; ++int shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ size_t len, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, ++ offset, 0, len, NULL, xdata); ++ return 0; + } + +-int32_t +-shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- gf_seek_what_t what, dict_t *xdata) +-{ +- /* TBD */ +- gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, +- "seek called on %s.", uuid_utoa(fd->inode->gfid)); +- shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); +- return 0; ++int32_t shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ 
gf_seek_what_t what, dict_t *xdata) { ++ /* TBD */ ++ gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, ++ "seek called on %s.", uuid_utoa(fd->inode->gfid)); ++ shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); ++ return 0; + } + +-int32_t +-mem_acct_init(xlator_t *this) +-{ +- int ret = -1; +- +- if (!this) +- return ret; ++int32_t mem_acct_init(xlator_t *this) { ++ int ret = -1; + +- ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); ++ if (!this) ++ return ret; + +- if (ret != 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, +- "Memory accounting init" +- "failed"); +- return ret; +- } ++ ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); + ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, ++ "Memory accounting init" ++ "failed"); + return ret; ++ } ++ ++ return ret; + } + +-int +-init(xlator_t *this) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; ++int init(xlator_t *this) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; + +- if (!this) { +- gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, +- "this is NULL. init() failed"); +- return -1; +- } ++ if (!this) { ++ gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, ++ "this is NULL. init() failed"); ++ return -1; ++ } + +- if (!this->parents) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, +- "Dangling volume. Check volfile"); +- goto out; +- } ++ if (!this->parents) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, ++ "Dangling volume. Check volfile"); ++ goto out; ++ } + +- if (!this->children || this->children->next) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, +- "shard not configured with exactly one sub-volume. " +- "Check volfile"); +- goto out; +- } ++ if (!this->children || this->children->next) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, ++ "shard not configured with exactly one sub-volume. 
" ++ "Check volfile"); ++ goto out; ++ } + +- priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); +- if (!priv) +- goto out; ++ priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); ++ if (!priv) ++ goto out; + +- GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); ++ GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); + +- GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); ++ GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); + +- GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); ++ GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); + +- this->local_pool = mem_pool_new(shard_local_t, 128); +- if (!this->local_pool) { +- ret = -1; +- goto out; +- } +- gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); +- gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); ++ this->local_pool = mem_pool_new(shard_local_t, 128); ++ if (!this->local_pool) { ++ ret = -1; ++ goto out; ++ } ++ gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); ++ gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); + +- this->private = priv; +- LOCK_INIT(&priv->lock); +- INIT_LIST_HEAD(&priv->ilist_head); +- ret = 0; ++ this->private = priv; ++ LOCK_INIT(&priv->lock); ++ INIT_LIST_HEAD(&priv->ilist_head); ++ ret = 0; + out: +- if (ret) { +- GF_FREE(priv); +- mem_pool_destroy(this->local_pool); +- } ++ if (ret) { ++ GF_FREE(priv); ++ mem_pool_destroy(this->local_pool); ++ } + +- return ret; ++ return ret; + } + +-void +-fini(xlator_t *this) +-{ +- shard_priv_t *priv = NULL; ++void fini(xlator_t *this) { ++ shard_priv_t *priv = NULL; + +- GF_VALIDATE_OR_GOTO("shard", this, out); ++ GF_VALIDATE_OR_GOTO("shard", this, out); + +- mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; ++ mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + +- priv = this->private; +- if (!priv) +- goto out; ++ priv = this->private; ++ if 
(!priv) ++ goto out; + +- this->private = NULL; +- LOCK_DESTROY(&priv->lock); +- GF_FREE(priv); ++ this->private = NULL; ++ LOCK_DESTROY(&priv->lock); ++ GF_FREE(priv); + + out: +- return; ++ return; + } + +-int +-reconfigure(xlator_t *this, dict_t *options) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; ++int reconfigure(xlator_t *this, dict_t *options) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; + +- priv = this->private; ++ priv = this->private; + +- GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); ++ GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); + +- GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, +- uint32, out); +- ret = 0; ++ GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, uint32, ++ out); ++ ret = 0; + + out: +- return ret; ++ return ret; + } + +-int +-shard_forget(xlator_t *this, inode_t *inode) +-{ +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; +- shard_priv_t *priv = NULL; ++int shard_forget(xlator_t *this, inode_t *inode) { ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_priv_t *priv = NULL; + +- priv = this->private; +- if (!priv) +- return 0; ++ priv = this->private; ++ if (!priv) ++ return 0; + +- inode_ctx_del(inode, this, &ctx_uint); +- if (!ctx_uint) +- return 0; ++ inode_ctx_del(inode, this, &ctx_uint); ++ if (!ctx_uint) ++ return 0; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- /* When LRU limit reaches inode will be forcefully removed from the +- * table, inode needs to be removed from LRU of shard as well. +- */ +- if (!list_empty(&ctx->ilist)) { +- LOCK(&priv->lock); +- { +- list_del_init(&ctx->ilist); +- priv->inode_count--; +- } +- UNLOCK(&priv->lock); ++ /* When LRU limit reaches inode will be forcefully removed from the ++ * table, inode needs to be removed from LRU of shard as well. 
++ */ ++ if (!list_empty(&ctx->ilist)) { ++ LOCK(&priv->lock); ++ { ++ list_del_init(&ctx->ilist); ++ priv->inode_count--; + } +- GF_FREE(ctx); ++ UNLOCK(&priv->lock); ++ } ++ GF_FREE(ctx); + +- return 0; ++ return 0; + } + +-int +-shard_release(xlator_t *this, fd_t *fd) +-{ +- /* TBD */ +- return 0; ++int shard_release(xlator_t *this, fd_t *fd) { ++ /* TBD */ ++ return 0; + } + +-int +-shard_priv_dump(xlator_t *this) +-{ +- shard_priv_t *priv = NULL; +- char key_prefix[GF_DUMP_MAX_BUF_LEN] = { +- 0, +- }; +- char *str = NULL; ++int shard_priv_dump(xlator_t *this) { ++ shard_priv_t *priv = NULL; ++ char key_prefix[GF_DUMP_MAX_BUF_LEN] = { ++ 0, ++ }; ++ char *str = NULL; + +- priv = this->private; ++ priv = this->private; + +- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); +- gf_proc_dump_add_section("%s", key_prefix); +- str = gf_uint64_2human_readable(priv->block_size); +- gf_proc_dump_write("shard-block-size", "%s", str); +- gf_proc_dump_write("inode-count", "%d", priv->inode_count); +- gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); +- gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); ++ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); ++ gf_proc_dump_add_section("%s", key_prefix); ++ str = gf_uint64_2human_readable(priv->block_size); ++ gf_proc_dump_write("shard-block-size", "%s", str); ++ gf_proc_dump_write("inode-count", "%d", priv->inode_count); ++ gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); ++ gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); + +- GF_FREE(str); ++ GF_FREE(str); + +- return 0; ++ return 0; + } + +-int +-shard_releasedir(xlator_t *this, fd_t *fd) +-{ +- return 0; +-} ++int shard_releasedir(xlator_t *this, fd_t *fd) { return 0; } + + struct xlator_fops fops = { + .lookup = shard_lookup, +-- +1.8.3.1 + diff --git a/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch 
b/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch new file mode 100644 index 0000000..df971b8 --- /dev/null +++ b/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch @@ -0,0 +1,162 @@ +From 562283ad34021bbf4fc540127ee7072d5152d34d Mon Sep 17 00:00:00 2001 +From: Yuval Turgeman +Date: Wed, 24 Jul 2019 16:42:22 +0300 +Subject: [PATCH 336/336] spec: check and return exit code in rpm scripts + +lua's error() call expects a value as its second argument, and this is +taken from the `val` variable, while the `ok` is boolean. This causes +the rpm scripts to fail on: + +bad argument #2 to 'error' (number expected, got boolean) + +Label: DOWNSTREAM ONLY +BUG: 1768786 +Change-Id: I9c6b1f62ebf15dbc93196d018bc1fd628b36fc33 +>Signed-off-by: Yuval Turgeman +Reviewed-on: https://code.engineering.redhat.com/gerrit/186405 +Reviewed-by: Mohit Agrawal +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 55 +++++++++++++++++++++++++++++++++---------------------- + 1 file changed, 33 insertions(+), 22 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 91180db..1b975b2 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1572,8 +1572,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1606,8 +1607,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1640,8 +1642,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1674,8 +1677,9 @@ 
fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1707,8 +1711,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1740,8 +1745,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1775,8 +1781,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + %endif + +@@ -1810,8 +1817,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1845,8 +1853,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + %endif + +@@ -1881,8 +1890,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + %endif + +@@ -1916,8 +1926,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + 
%posttrans server +-- +1.8.3.1 + diff --git a/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch b/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch new file mode 100644 index 0000000..b18ef4f --- /dev/null +++ b/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch @@ -0,0 +1,455 @@ +From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Fri, 9 Aug 2019 14:34:22 +0530 +Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size + +If the glusterfs fuse client process is unable to +process the invalidate requests quickly enough, the +number of such requests quickly grows large enough +to use a significant amount of memory. +We are now introducing another option to set an upper +limit on these to prevent runaway memory usage. + +> Upstream https://review.gluster.org/23187 +> Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788 +> Fixes: bz#1732717 +> Signed-off-by: N Balachandran + +BUG: 1763208 +Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/187529 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + doc/mount.glusterfs.8 | 5 +++ + glusterfsd/src/glusterfsd.c | 21 ++++++++++ + glusterfsd/src/glusterfsd.h | 3 +- + libglusterfs/src/glusterfs/glusterfs.h | 1 + + libglusterfs/src/glusterfs/inode.h | 1 + + libglusterfs/src/inode.c | 31 +++++++++++---- + xlators/mount/fuse/src/fuse-bridge.c | 60 ++++++++++++++++++++++------- + xlators/mount/fuse/src/fuse-bridge.h | 3 +- + xlators/mount/fuse/utils/mount.glusterfs.in | 7 ++++ + 9 files changed, 108 insertions(+), 24 deletions(-) + +diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 +index 286631b..b35b362 100644 +--- a/doc/mount.glusterfs.8 ++++ b/doc/mount.glusterfs.8 +@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None] + Set fuse module's limit for number of inodes kept in LRU 
list to N [default: 131072] + .TP + .TP ++\fBinvalidate-limit=\fRN ++Suspend fuse invalidations implied by 'lru-limit' if number of outstanding ++invalidations reaches N ++.TP ++.TP + \fBbackground-qlen=\fRN + Set fuse module's background queue length to N [default: 64] + .TP +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 5b5e996..0856471 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -212,6 +212,9 @@ static struct argp_option gf_options[] = { + {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, + "Set fuse module's limit for number of inodes kept in LRU list to N " + "[default: 131072]"}, ++ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0, ++ "Suspend inode invalidations implied by 'lru-limit' if the number of " ++ "outstanding invalidations reaches N"}, + {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0, + "Set fuse module's background queue length to N " + "[default: 64]"}, +@@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options) + } + } + ++ if (cmd_args->invalidate_limit >= 0) { ++ ret = dict_set_int32(options, "invalidate-limit", ++ cmd_args->invalidate_limit); ++ if (ret < 0) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, ++ "invalidate-limit"); ++ goto err; ++ } ++ } ++ + if (cmd_args->background_qlen) { + ret = dict_set_int32(options, "background-qlen", + cmd_args->background_qlen); +@@ -1283,6 +1296,14 @@ parse_opts(int key, char *arg, struct argp_state *state) + argp_failure(state, -1, 0, "unknown LRU limit option %s", arg); + break; + ++ case ARGP_FUSE_INVALIDATE_LIMIT_KEY: ++ if (!gf_string2int32(arg, &cmd_args->invalidate_limit)) ++ break; ++ ++ argp_failure(state, -1, 0, "unknown invalidate limit option %s", ++ arg); ++ break; ++ + case ARGP_FUSE_BACKGROUND_QLEN_KEY: + if (!gf_string2int(arg, &cmd_args->background_qlen)) + break; +diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h +index fa55789..ee655f0 
100644 +--- a/glusterfsd/src/glusterfsd.h ++++ b/glusterfsd/src/glusterfsd.h +@@ -111,7 +111,8 @@ enum argp_option_keys { + ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189, + ARGP_FUSE_LRU_LIMIT_KEY = 190, + ARGP_FUSE_AUTO_INVAL_KEY = 191, +- ARGP_BRICK_MUX_KEY = 192 ++ ARGP_BRICK_MUX_KEY = 192, ++ ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195, + }; + + struct _gfd_vol_top_priv { +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 79c93ae..3b594c0 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -541,6 +541,7 @@ struct _cmd_args { + int client_pid_set; + unsigned uid_map_root; + int32_t lru_limit; ++ int32_t invalidate_limit; + int background_qlen; + int congestion_threshold; + char *fuse_mountopts; +diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h +index 52efdd8..4421c47 100644 +--- a/libglusterfs/src/glusterfs/inode.h ++++ b/libglusterfs/src/glusterfs/inode.h +@@ -107,6 +107,7 @@ struct _inode { + struct list_head list; /* active/lru/purge */ + + struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ ++ bool in_invalidate_list; /* Set if inode is in table invalidate list */ + bool invalidate_sent; /* Set it if invalidator_fn is called for inode */ + }; + +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 96ddea5..5331e93 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear) + + this = THIS; + +- if (clear && inode->invalidate_sent) { +- inode->invalidate_sent = false; ++ if (clear && inode->in_invalidate_list) { ++ inode->in_invalidate_list = false; + inode->table->invalidate_size--; + __inode_activate(inode); + } +@@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear) + inode->_ctx[index].ref--; + } + +- if (!inode->ref && !inode->invalidate_sent) { ++ if (!inode->ref && !inode->in_invalidate_list) { + 
inode->table->active_size--; + + nlookup = GF_ATOMIC_GET(inode->nlookup); +@@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate) + return inode; + + if (!inode->ref) { +- if (inode->invalidate_sent) { +- inode->invalidate_sent = false; ++ if (inode->in_invalidate_list) { ++ inode->in_invalidate_list = false; + inode->table->invalidate_size--; + } else { + inode->table->lru_size--; + } + if (is_invalidate) { +- inode->invalidate_sent = true; ++ inode->in_invalidate_list = true; + inode->table->invalidate_size++; + list_move_tail(&inode->list, &inode->table->invalidate); + } else { +@@ -1609,6 +1609,7 @@ static int + inode_table_prune(inode_table_t *table) + { + int ret = 0; ++ int ret1 = 0; + struct list_head purge = { + 0, + }; +@@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table) + /* check for valid inode with 'nlookup' */ + nlookup = GF_ATOMIC_GET(entry->nlookup); + if (nlookup) { ++ if (entry->invalidate_sent) { ++ list_move_tail(&entry->list, &table->lru); ++ continue; ++ } + __inode_ref(entry, true); + tmp = entry; + break; +@@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table) + if (tmp) { + xlator_t *old_THIS = THIS; + THIS = table->invalidator_xl; +- table->invalidator_fn(table->invalidator_xl, tmp); ++ ret1 = table->invalidator_fn(table->invalidator_xl, tmp); + THIS = old_THIS; +- inode_unref(tmp); ++ pthread_mutex_lock(&table->lock); ++ { ++ if (!ret1) { ++ tmp->invalidate_sent = true; ++ __inode_unref(tmp, false); ++ } else { ++ /* Move this back to the lru list*/ ++ __inode_unref(tmp, true); ++ } ++ } ++ pthread_mutex_unlock(&table->lock); + } + + /* Just so that if purge list is handled too, then clear it off */ +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 1c946a2..8b2e7f0 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log; + void + fini(xlator_t *this_xl); + +-static 
void ++static int32_t + fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); + + /* +@@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) + #define send_fuse_obj(this, finh, obj) \ + send_fuse_data(this, finh, obj, sizeof(*(obj))) + +-static void ++static int32_t + fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + { + #if FUSE_KERNEL_MINOR_VERSION >= 11 +@@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + + priv = this->private; + if (!priv->reverse_fuse_thread_started) +- return; ++ return -1; ++ ++ if (priv->invalidate_limit && ++ (priv->invalidate_count >= priv->invalidate_limit)) { ++ return -1; ++ } + + inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) +- return; ++ return -1; + + list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list) + { + node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); + if (node == NULL) +- break; ++ return -1; + + INIT_LIST_HEAD(&node->next); + +@@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + pthread_mutex_lock(&priv->invalidate_mutex); + { + list_add_tail(&node->next, &priv->invalidate_list); ++ priv->invalidate_count++; + pthread_cond_signal(&priv->invalidate_cond); + } + pthread_mutex_unlock(&priv->invalidate_mutex); + } + + #endif +- return; ++ return 0; + } + + /* + * Send an inval inode notification to fuse. This causes an invalidation of the + * entire page cache mapping on the inode. 
+ */ +-static void ++static int32_t + fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + { + #if FUSE_KERNEL_MINOR_VERSION >= 11 +@@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + priv = this->private; + + if (!priv->reverse_fuse_thread_started) +- return; ++ return -1; ++ ++ if (priv->invalidate_limit && ++ (priv->invalidate_count >= priv->invalidate_limit)) { ++ return -1; ++ } + + inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) +- return; ++ return -1; + + node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); + if (node == NULL) +- return; ++ return -1; + + INIT_LIST_HEAD(&node->next); + +@@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + pthread_mutex_lock(&priv->invalidate_mutex); + { + list_add_tail(&node->next, &priv->invalidate_list); ++ priv->invalidate_count++; + pthread_cond_signal(&priv->invalidate_cond); + } + pthread_mutex_unlock(&priv->invalidate_mutex); +@@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "fuse_invalidate_inode not implemented on this system"); + #endif +- return; ++ return 0; + } + + #if FUSE_KERNEL_MINOR_VERSION >= 11 +@@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + static int32_t + fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) + { +- fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); +- return 0; ++ int32_t ret = 0; ++ ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); ++ return ret; + } + #endif + +@@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64, + finh->nodeid); + #if FUSE_KERNEL_MINOR_VERSION >= 11 +- fuse_invalidate_entry(this, finh->nodeid); ++ ret = fuse_invalidate_entry(this, finh->nodeid); ++ if (ret) ++ op_errno = EBUSY; + #endif + goto done; + } +@@ -4812,6 +4827,7 @@ notify_kernel_loop(void 
*data) + fuse_invalidate_node_t, next); + + list_del_init(&node->next); ++ priv->invalidate_count--; + } + pthread_mutex_unlock(&priv->invalidate_mutex); + +@@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data) + list_del_init(&node->next); + GF_FREE(node); + } ++ priv->invalidate_count = 0; + } + pthread_mutex_unlock(&priv->invalidate_mutex); + +@@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this) + (int)private->timed_response_fuse_thread_started); + gf_proc_dump_write("reverse_thread_started", "%d", + (int)private->reverse_fuse_thread_started); ++ gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit); ++ gf_proc_dump_write("invalidate_queue_length", "%" PRIu64, ++ private->invalidate_count); + gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp); + + return 0; +@@ -6619,6 +6639,9 @@ init(xlator_t *this_xl) + + GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit); + ++ GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32, ++ cleanup_exit); ++ + GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit); + + GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit); +@@ -6955,6 +6978,15 @@ struct volume_options options[] = { + "reaching this limit (0 means 'unlimited')", + }, + { ++ .key = {"invalidate-limit"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "0", ++ .min = 0, ++ .description = "suspend invalidations as of 'lru-limit' if the number " ++ "of outstanding invalidations reaches this limit " ++ "(0 means 'unlimited')", ++ }, ++ { + .key = {"auto-invalidation"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 697bd88..2311582 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -139,7 +139,7 @@ struct fuse_private { + pthread_cond_t invalidate_cond; + pthread_mutex_t invalidate_mutex; + gf_boolean_t 
reverse_fuse_thread_started; +- ++ uint64_t invalidate_count; + /* For communicating with separate mount thread. */ + int status_pipe[2]; + +@@ -191,6 +191,7 @@ struct fuse_private { + + /* LRU Limit, if not set, default is 128k for now */ + uint32_t lru_limit; ++ uint32_t invalidate_limit; + }; + typedef struct fuse_private fuse_private_t; + +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index cbde42d..61d7422 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -257,6 +257,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit"); + fi + ++ if [ -n "$invalidate_limit" ]; then ++ cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit"); ++ fi ++ + if [ -n "$bg_qlen" ]; then + cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); + fi +@@ -505,6 +509,9 @@ with_options() + "lru-limit") + lru_limit=$value + ;; ++ "invalidate-limit") ++ invalidate_limit=$value ++ ;; + "background-qlen") + bg_qlen=$value + ;; +-- +1.8.3.1 + diff --git a/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch b/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch new file mode 100644 index 0000000..b108bd0 --- /dev/null +++ b/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch @@ -0,0 +1,83 @@ +From 6d2e12a53ef0bcbeea274c47537a0c707a3f7b1e Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Fri, 20 Sep 2019 13:30:42 +0530 +Subject: [PATCH 338/344] glusterfs/fuse: Reduce the default lru-limit value + +The current lru-limit value still uses memory for +upto 128K inodes. +Reduce the default value of lru-limit to 64K. 
+ +> Upstream https://review.gluster.org/23461 +> Change-Id: Ica2dd4f8f5fde45cb5180d8f02c3d86114ac52b3 +> Fixes: bz#1753880 +> Signed-off-by: N Balachandran +> Signed-off-by: Csaba Henk + +BUG: 1763208 +Change-Id: I04ab39b5278e702aacdceebfa5b63702b9f9703b +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/187535 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + doc/mount.glusterfs.8 | 2 +- + glusterfsd/src/glusterfsd.c | 2 +- + xlators/mount/fuse/src/fuse-bridge.c | 2 +- + xlators/mount/fuse/src/fuse-bridge.h | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 +index b35b362..87a5669 100644 +--- a/doc/mount.glusterfs.8 ++++ b/doc/mount.glusterfs.8 +@@ -123,7 +123,7 @@ Provide list of backup volfile servers in the following format [default: None] + .TP + .TP + \fBlru-limit=\fRN +-Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072] ++Set fuse module's limit for number of inodes kept in LRU list to N [default: 65536] + .TP + .TP + \fBinvalidate-limit=\fRN +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 0856471..974fb88 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -211,7 +211,7 @@ static struct argp_option gf_options[] = { + "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"}, + {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, + "Set fuse module's limit for number of inodes kept in LRU list to N " +- "[default: 131072]"}, ++ "[default: 65536]"}, + {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0, + "Suspend inode invalidations implied by 'lru-limit' if the number of " + "outstanding invalidations reaches N"}, +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 8b2e7f0..ebe5c28 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ 
b/xlators/mount/fuse/src/fuse-bridge.c +@@ -6972,7 +6972,7 @@ struct volume_options options[] = { + { + .key = {"lru-limit"}, + .type = GF_OPTION_TYPE_INT, +- .default_value = "131072", ++ .default_value = "65536", + .min = 0, + .description = "makes glusterfs invalidate kernel inodes after " + "reaching this limit (0 means 'unlimited')", +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 2311582..cf4479c 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -189,7 +189,7 @@ struct fuse_private { + gf_boolean_t flush_handle_interrupt; + gf_boolean_t fuse_auto_inval; + +- /* LRU Limit, if not set, default is 128k for now */ ++ /* LRU Limit, if not set, default is 64k for now */ + uint32_t lru_limit; + uint32_t invalidate_limit; + }; +-- +1.8.3.1 + diff --git a/SOURCES/0339-geo-rep-fix-integer-config-validation.patch b/SOURCES/0339-geo-rep-fix-integer-config-validation.patch new file mode 100644 index 0000000..45f3ede --- /dev/null +++ b/SOURCES/0339-geo-rep-fix-integer-config-validation.patch @@ -0,0 +1,93 @@ +From 8b5b3b247a00515d3188453c27b0ba749e93d325 Mon Sep 17 00:00:00 2001 +From: Aravinda VK +Date: Tue, 26 Mar 2019 13:20:13 +0530 +Subject: [PATCH 339/344] geo-rep: fix integer config validation + +ssh-port validation is mentioned as `validation=int` in template +`gsyncd.conf`, but not handled this during geo-rep config set. 
+ +upstream patch: + https://review.gluster.org/#/c/glusterfs/+/22418/ +Backport of: + + >Fixes: bz#1692666 + >Change-Id: I3f19d9b471b0a3327e4d094dfbefcc58ed2c34f6 + >Signed-off-by: Aravinda VK + >Signed-off-by: Sunny Kumar + +BUG: 1782162 +Change-Id: I3f19d9b471b0a3327e4d094dfbefcc58ed2c34f6 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/187533 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/gsyncdconfig.py | 23 ++++++++++++++++++----- + tests/00-geo-rep/georep-basic-dr-rsync.t | 3 +++ + 2 files changed, 21 insertions(+), 5 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py +index f823311..8848071 100644 +--- a/geo-replication/syncdaemon/gsyncdconfig.py ++++ b/geo-replication/syncdaemon/gsyncdconfig.py +@@ -329,6 +329,9 @@ class Gconf(object): + if item["validation"] == "unixtime": + return validate_unixtime(value) + ++ if item["validation"] == "int": ++ return validate_int(value) ++ + return False + + def _is_config_changed(self): +@@ -381,6 +384,14 @@ def config_upgrade(config_file, ret): + config.write(configfile) + + ++def validate_int(value): ++ try: ++ _ = int(value) ++ return True ++ except ValueError: ++ return False ++ ++ + def validate_unixtime(value): + try: + y = datetime.fromtimestamp(int(value)).strftime("%Y") +@@ -393,11 +404,13 @@ def validate_unixtime(value): + + + def validate_minmax(value, minval, maxval): +- value = int(value) +- minval = int(minval) +- maxval = int(maxval) +- +- return value >= minval and value <= maxval ++ try: ++ value = int(value) ++ minval = int(minval) ++ maxval = int(maxval) ++ return value >= minval and value <= maxval ++ except ValueError: ++ return False + + + def validate_choice(value, allowed_values): +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t +index b432635..b6fbf18 100644 +--- 
a/tests/00-geo-rep/georep-basic-dr-rsync.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t +@@ -71,6 +71,9 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created" + #Config gluster-command-dir + TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR} + ++#Config Set ssh-port to validate int validation ++TEST $GEOREP_CLI $master $slave config ssh-port 22 ++ + #Config gluster-command-dir + TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR} + +-- +1.8.3.1 + diff --git a/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch b/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch new file mode 100644 index 0000000..54b2706 --- /dev/null +++ b/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch @@ -0,0 +1,46 @@ +From 0c996d6c40c625f8a0ee6be2c220c89aaf70c840 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 10 Dec 2019 08:35:23 +0530 +Subject: [PATCH 340/344] rpc: event_slot_alloc converted infinite loop after + reach slot_used to 1024 + +Problem: In the commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2 missed one + condition to come out from the loop so after reach the slot_used to + 1024 loop has become infinite loop + +Solution: Correct the code path to avoid the infinite loop + +> Change-Id: Ia02a109571f0d8cc9902c32db3e9b9282ee5c1db +> Fixes: bz#1781440 +> Credits: Xavi Hernandez +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit 8030f9c0f092170ceb50cedf59b9c330022825b7) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23843/) + +Change-Id: Ia02a109571f0d8cc9902c32db3e9b9282ee5c1db +BUG: 1781444 +Credits: Xavi Hernandez +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/187460 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez Juan +--- + libglusterfs/src/event-epoll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git 
a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c +index 65f5efd..5afb2f2 100644 +--- a/libglusterfs/src/event-epoll.c ++++ b/libglusterfs/src/event-epoll.c +@@ -92,7 +92,7 @@ retry: + while (i < EVENT_EPOLL_TABLES) { + switch (event_pool->slots_used[i]) { + case EVENT_EPOLL_SLOTS: +- continue; ++ break; + case 0: + if (!event_pool->ereg[i]) { + table = __event_newtable(event_pool, i); +-- +1.8.3.1 + diff --git a/SOURCES/0341-socket-fix-error-handling.patch b/SOURCES/0341-socket-fix-error-handling.patch new file mode 100644 index 0000000..0eb68d1 --- /dev/null +++ b/SOURCES/0341-socket-fix-error-handling.patch @@ -0,0 +1,742 @@ +From 2c99b7db00a6238fd43053dd672c8ce519d8fd27 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Wed, 11 Dec 2019 18:21:14 +0100 +Subject: [PATCH 341/344] socket: fix error handling + +When __socket_proto_state_machine() detected a problem in the size of +the request or it couldn't allocate an iobuf of the requested size, it +returned -ENOMEM (-12). However the caller was expecting only -1 in +case of error. For this reason the error passes undetected initially, +adding back the socket to the epoll object. On further processing, +however, the error is finally detected and the connection terminated. +Meanwhile, another thread could receive a poll_in event from the same +connection, which could cause races with the connection destruction. +When this happened, the process crashed. + +To fix this, all error detection conditions have been hardened to be +more strict on what is valid and what not. Also, we don't return +-ENOMEM anymore. We always return -1 in case of error. + +An additional change has been done to prevent destruction of the +transport object while it may still be needed. 
+ +Upstream patch: +> Change-Id: I6e59cd81cbf670f7adfdde942625d4e6c3fbc82d +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23861 +> Fixes: bz#1782495 +> Signed-off-by: Xavi Hernandez + +Change-Id: I6e59cd81cbf670f7adfdde942625d4e6c3fbc82d +BUG: 1779696 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/187689 +Tested-by: RHGS Build Bot +Reviewed-by: Raghavendra Gowdappa +--- + rpc/rpc-transport/socket/src/socket.c | 173 ++++++++++++++++++---------------- + 1 file changed, 90 insertions(+), 83 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index bf2fa71..f54ca83 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -173,7 +173,7 @@ ssl_setup_connection_params(rpc_transport_t *this); + \ + ret = __socket_readv(this, in->pending_vector, 1, &in->pending_vector, \ + &in->pending_count, &bytes_read); \ +- if (ret == -1) \ ++ if (ret < 0) \ + break; \ + __socket_proto_update_priv_after_read(priv, ret, bytes_read); \ + } +@@ -739,7 +739,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count, + ret = sys_writev(sock, opvector, IOV_MIN(opcount)); + } + +- if (ret == 0 || (ret == -1 && errno == EAGAIN)) { ++ if ((ret == 0) || ((ret < 0) && (errno == EAGAIN))) { + /* done for now */ + break; + } else if (ret > 0) +@@ -754,7 +754,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count, + errno = ENODATA; + ret = -1; + } +- if (ret == -1 && errno == EAGAIN) { ++ if ((ret < 0) && (errno == EAGAIN)) { + /* done for now */ + break; + } else if (ret > 0) +@@ -770,7 +770,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count, + errno = ENOTCONN; + break; + } +- if (ret == -1) { ++ if (ret < 0) { + if (errno == EINTR) + continue; + +@@ -907,7 +907,7 @@ __socket_disconnect(rpc_transport_t *this) + gf_log(this->name, GF_LOG_TRACE, "disconnecting %p, sock=%d", this, + 
priv->sock); + +- if (priv->sock != -1) { ++ if (priv->sock >= 0) { + gf_log_callingfn(this->name, GF_LOG_TRACE, + "tearing down socket connection"); + ret = __socket_teardown_connection(this); +@@ -942,7 +942,7 @@ __socket_server_bind(rpc_transport_t *this) + + ret = setsockopt(priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setsockopt() for SO_REUSEADDR failed (%s)", strerror(errno)); + } +@@ -955,7 +955,7 @@ __socket_server_bind(rpc_transport_t *this) + if (reuse_check_sock >= 0) { + ret = connect(reuse_check_sock, SA(&unix_addr), + this->myinfo.sockaddr_len); +- if ((ret == -1) && (ECONNREFUSED == errno)) { ++ if ((ret != 0) && (ECONNREFUSED == errno)) { + sys_unlink(((struct sockaddr_un *)&unix_addr)->sun_path); + } + gf_log(this->name, GF_LOG_INFO, +@@ -967,7 +967,7 @@ __socket_server_bind(rpc_transport_t *this) + ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr, + this->myinfo.sockaddr_len); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s", + this->myinfo.identifier, strerror(errno)); + if (errno == EADDRINUSE) { +@@ -976,7 +976,7 @@ __socket_server_bind(rpc_transport_t *this) + } + if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) { + if (getsockname(priv->sock, SA(&this->myinfo.sockaddr), +- &this->myinfo.sockaddr_len) == -1) { ++ &this->myinfo.sockaddr_len) != 0) { + gf_log(this->name, GF_LOG_WARNING, + "getsockname on (%d) failed (%s)", priv->sock, + strerror(errno)); +@@ -1004,7 +1004,7 @@ __socket_nonblock(int fd) + + flags = fcntl(fd, F_GETFL); + +- if (flags != -1) ++ if (flags >= 0) + ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK); + + return ret; +@@ -1034,7 +1034,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + #endif + + ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", 
GF_LOG_WARNING, + "failed to set keep alive option on socket %d", fd); + goto err; +@@ -1051,7 +1051,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepaliveintvl, + sizeof(keepaliveintvl)); + #endif +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set keep alive interval on socket %d", fd); + goto err; +@@ -1062,7 +1062,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepaliveidle, + sizeof(keepaliveidle)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set keep idle %d on socket %d, %s", keepaliveidle, fd, + strerror(errno)); +@@ -1070,7 +1070,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + } + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepaliveintvl, + sizeof(keepaliveintvl)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set keep interval %d on socket %d, %s", + keepaliveintvl, fd, strerror(errno)); +@@ -1082,7 +1082,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + goto done; + ret = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout_ms, + sizeof(timeout_ms)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set " + "TCP_USER_TIMEOUT %d on socket %d, %s", +@@ -1093,7 +1093,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + #if defined(TCP_KEEPCNT) + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &keepalivecnt, + sizeof(keepalivecnt)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set " + "TCP_KEEPCNT %d on socket %d, %s", +@@ -1366,7 +1366,7 @@ socket_event_poll_err(rpc_transport_t *this, int gen, int idx) + + pthread_mutex_lock(&priv->out_lock); + { +- if ((priv->gen == gen) 
&& (priv->idx == idx) && (priv->sock != -1)) { ++ if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock >= 0)) { + __socket_ioq_flush(this); + __socket_reset(this); + socket_closed = _gf_true; +@@ -1405,7 +1405,7 @@ socket_event_poll_out(rpc_transport_t *this) + if (priv->connected == 1) { + ret = __socket_ioq_churn(this); + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_TRACE, + "__socket_ioq_churn returned -1; " + "disconnecting socket"); +@@ -1463,7 +1463,7 @@ __socket_read_simple_msg(rpc_transport_t *this) + &bytes_read); + } + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "reading from socket failed. Error (%s), " + "peer (%s)", +@@ -1661,8 +1661,8 @@ __socket_read_vectored_request(rpc_transport_t *this, + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- RPC_LASTFRAG(in->fraghdr))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ RPC_LASTFRAG(in->fraghdr))) { + request->vector_state = SP_STATE_VECTORED_REQUEST_INIT; + in->payload_vector.iov_len = ((unsigned long)frag->fragcurrent - + (unsigned long) +@@ -1739,8 +1739,8 @@ __socket_read_request(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + request->header_state = SP_STATE_REQUEST_HEADER_INIT; + } + +@@ -1870,8 +1870,8 @@ __socket_read_accepted_successful_reply(rpc_transport_t *this) + /* now read the entire remaining msg into new iobuf */ + ret = __socket_read_simple_msg(this); + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- RPC_LASTFRAG(in->fraghdr))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ RPC_LASTFRAG(in->fraghdr))) 
{ + frag->call_body.reply.accepted_success_state = + SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT; + } +@@ -2003,8 +2003,8 @@ __socket_read_accepted_successful_reply_v2(rpc_transport_t *this) + /* now read the entire remaining msg into new iobuf */ + ret = __socket_read_simple_msg(this); + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- RPC_LASTFRAG(in->fraghdr))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ RPC_LASTFRAG(in->fraghdr))) { + frag->call_body.reply.accepted_success_state = + SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT; + } +@@ -2103,8 +2103,8 @@ __socket_read_accepted_reply(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + frag->call_body.reply + .accepted_state = SP_STATE_ACCEPTED_REPLY_INIT; + } +@@ -2169,8 +2169,8 @@ __socket_read_vectored_reply(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + frag->call_body.reply + .status_state = SP_STATE_VECTORED_REPLY_STATUS_INIT; + in->payload_vector.iov_len = (unsigned long)frag->fragcurrent - +@@ -2237,7 +2237,7 @@ __socket_read_reply(rpc_transport_t *this) + /* Transition back to externally visible state. 
*/ + frag->state = SP_STATE_READ_MSGTYPE; + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "notify for event MAP_XID failed for %s", + this->peerinfo.identifier); +@@ -2315,8 +2315,8 @@ __socket_read_frag(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + /* frag->state = SP_STATE_NADA; */ + frag->state = SP_STATE_RPCFRAG_INIT; + } +@@ -2400,7 +2400,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + ret = __socket_readv(this, in->pending_vector, 1, + &in->pending_vector, &in->pending_count, + NULL); +- if (ret == -1) ++ if (ret < 0) + goto out; + + if (ret > 0) { +@@ -2422,7 +2422,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + in->total_bytes_read += RPC_FRAGSIZE(in->fraghdr); + + if (in->total_bytes_read >= GF_UNIT_GB) { +- ret = -ENOMEM; ++ ret = -1; + goto out; + } + +@@ -2430,7 +2430,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + this->ctx->iobuf_pool, + (in->total_bytes_read + sizeof(in->fraghdr))); + if (!iobuf) { +- ret = -ENOMEM; ++ ret = -1; + goto out; + } + +@@ -2457,7 +2457,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + case SP_STATE_READING_FRAG: + ret = __socket_read_frag(this); + +- if ((ret == -1) || ++ if ((ret < 0) || + (frag->bytes_read != RPC_FRAGSIZE(in->fraghdr))) { + goto out; + } +@@ -2575,7 +2575,7 @@ socket_event_poll_in(rpc_transport_t *this, gf_boolean_t notify_handled) + pthread_mutex_unlock(&priv->notify.lock); + } + +- if (notify_handled && (ret != -1)) ++ if (notify_handled && (ret >= 0)) + event_handled(ctx->event_pool, priv->sock, priv->idx, priv->gen); + + if (pollin) { +@@ -2618,10 +2618,10 @@ socket_connect_finish(rpc_transport_t *this) + + ret = __socket_connect_finish(priv->sock); + +- if (ret == -1 && errno == EINPROGRESS) 
++ if ((ret < 0) && (errno == EINPROGRESS)) + ret = 1; + +- if (ret == -1 && errno != EINPROGRESS) { ++ if ((ret < 0) && (errno != EINPROGRESS)) { + if (!priv->connect_finish_log) { + gf_log(this->name, GF_LOG_ERROR, + "connection to %s failed (%s); " +@@ -2640,7 +2640,7 @@ socket_connect_finish(rpc_transport_t *this) + + ret = getsockname(priv->sock, SA(&this->myinfo.sockaddr), + &this->myinfo.sockaddr_len); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "getsockname on (%d) failed (%s) - " + "disconnecting socket", +@@ -2924,6 +2924,13 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in, + return; + } + ++ /* At this point we are sure no other thread is using the transport because ++ * we cannot receive more events until we call gf_event_handled(). However ++ * this function may call gf_event_handled() in some cases. When this is ++ * done, the transport may be destroyed at any moment if another thread ++ * handled an error event. To prevent that we take a reference here. */ ++ rpc_transport_ref(this); ++ + GF_VALIDATE_OR_GOTO("socket", this, out); + GF_VALIDATE_OR_GOTO("socket", this->private, out); + GF_VALIDATE_OR_GOTO("socket", this->xl, out); +@@ -2960,7 +2967,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in, + if (ret > 0) { + gf_log(this->name, GF_LOG_TRACE, + "(sock:%d) returning to wait on socket", priv->sock); +- return; ++ goto out; + } + } else { + char *sock_type = (priv->is_server ? 
"Server" : "Client"); +@@ -3015,7 +3022,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in, + } + + out: +- return; ++ rpc_transport_unref(this); + } + + static void +@@ -3074,7 +3081,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + + event_handled(ctx->event_pool, fd, idx, gen); + +- if (new_sock == -1) { ++ if (new_sock < 0) { + gf_log(this->name, GF_LOG_WARNING, "accept on %d failed (%s)", + priv->sock, strerror(errno)); + goto out; +@@ -3082,7 +3089,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + + if (priv->nodelay && (new_sockaddr.ss_family != AF_UNIX)) { + ret = __socket_nodelay(new_sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "setsockopt() failed for " + "NODELAY (%s)", +@@ -3094,7 +3101,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + ret = __socket_keepalive(new_sock, new_sockaddr.ss_family, + priv->keepaliveintvl, priv->keepaliveidle, + priv->keepalivecnt, priv->timeout); +- if (ret == -1) ++ if (ret != 0) + gf_log(this->name, GF_LOG_WARNING, + "Failed to set keep-alive: %s", strerror(errno)); + } +@@ -3110,7 +3117,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + } + + ret = pthread_mutex_init(&new_trans->lock, NULL); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "pthread_mutex_init() failed: %s; closing newly accepted " + "socket %d", +@@ -3130,7 +3137,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + + ret = getsockname(new_sock, SA(&new_trans->myinfo.sockaddr), + &new_trans->myinfo.sockaddr_len); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "getsockname on socket %d " + "failed (errno:%s); closing newly accepted socket", +@@ -3237,7 +3244,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + */ + ret = 
rpc_transport_notify(this, RPC_TRANSPORT_ACCEPT, new_trans); + +- if (ret != -1) { ++ if (ret >= 0) { + new_priv->idx = event_register( + ctx->event_pool, new_sock, socket_event_handler, new_trans, + 1, 0, new_trans->notify_poller_death); +@@ -3275,7 +3282,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + rpc_transport_unref(new_trans); + } + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "closing newly accepted socket"); + sys_close(new_sock); + /* this unref is to actually cause the destruction of +@@ -3406,7 +3413,7 @@ socket_connect(rpc_transport_t *this, int port) + + pthread_mutex_lock(&priv->out_lock); + { +- if (priv->sock != -1) { ++ if (priv->sock >= 0) { + gf_log_callingfn(this->name, GF_LOG_TRACE, + "connect () called on transport " + "already connected"); +@@ -3420,7 +3427,7 @@ socket_connect(rpc_transport_t *this, int port) + + ret = socket_client_get_remote_sockaddr(this, &sock_union.sa, + &sockaddr_len, &sa_family); +- if (ret == -1) { ++ if (ret < 0) { + /* logged inside client_get_remote_sockaddr */ + goto unlock; + } +@@ -3439,7 +3446,7 @@ socket_connect(rpc_transport_t *this, int port) + this->peerinfo.sockaddr_len = sockaddr_len; + + priv->sock = sys_socket(sa_family, SOCK_STREAM, 0); +- if (priv->sock == -1) { ++ if (priv->sock < 0) { + gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)", + strerror(errno)); + ret = -1; +@@ -3451,7 +3458,7 @@ socket_connect(rpc_transport_t *this, int port) + */ + if (priv->windowsize != 0) { + if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setting receive window " + "size failed: %d: %d: %s", +@@ -3459,7 +3466,7 @@ socket_connect(rpc_transport_t *this, int port) + } + + if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 
0) { + gf_log(this->name, GF_LOG_ERROR, + "setting send window size " + "failed: %d: %d: %s", +@@ -3484,7 +3491,7 @@ socket_connect(rpc_transport_t *this, int port) + if (priv->nodelay && (sa_family != AF_UNIX)) { + ret = __socket_nodelay(priv->sock); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "NODELAY on %d failed (%s)", + priv->sock, strerror(errno)); + } +@@ -3494,7 +3501,7 @@ socket_connect(rpc_transport_t *this, int port) + ret = __socket_keepalive(priv->sock, sa_family, + priv->keepaliveintvl, priv->keepaliveidle, + priv->keepalivecnt, priv->timeout); +- if (ret == -1) ++ if (ret != 0) + gf_log(this->name, GF_LOG_ERROR, "Failed to set keep-alive: %s", + strerror(errno)); + } +@@ -3516,7 +3523,7 @@ socket_connect(rpc_transport_t *this, int port) + + ret = client_bind(this, SA(&this->myinfo.sockaddr), + &this->myinfo.sockaddr_len, priv->sock); +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "client bind failed: %s", + strerror(errno)); + goto handler; +@@ -3525,7 +3532,7 @@ socket_connect(rpc_transport_t *this, int port) + /* make socket non-blocking for all types of sockets */ + if (!priv->bio) { + ret = __socket_nonblock(priv->sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "NBIO on %d failed (%s)", + priv->sock, strerror(errno)); + goto handler; +@@ -3552,7 +3559,7 @@ socket_connect(rpc_transport_t *this, int port) + + connect_attempted = _gf_true; + +- if (ret == -1 && errno == ENOENT && ign_enoent) { ++ if ((ret != 0) && (errno == ENOENT) && ign_enoent) { + gf_log(this->name, GF_LOG_WARNING, + "Ignore failed connection attempt on %s, (%s) ", + this->peerinfo.identifier, strerror(errno)); +@@ -3570,7 +3577,7 @@ socket_connect(rpc_transport_t *this, int port) + goto handler; + } + +- if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) { ++ if ((ret != 0) && (errno != EINPROGRESS) && (errno != ENOENT)) { + /* For unix path based sockets, the socket path is 
+ * cryptic (md5sum of path) and may not be useful for + * the user in debugging so log it in DEBUG +@@ -3634,8 +3641,8 @@ socket_connect(rpc_transport_t *this, int port) + pthread_mutex_unlock(&priv->out_lock); + + err: +- /* if sock != -1, then cleanup is done from the event handler */ +- if (ret == -1 && sock == -1) { ++ /* if sock >= 0, then cleanup is done from the event handler */ ++ if ((ret < 0) && (sock < 0)) { + /* Cleaup requires to send notification to upper layer which + intern holds the big_lock. There can be dead-lock situation + if big_lock is already held by the current thread. +@@ -3689,20 +3696,20 @@ socket_listen(rpc_transport_t *this) + } + pthread_mutex_unlock(&priv->out_lock); + +- if (sock != -1) { ++ if (sock >= 0) { + gf_log_callingfn(this->name, GF_LOG_DEBUG, "already listening"); + return ret; + } + + ret = socket_server_get_local_sockaddr(this, SA(&sockaddr), &sockaddr_len, + &sa_family); +- if (ret == -1) { ++ if (ret < 0) { + return ret; + } + + pthread_mutex_lock(&priv->out_lock); + { +- if (priv->sock != -1) { ++ if (priv->sock >= 0) { + gf_log(this->name, GF_LOG_DEBUG, "already listening"); + goto unlock; + } +@@ -3712,7 +3719,7 @@ socket_listen(rpc_transport_t *this) + + priv->sock = sys_socket(sa_family, SOCK_STREAM, 0); + +- if (priv->sock == -1) { ++ if (priv->sock < 0) { + gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)", + strerror(errno)); + goto unlock; +@@ -3723,7 +3730,7 @@ socket_listen(rpc_transport_t *this) + */ + if (priv->windowsize != 0) { + if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setting receive window size " + "failed: %d: %d: %s", +@@ -3731,7 +3738,7 @@ socket_listen(rpc_transport_t *this) + } + + if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 0) { + gf_log(this->name, 
GF_LOG_ERROR, + "setting send window size failed:" + " %d: %d: %s", +@@ -3741,7 +3748,7 @@ socket_listen(rpc_transport_t *this) + + if (priv->nodelay && (sa_family != AF_UNIX)) { + ret = __socket_nodelay(priv->sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setsockopt() failed for NODELAY (%s)", strerror(errno)); + } +@@ -3750,7 +3757,7 @@ socket_listen(rpc_transport_t *this) + if (!priv->bio) { + ret = __socket_nonblock(priv->sock); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "NBIO on socket %d failed " + "(errno:%s); closing socket", +@@ -3763,7 +3770,7 @@ socket_listen(rpc_transport_t *this) + + ret = __socket_server_bind(this); + +- if ((ret == -EADDRINUSE) || (ret == -1)) { ++ if (ret < 0) { + /* logged inside __socket_server_bind() */ + gf_log(this->name, GF_LOG_ERROR, + "__socket_server_bind failed;" +@@ -3779,7 +3786,7 @@ socket_listen(rpc_transport_t *this) + + ret = listen(priv->sock, priv->backlog); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "could not set socket %d to listen mode (errno:%s); " + "closing socket", +@@ -4025,7 +4032,7 @@ reconfigure(rpc_transport_t *this, dict_t *options) + priv = this->private; + + if (dict_get_str(options, "transport.socket.keepalive", &optstr) == 0) { +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'transport.socket.keepalive' takes only " + "boolean options, not taking any action"); +@@ -4094,7 +4101,7 @@ reconfigure(rpc_transport_t *this, dict_t *options) + if (dict_get(options, "non-blocking-io")) { + optstr = data_to_str(dict_get(options, "non-blocking-io")); + +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'non-blocking-io' takes only boolean options," + " not taking any action"); +@@ -4109,7 +4116,7 @@ 
reconfigure(rpc_transport_t *this, dict_t *options) + + if (!priv->bio) { + ret = __socket_nonblock(priv->sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, "NBIO on %d failed (%s)", + priv->sock, strerror(errno)); + goto out; +@@ -4508,7 +4515,7 @@ socket_init(rpc_transport_t *this) + if (dict_get(this->options, "non-blocking-io")) { + optstr = data_to_str(dict_get(this->options, "non-blocking-io")); + +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'non-blocking-io' takes only boolean options," + " not taking any action"); +@@ -4528,7 +4535,7 @@ socket_init(rpc_transport_t *this) + optstr = data_to_str( + dict_get(this->options, "transport.socket.nodelay")); + +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'transport.socket.nodelay' takes only " + "boolean options, not taking any action"); +@@ -4559,7 +4566,7 @@ socket_init(rpc_transport_t *this) + priv->keepalivecnt = GF_KEEPALIVE_COUNT; + if (dict_get_str(this->options, "transport.socket.keepalive", &optstr) == + 0) { +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'transport.socket.keepalive' takes only " + "boolean options, not taking any action"); +@@ -4609,7 +4616,7 @@ socket_init(rpc_transport_t *this) + if (dict_get(this->options, "transport.socket.read-fail-log")) { + optstr = data_to_str( + dict_get(this->options, "transport.socket.read-fail-log")); +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_WARNING, + "'transport.socket.read-fail-log' takes only " + "boolean options; logging socket read fails"); +@@ -4646,7 +4653,7 @@ fini(rpc_transport_t *this) + + priv = this->private; + if (priv) { +- if (priv->sock 
!= -1) { ++ if (priv->sock >= 0) { + pthread_mutex_lock(&priv->out_lock); + { + __socket_ioq_flush(this); +@@ -4683,7 +4690,7 @@ init(rpc_transport_t *this) + + ret = socket_init(this); + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_DEBUG, "socket_init() failed"); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch b/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch new file mode 100644 index 0000000..028a227 --- /dev/null +++ b/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch @@ -0,0 +1,120 @@ +From eb37a3b57415d2d4206ecdd2db10530366a0d1b1 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Fri, 13 Dec 2019 15:20:27 +0530 +Subject: [PATCH 342/344] Revert "hooks: remove selinux hooks" + +This reverts commit 421743b7cfa6a249544f6abb4cca5a612bd20ea1. + +Note:- We are not bringing back features.selinux but just the hooks for + setting SELinux context on bricks + +Label: DOWNSTREAM ONLY + +Change-Id: Iccc10428361cac59b294e1d7aa1ba8187c20029e +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/187691 +Tested-by: RHGS Build Bot +Reviewed-by: Niels de Vos +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + configure.ac | 4 ++++ + extras/hook-scripts/Makefile.am | 2 +- + extras/hook-scripts/create/Makefile.am | 1 + + extras/hook-scripts/create/post/Makefile.am | 6 ++++++ + extras/hook-scripts/delete/Makefile.am | 1 + + extras/hook-scripts/delete/pre/Makefile.am | 6 ++++++ + glusterfs.spec.in | 2 ++ + 7 files changed, 21 insertions(+), 1 deletion(-) + create mode 100644 extras/hook-scripts/create/Makefile.am + create mode 100644 extras/hook-scripts/create/post/Makefile.am + create mode 100644 extras/hook-scripts/delete/Makefile.am + create mode 100644 extras/hook-scripts/delete/pre/Makefile.am + +diff --git a/configure.ac b/configure.ac +index 327733e..98ee311 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -221,6 +221,10 @@ AC_CONFIG_FILES([Makefile + 
extras/hook-scripts/add-brick/Makefile + extras/hook-scripts/add-brick/pre/Makefile + extras/hook-scripts/add-brick/post/Makefile ++ extras/hook-scripts/create/Makefile ++ extras/hook-scripts/create/post/Makefile ++ extras/hook-scripts/delete/Makefile ++ extras/hook-scripts/delete/pre/Makefile + extras/hook-scripts/start/Makefile + extras/hook-scripts/start/post/Makefile + extras/hook-scripts/set/Makefile +diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am +index 771b37e..26059d7 100644 +--- a/extras/hook-scripts/Makefile.am ++++ b/extras/hook-scripts/Makefile.am +@@ -1,5 +1,5 @@ + EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh +-SUBDIRS = add-brick set start stop reset ++SUBDIRS = add-brick create delete set start stop reset + + scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/ + if USE_GEOREP +diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am +new file mode 100644 +index 0000000..b083a91 +--- /dev/null ++++ b/extras/hook-scripts/create/Makefile.am +@@ -0,0 +1 @@ ++SUBDIRS = post +diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am +new file mode 100644 +index 0000000..919801a +--- /dev/null ++++ b/extras/hook-scripts/create/post/Makefile.am +@@ -0,0 +1,6 @@ ++EXTRA_DIST = S10selinux-label-brick.sh ++ ++scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/ ++if WITH_SERVER ++scripts_SCRIPTS = S10selinux-label-brick.sh ++endif +diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am +new file mode 100644 +index 0000000..c98a05d +--- /dev/null ++++ b/extras/hook-scripts/delete/Makefile.am +@@ -0,0 +1 @@ ++SUBDIRS = pre +diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am +new file mode 100644 +index 0000000..93a6b85 +--- /dev/null ++++ b/extras/hook-scripts/delete/pre/Makefile.am +@@ -0,0 +1,6 @@ ++EXTRA_DIST = 
S10selinux-del-fcontext.sh ++ ++scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/ ++if WITH_SERVER ++scripts_SCRIPTS = S10selinux-del-fcontext.sh ++endif +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 1b975b2..012989a 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1453,6 +1453,7 @@ exit 0 + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post +@@ -1461,6 +1462,7 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post + %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre +-- +1.8.3.1 + diff --git a/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch b/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch new file mode 100644 index 0000000..77d2f64 --- /dev/null +++ b/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch @@ -0,0 +1,155 @@ +From 8a8c508b529f7609fc5caa10bc79ba817f5d274a Mon Sep 17 00:00:00 2001 +From: Milan Zink +Date: Mon, 5 Feb 2018 15:04:37 +0100 +Subject: [PATCH 343/344] extras/hooks: syntactical errors in SELinux 
hooks, + scipt logic improved + +Backport of https://review.gluster.org/c/glusterfs/+/19502 + +Change-Id: Ia5fa1df81bbaec3a84653d136a331c76b457f42c +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/187692 +Tested-by: RHGS Build Bot +Reviewed-by: Niels de Vos +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../create/post/S10selinux-label-brick.sh | 13 +++-- + .../delete/pre/S10selinux-del-fcontext.sh | 60 +++++++++++++--------- + tests/bugs/glusterfs-server/bug-877992.t | 4 +- + 3 files changed, 46 insertions(+), 31 deletions(-) + +diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +index de242d2..f9b4b1a 100755 +--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh ++++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +@@ -34,18 +34,21 @@ parse_args () { + + set_brick_labels() + { +- volname=${1} ++ volname="${1}" + + # grab the path for each local brick +- brickpath="/var/lib/glusterd/vols/${volname}/bricks/*" +- brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u) ++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/" ++ brickdirs=$( ++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \ ++ cut -d= -f 2 | \ ++ sort -u ++ ) + + for b in ${brickdirs}; do + # Add a file context for each brick path and associate with the + # glusterd_brick_t SELinux type. +- pattern="${b}\(/.*\)?" ++ pattern="${b}(/.*)?" + semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}" +- + # Set the labels on the new brick path. 
+ restorecon -R "${b}" + done +diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +index 6eba66f..e7f4e8f 100755 +--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh ++++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +@@ -15,45 +15,55 @@ OPTSPEC="volname:" + VOL= + + function parse_args () { +- ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@") +- eval set -- "$ARGS" +- +- while true; do +- case $1 in +- --volname) +- shift +- VOL=$1 +- ;; +- *) +- shift +- break +- ;; +- esac ++ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") ++ eval set -- "${ARGS}" ++ ++ while true; do ++ case ${1} in ++ --volname) ++ shift ++ VOL=${1} ++ ;; ++ *) + shift +- done ++ break ++ ;; ++ esac ++ shift ++ done + } + + function delete_brick_fcontext() + { +- volname=$1 ++ volname="${1}" ++ ++ # grab the path for each local brick ++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/" ++ brickdirs=$( ++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \ ++ cut -d= -f 2 | \ ++ sort -u ++ ) ++ ++ for b in ${brickdirs} ++ do ++ # remove the file context associated with the brick path ++ pattern="${b}(/.*)?" ++ semanage fcontext --delete "${pattern}" + +- # grab the path for each local brick +- brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2) ++ # remove the labels on brick path. ++ restorecon -R "${b}" ++ done + +- for b in $brickdirs +- do +- # remove the file context associated with the brick path +- semanage fcontext --delete $b\(/.*\)? 
+- done + } + + SELINUX_STATE=$(which getenforce && getenforce) + [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 + + parse_args "$@" +-[ -z "$VOL" ] && exit 1 ++[ -z "${VOL}" ] && exit 1 + +-delete_brick_fcontext $VOL ++delete_brick_fcontext "${VOL}" + + # failure to delete the fcontext is not fatal + exit 0 +diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t +index aeb73ed..300000b 100755 +--- a/tests/bugs/glusterfs-server/bug-877992.t ++++ b/tests/bugs/glusterfs-server/bug-877992.t +@@ -46,7 +46,9 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1; + EXPECT "$V0" volinfo_field $V0 'Volume Name'; + EXPECT 'Created' volinfo_field $V0 'Status'; + EXPECT 'createPre' cat /tmp/pre.out; +-EXPECT 'createPost' cat /tmp/post.out; ++# Spost.sh comes after S10selinux-label-brick.sh under create post hook script ++# list. So consider the delay in setting SELinux context on bricks ++EXPECT_WITHIN 5 'createPost' cat /tmp/post.out; + hooks_cleanup 'create' + + +-- +1.8.3.1 + diff --git a/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch b/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch new file mode 100644 index 0000000..341aeae --- /dev/null +++ b/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch @@ -0,0 +1,412 @@ +From 02a93265fe4e78e7fc3fa8c6caa773cbe02f50b6 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Fri, 20 Dec 2019 16:01:59 +0530 +Subject: [PATCH 344/344] Revert all fixes to include SELinux hook scripts + +Following are the reverts included with this change: + +Revert "extras/hooks: syntactical errors in SELinux hooks, scipt logic improved" +Revert "Revert "hooks: remove selinux hooks"" +Revert "tests: subdir-mount.t is failing for brick_mux regrssion" +Revert "extras/hooks: Install and package newly added post add-brick hook script" +Revert "extras/hooks: Add SELinux label on new bricks during add-brick" + +Label: DOWNSTREAM ONLY + +See bug for more details. 
+ +Change-Id: I5c9b9e0e6446568ce16af17257fa39338198a827 +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/188169 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + configure.ac | 4 - + extras/hook-scripts/Makefile.am | 2 +- + extras/hook-scripts/add-brick/post/Makefile.am | 4 +- + .../add-brick/post/S10selinux-label-brick.sh | 100 --------------------- + extras/hook-scripts/create/Makefile.am | 1 - + extras/hook-scripts/create/post/Makefile.am | 6 -- + .../create/post/S10selinux-label-brick.sh | 13 ++- + extras/hook-scripts/delete/Makefile.am | 1 - + extras/hook-scripts/delete/pre/Makefile.am | 6 -- + .../delete/pre/S10selinux-del-fcontext.sh | 60 ++++++------- + glusterfs.spec.in | 3 - + tests/bugs/glusterfs-server/bug-877992.t | 4 +- + tests/features/subdir-mount.t | 11 +-- + 13 files changed, 37 insertions(+), 178 deletions(-) + delete mode 100755 extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh + delete mode 100644 extras/hook-scripts/create/Makefile.am + delete mode 100644 extras/hook-scripts/create/post/Makefile.am + delete mode 100644 extras/hook-scripts/delete/Makefile.am + delete mode 100644 extras/hook-scripts/delete/pre/Makefile.am + +diff --git a/configure.ac b/configure.ac +index 98ee311..327733e 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -221,10 +221,6 @@ AC_CONFIG_FILES([Makefile + extras/hook-scripts/add-brick/Makefile + extras/hook-scripts/add-brick/pre/Makefile + extras/hook-scripts/add-brick/post/Makefile +- extras/hook-scripts/create/Makefile +- extras/hook-scripts/create/post/Makefile +- extras/hook-scripts/delete/Makefile +- extras/hook-scripts/delete/pre/Makefile + extras/hook-scripts/start/Makefile + extras/hook-scripts/start/post/Makefile + extras/hook-scripts/set/Makefile +diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am +index 26059d7..771b37e 100644 +--- a/extras/hook-scripts/Makefile.am ++++ 
b/extras/hook-scripts/Makefile.am +@@ -1,5 +1,5 @@ + EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh +-SUBDIRS = add-brick create delete set start stop reset ++SUBDIRS = add-brick set start stop reset + + scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/ + if USE_GEOREP +diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am +index 9b236df..bfc0c1c 100644 +--- a/extras/hook-scripts/add-brick/post/Makefile.am ++++ b/extras/hook-scripts/add-brick/post/Makefile.am +@@ -1,6 +1,6 @@ +-EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh ++EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh + + hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/ + if WITH_SERVER +-hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh ++hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh + endif +diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +deleted file mode 100755 +index 4a17c99..0000000 +--- a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh ++++ /dev/null +@@ -1,100 +0,0 @@ +-#!/bin/bash +-# +-# Install to hooks//add-brick/post +-# +-# Add an SELinux file context for each brick using the glusterd_brick_t type. +-# This ensures that the brick is relabeled correctly on an SELinux restart or +-# restore. Subsequently, run a restore on the brick path to set the selinux +-# labels. 
+-# +-### +- +-PROGNAME="Sselinux" +-OPTSPEC="volname:,version:,gd-workdir:,volume-op:" +-VOL= +- +-parse_args () { +- ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") +- eval set -- "${ARGS}" +- +- while true; do +- case ${1} in +- --volname) +- shift +- VOL=${1} +- ;; +- --gd-workdir) +- shift +- GLUSTERD_WORKDIR=$1 +- ;; +- --version) +- shift +- ;; +- --volume-op) +- shift +- ;; +- *) +- shift +- break +- ;; +- esac +- shift +- done +-} +- +-set_brick_labels() +-{ +- local volname="${1}" +- local fctx +- local list=() +- +- fctx="$(semanage fcontext --list -C)" +- +- # wait for new brick path to be updated under +- # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/ +- sleep 5 +- +- # grab the path for each local brick +- brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/" +- brickdirs=$( +- find "${brickpath}" -type f -exec grep '^path=' {} \; | \ +- cut -d= -f 2 | \ +- sort -u +- ) +- +- # create a list of bricks for which custom SELinux +- # label doesn't exist +- for b in ${brickdirs}; do +- pattern="${b}(/.*)?" +- echo "${fctx}" | grep "^${pattern}\s" >/dev/null +- if [[ $? -ne 0 ]]; then +- list+=("${pattern}") +- fi +- done +- +- # Add a file context for each brick path in the list and associate with the +- # glusterd_brick_t SELinux type. +- for p in ${list[@]} +- do +- semanage fcontext --add -t glusterd_brick_t -r s0 "${p}" +- done +- +- # Set the labels for which SELinux label was added above +- for b in ${brickdirs} +- do +- echo "${list[@]}" | grep "${b}" >/dev/null +- if [[ $? 
-eq 0 ]]; then +- restorecon -R "${b}" +- fi +- done +-} +- +-SELINUX_STATE=$(which getenforce && getenforce) +-[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 +- +-parse_args "$@" +-[ -z "${VOL}" ] && exit 1 +- +-set_brick_labels "${VOL}" +- +-exit 0 +diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am +deleted file mode 100644 +index b083a91..0000000 +--- a/extras/hook-scripts/create/Makefile.am ++++ /dev/null +@@ -1 +0,0 @@ +-SUBDIRS = post +diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am +deleted file mode 100644 +index 919801a..0000000 +--- a/extras/hook-scripts/create/post/Makefile.am ++++ /dev/null +@@ -1,6 +0,0 @@ +-EXTRA_DIST = S10selinux-label-brick.sh +- +-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/ +-if WITH_SERVER +-scripts_SCRIPTS = S10selinux-label-brick.sh +-endif +diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +index f9b4b1a..de242d2 100755 +--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh ++++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +@@ -34,21 +34,18 @@ parse_args () { + + set_brick_labels() + { +- volname="${1}" ++ volname=${1} + + # grab the path for each local brick +- brickpath="/var/lib/glusterd/vols/${volname}/bricks/" +- brickdirs=$( +- find "${brickpath}" -type f -exec grep '^path=' {} \; | \ +- cut -d= -f 2 | \ +- sort -u +- ) ++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/*" ++ brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u) + + for b in ${brickdirs}; do + # Add a file context for each brick path and associate with the + # glusterd_brick_t SELinux type. +- pattern="${b}(/.*)?" ++ pattern="${b}\(/.*\)?" + semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}" ++ + # Set the labels on the new brick path. 
+ restorecon -R "${b}" + done +diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am +deleted file mode 100644 +index c98a05d..0000000 +--- a/extras/hook-scripts/delete/Makefile.am ++++ /dev/null +@@ -1 +0,0 @@ +-SUBDIRS = pre +diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am +deleted file mode 100644 +index 93a6b85..0000000 +--- a/extras/hook-scripts/delete/pre/Makefile.am ++++ /dev/null +@@ -1,6 +0,0 @@ +-EXTRA_DIST = S10selinux-del-fcontext.sh +- +-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/ +-if WITH_SERVER +-scripts_SCRIPTS = S10selinux-del-fcontext.sh +-endif +diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +index e7f4e8f..6eba66f 100755 +--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh ++++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +@@ -15,55 +15,45 @@ OPTSPEC="volname:" + VOL= + + function parse_args () { +- ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") +- eval set -- "${ARGS}" +- +- while true; do +- case ${1} in +- --volname) +- shift +- VOL=${1} +- ;; +- *) ++ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@") ++ eval set -- "$ARGS" ++ ++ while true; do ++ case $1 in ++ --volname) ++ shift ++ VOL=$1 ++ ;; ++ *) ++ shift ++ break ++ ;; ++ esac + shift +- break +- ;; +- esac +- shift +- done ++ done + } + + function delete_brick_fcontext() + { +- volname="${1}" +- +- # grab the path for each local brick +- brickpath="/var/lib/glusterd/vols/${volname}/bricks/" +- brickdirs=$( +- find "${brickpath}" -type f -exec grep '^path=' {} \; | \ +- cut -d= -f 2 | \ +- sort -u +- ) +- +- for b in ${brickdirs} +- do +- # remove the file context associated with the brick path +- pattern="${b}(/.*)?" +- semanage fcontext --delete "${pattern}" ++ volname=$1 + +- # remove the labels on brick path. 
+- restorecon -R "${b}" +- done ++ # grab the path for each local brick ++ brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2) + ++ for b in $brickdirs ++ do ++ # remove the file context associated with the brick path ++ semanage fcontext --delete $b\(/.*\)? ++ done + } + + SELINUX_STATE=$(which getenforce && getenforce) + [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 + + parse_args "$@" +-[ -z "${VOL}" ] && exit 1 ++[ -z "$VOL" ] && exit 1 + +-delete_brick_fcontext "${VOL}" ++delete_brick_fcontext $VOL + + # failure to delete the fcontext is not fatal + exit 0 +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 012989a..671ee27 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1447,13 +1447,11 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post +@@ -1462,7 +1460,6 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post + 
%{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre +diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t +index 300000b..aeb73ed 100755 +--- a/tests/bugs/glusterfs-server/bug-877992.t ++++ b/tests/bugs/glusterfs-server/bug-877992.t +@@ -46,9 +46,7 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1; + EXPECT "$V0" volinfo_field $V0 'Volume Name'; + EXPECT 'Created' volinfo_field $V0 'Status'; + EXPECT 'createPre' cat /tmp/pre.out; +-# Spost.sh comes after S10selinux-label-brick.sh under create post hook script +-# list. So consider the delay in setting SELinux context on bricks +-EXPECT_WITHIN 5 'createPost' cat /tmp/post.out; ++EXPECT 'createPost' cat /tmp/post.out; + hooks_cleanup 'create' + + +diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t +index a02bd6b..8401946 100644 +--- a/tests/features/subdir-mount.t ++++ b/tests/features/subdir-mount.t +@@ -85,17 +85,12 @@ TEST $CLI volume start $V0 + TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2 + TEST stat $M2 + +-initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` + # mount shouldn't fail even after add-brick + TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6}; + +-# Wait to execute create-subdir-mounts.sh script by glusterd +-newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` +-while [ $newcnt -eq $initcnt ] +-do +- newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` +- sleep 1 +-done ++# Give time for 
client process to get notified and use the new ++# volfile after add-brick ++sleep 1 + + # Existing mount should still be active + mount_inode=$(stat --format "%i" "$M2") +-- +1.8.3.1 + diff --git a/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch b/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch new file mode 100644 index 0000000..48b0cc8 --- /dev/null +++ b/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch @@ -0,0 +1,82 @@ +From d45c64e17e1eb8003ac1086cbd3abea32414c7f9 Mon Sep 17 00:00:00 2001 +From: Raghavendra Gowdappa +Date: Tue, 12 Feb 2019 18:33:44 +0530 +Subject: [PATCH 345/346] read-ahead/io-cache: turn off by default + +We've found perf xlators io-cache and read-ahead not adding any +performance improvement. At best read-ahead is redundant due to kernel +read-ahead and at worst io-cache is degrading the performance for +workloads that doesn't involve re-read. Given that VFS already have +both these functionalities, this patch makes these two +translators turned off by default for native fuse mounts. + +For non-native fuse mounts like gfapi (NFS-ganesha/samba) we can have +these xlators on by having custom profiles. 
+ +>Change-Id: Ie7535788909d4c741844473696f001274dc0bb60 +>Signed-off-by: Raghavendra Gowdappa +>fixes: bz#1676479 +Upstream fix link: https://review.gluster.org/#/c/glusterfs/+/22203/ + +BUG: 1788656 +Change-Id: Ie7535788909d4c741844473696f001274dc0bb60 +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/188967 +Tested-by: RHGS Build Bot +--- + tests/basic/ec/self-heal.t | 2 ++ + tests/basic/glusterd/volfile_server_switch.t | 2 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 ++-- + 3 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t +index d217559..6329bb6 100644 +--- a/tests/basic/ec/self-heal.t ++++ b/tests/basic/ec/self-heal.t +@@ -131,6 +131,8 @@ TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5} + TEST $CLI volume set $V0 client-log-level DEBUG + #Write-behind has a bug where lookup can race over write which leads to size mismatch on the mount after a 'cp' + TEST $CLI volume set $V0 performance.write-behind off ++#md-cache can return stale stat due to default timeout being 1 sec ++TEST $CLI volume set $V0 performance.stat-prefetch off + EXPECT "Created" volinfo_field $V0 'Status' + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status' +diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t +index 3090609..e11cfed 100644 +--- a/tests/basic/glusterd/volfile_server_switch.t ++++ b/tests/basic/glusterd/volfile_server_switch.t +@@ -34,7 +34,7 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H1 --volfile-server=$H2 --vol + + TEST kill_glusterd 1 + +-TEST $CLI_2 volume set $V0 performance.io-cache off ++TEST $CLI_2 volume set $V0 performance.write-behind off + + # make sure by this time directory will be created + # TODO: suggest ideal time to wait +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c 
b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 16601a2..9001b88 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2235,7 +2235,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.read-ahead", + .voltype = "performance/read-ahead", + .option = "!perf", +- .value = "on", ++ .value = "off", + .op_version = 1, + .description = "enable/disable read-ahead translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, +@@ -2249,7 +2249,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.io-cache", + .voltype = "performance/io-cache", + .option = "!perf", +- .value = "on", ++ .value = "off", + .op_version = 1, + .description = "enable/disable io-cache translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT}, +-- +1.8.3.1 + diff --git a/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch b/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch new file mode 100644 index 0000000..9fca79e --- /dev/null +++ b/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch @@ -0,0 +1,223 @@ +From e2af9793014ad67859aa73088765a52307cbe466 Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Tue, 7 Jan 2020 19:43:05 +0100 +Subject: [PATCH 346/346] fuse: degrade logging of write failure to fuse device + +Problem: + +FUSE uses failures of communicating with /dev/fuse with various +errnos to indicate in-kernel conditions to userspace. Some of these +shouldn't be handled as an application error. Also the standard +POSIX errno description should not be shown as they are misleading +in this context. + +Solution: + +When writing to the fuse device, the caller of the respective +convenience routine can mask those errnos which don't qualify to +be an error for the application in that context, so then those +shall be reported at DEBUG level. 
+ +The possible non-standard errnos are reported with their +POSIX name instead of their description to avoid confusion. +(Eg. for ENOENT we don't log "no such file or directory", +we log indeed literal "ENOENT".) + +Upstream on https://review.gluster.org/23974 +> Change-Id: I510158843e4b1d482bdc496c2e97b1860dc1ba93 +> updates: bz#1193929 +> Signed-off-by: Csaba Henk + +BUG: 1763208 +Change-Id: Ib1676bb334ed153ce74ae1c0413fc0e58fb388c7 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/189056 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mount/fuse/src/fuse-bridge.c | 78 +++++++++++++++++++++++++++++++++--- + xlators/mount/fuse/src/fuse-bridge.h | 9 ++++- + 2 files changed, 80 insertions(+), 7 deletions(-) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index ebe5c28..6e99053 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -198,7 +198,7 @@ fusedump_setup_meta(struct iovec *iovs, char *dir, + + static int + check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, +- ssize_t res) ++ ssize_t res, errnomask_t errnomask) + { + char w = 'W'; + struct iovec diov[4] = { +@@ -216,8 +216,59 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, + struct fuse_out_header *fouh = NULL; + + if (res == -1) { +- gf_log_callingfn("glusterfs-fuse", GF_LOG_ERROR, +- "writing to fuse device failed: %s", strerror(errno)); ++ const char *errdesc = NULL; ++ gf_loglevel_t loglevel = GF_LOG_ERROR; ++ ++ /* If caller masked the errno, then it ++ * does not indicate an error at the application ++ * level, so we degrade the log severity to DEBUG. 
++ */ ++ if (errnomask && errno < ERRNOMASK_MAX && ++ GET_ERRNO_MASK(errnomask, errno)) ++ loglevel = GF_LOG_DEBUG; ++ ++ switch (errno) { ++ /* The listed errnos are FUSE status indicators, ++ * not legit values according to POSIX (see write(3p)), ++ * so resolving them according to the standard ++ * POSIX interpretation would be misleading. ++ */ ++ case ENOENT: ++ errdesc = "ENOENT"; ++ break; ++ case ENOTDIR: ++ errdesc = "ENOTDIR"; ++ break; ++ case ENODEV: ++ errdesc = "ENODEV"; ++ break; ++ case EPERM: ++ errdesc = "EPERM"; ++ break; ++ case ENOMEM: ++ errdesc = "ENOMEM"; ++ break; ++ case ENOTCONN: ++ errdesc = "ENOTCONN"; ++ break; ++ case ECONNREFUSED: ++ errdesc = "ECONNREFUSED"; ++ break; ++ case EOVERFLOW: ++ errdesc = "EOVERFLOW"; ++ break; ++ case EBUSY: ++ errdesc = "EBUSY"; ++ break; ++ case ENOTEMPTY: ++ errdesc = "ENOTEMPTY"; ++ break; ++ default: ++ errdesc = strerror(errno); ++ } ++ ++ gf_log_callingfn("glusterfs-fuse", loglevel, ++ "writing to fuse device failed: %s", errdesc); + return errno; + } + +@@ -282,7 +333,7 @@ send_fuse_iov(xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out, + gf_log("glusterfs-fuse", GF_LOG_TRACE, "writev() result %d/%d %s", res, + fouh->len, res == -1 ? 
strerror(errno) : ""); + +- return check_and_dump_fuse_W(priv, iov_out, count, res); ++ return check_and_dump_fuse_W(priv, iov_out, count, res, NULL); + } + + static int +@@ -353,6 +404,15 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_ENTRY; + ++ if (ENOENT < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOENT); ++ if (ENOTDIR < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOTDIR); ++ if (EBUSY < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, EBUSY); ++ if (ENOTEMPTY < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOTEMPTY); ++ + if (dentry->name) { + nlen = strlen(dentry->name); + fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1; +@@ -437,6 +497,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + fniio->off = 0; + fniio->len = -1; + ++ if (ENOENT < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOENT); ++ + fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino, + uuid_utoa(inode->gfid)); + gf_log("glusterfs-fuse", GF_LOG_TRACE, +@@ -482,6 +545,7 @@ fuse_timed_message_new(void) + /* should be NULL if not set */ + dmsg->fuse_message_body = NULL; + INIT_LIST_HEAD(&dmsg->next); ++ memset(dmsg->errnomask, 0, sizeof(dmsg->errnomask)); + + return dmsg; + } +@@ -680,6 +744,8 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg, + dmsg->fuse_out_header.unique = finh->unique; + dmsg->fuse_out_header.len = sizeof(dmsg->fuse_out_header); + dmsg->fuse_out_header.error = -EAGAIN; ++ if (ENOENT < ERRNOMASK_MAX) ++ MASK_ERRNO(dmsg->errnomask, ENOENT); + timespec_now(&dmsg->scheduled_ts); + timespec_adjust_delta(&dmsg->scheduled_ts, + (struct timespec){0, 10000000}); +@@ -4848,7 +4914,7 @@ notify_kernel_loop(void *data) + iov_out.iov_base = node->inval_buf; + iov_out.iov_len = len; + rv = sys_writev(priv->fd, &iov_out, 1); +- check_and_dump_fuse_W(priv, &iov_out, 1, rv); ++ check_and_dump_fuse_W(priv, &iov_out, 1, rv, node->errnomask); + + GF_FREE(node); + +@@ 
-4940,7 +5006,7 @@ timed_response_loop(void *data) + iovs[1] = (struct iovec){dmsg->fuse_message_body, + len - sizeof(struct fuse_out_header)}; + rv = sys_writev(priv->fd, iovs, 2); +- check_and_dump_fuse_W(priv, iovs, 2, rv); ++ check_and_dump_fuse_W(priv, iovs, 2, rv, dmsg->errnomask); + + fuse_timed_message_free(dmsg); + +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index cf4479c..d2d462c 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -195,14 +195,20 @@ struct fuse_private { + }; + typedef struct fuse_private fuse_private_t; + ++typedef uint64_t errnomask_t[2]; ++#define MASK_ERRNO(mask, n) ((mask)[(n) >> 6] |= ((uint64_t)1 << ((n)&63))) ++#define GET_ERRNO_MASK(mask, n) ((mask)[(n) >> 6] & ((uint64_t)1 << ((n)&63))) ++#define ERRNOMASK_MAX (64 * (sizeof(errnomask_t) / sizeof(uint64_t))) ++ + #define INVAL_BUF_SIZE \ + (sizeof(struct fuse_out_header) + \ + max(sizeof(struct fuse_notify_inval_inode_out), \ + sizeof(struct fuse_notify_inval_entry_out) + NAME_MAX + 1)) + + struct fuse_invalidate_node { +- char inval_buf[INVAL_BUF_SIZE]; ++ errnomask_t errnomask; + struct list_head next; ++ char inval_buf[INVAL_BUF_SIZE]; + }; + typedef struct fuse_invalidate_node fuse_invalidate_node_t; + +@@ -210,6 +216,7 @@ struct fuse_timed_message { + struct fuse_out_header fuse_out_header; + void *fuse_message_body; + struct timespec scheduled_ts; ++ errnomask_t errnomask; + struct list_head next; + }; + typedef struct fuse_timed_message fuse_timed_message_t; +-- +1.8.3.1 + diff --git a/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch b/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch new file mode 100644 index 0000000..ff5251d --- /dev/null +++ b/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch @@ -0,0 +1,236 @@ +From 87e6ea2cd63898c5d243b0f0c719f4f6347fb829 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Thu, 5 Jan 2017 19:53:19 +0530 
+Subject: [PATCH 347/349] tools/glusterfind: handle offline bricks + +Problem: +glusterfind is unable to copy remote output file to local node when a +remove-brick is in progress on the remote node. After copying remote +files, in the --full output listing path, a "sort -u" command is run on +the collected files. However, "sort" exits with an error code if it +finds any file missing. + +Solution: +Maintain a map of (pid, output file) when the node commands are started +and remove the mapping for the pid for which the command returns an +error. Use the list of files present in the map for the "sort" command. + +Backport of: +> Patch: https://review.gluster.org/16332 +> Change-Id: Ie6e019037379f4cb163f24b1c65eb382efc2fb3b +> fixes: bz#1410439 +> Signed-off-by: Milind Changire +> Signed-off-by: Shwetha K Acharya + +BUG: 1789447 +Change-Id: Ie6e019037379f4cb163f24b1c65eb382efc2fb3b +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/189214 +Tested-by: RHGS Build Bot +Reviewed-by: Sunny Kumar +--- + tools/glusterfind/src/gfind_py2py3.py | 25 ++++++++++++++ + tools/glusterfind/src/main.py | 61 +++++++++++++++++++++-------------- + 2 files changed, 61 insertions(+), 25 deletions(-) + +diff --git a/tools/glusterfind/src/gfind_py2py3.py b/tools/glusterfind/src/gfind_py2py3.py +index 1d41ec5..87324fb 100644 +--- a/tools/glusterfind/src/gfind_py2py3.py ++++ b/tools/glusterfind/src/gfind_py2py3.py +@@ -40,6 +40,19 @@ if sys.version_info >= (3,): + def gfind_history_changelog_done(libgfc, clfile): + return libgfc.gf_history_changelog_done(clfile.encode()) + ++ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep): ++ f.write(u"{0}{1}{2}{3}{4}\n".format(row, ++ field_separator, ++ p_rep, ++ field_separator, ++ row_2_rep)) ++ ++ def gfind_write(f, row, field_separator, p_rep): ++ f.write(u"{0}{1}{2}\n".format(row, ++ field_separator, ++ p_rep)) ++ ++ + else: + + # Raw conversion of bytearray to string +@@ -61,3 +74,15 @@ else: + + def 
gfind_history_changelog_done(libgfc, clfile): + return libgfc.gf_history_changelog_done(clfile) ++ ++ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep): ++ f.write(u"{0}{1}{2}{3}{4}\n".format(row, ++ field_separator, ++ p_rep, ++ field_separator, ++ row_2_rep).encode()) ++ ++ def gfind_write(f, row, field_separator, p_rep): ++ f.write(u"{0}{1}{2}\n".format(row, ++ field_separator, ++ p_rep).encode()) +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index cc5a86f..fefe4a3 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -16,6 +16,7 @@ from multiprocessing import Process + import os + import xml.etree.cElementTree as etree + from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action ++from gfind_py2py3 import gfind_write_row, gfind_write + import logging + import shutil + import tempfile +@@ -35,9 +36,9 @@ GlusterFS Incremental API + ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError + + logger = logging.getLogger() +-node_outfiles = [] + vol_statusStr = "" + gtmpfilename = None ++g_pid_nodefile_map = {} + + + class StoreAbsPath(Action): +@@ -111,7 +112,7 @@ def node_cmd(host, host_uuid, task, cmd, args, opts): + + + def run_cmd_nodes(task, args, **kwargs): +- global node_outfiles ++ global g_pid_nodefile_map + nodes = get_nodes(args.volume) + pool = [] + for num, node in enumerate(nodes): +@@ -142,7 +143,6 @@ def run_cmd_nodes(task, args, **kwargs): + if tag == "": + tag = '""' if not is_host_local(host_uuid) else "" + +- node_outfiles.append(node_outfile) + # remote file will be copied into this directory + mkdirp(os.path.dirname(node_outfile), + exit_on_err=True, logger=logger) +@@ -180,7 +180,6 @@ def run_cmd_nodes(task, args, **kwargs): + if tag == "": + tag = '""' if not is_host_local(host_uuid) else "" + +- node_outfiles.append(node_outfile) + # remote file will be copied into this directory + mkdirp(os.path.dirname(node_outfile), + 
exit_on_err=True, logger=logger) +@@ -264,6 +263,7 @@ def run_cmd_nodes(task, args, **kwargs): + args=(host, host_uuid, task, cmd, args, opts)) + p.start() + pool.append(p) ++ g_pid_nodefile_map[p.pid] = node_outfile + + for num, p in enumerate(pool): + p.join() +@@ -271,8 +271,11 @@ def run_cmd_nodes(task, args, **kwargs): + logger.warn("Command %s failed in %s" % (task, nodes[num][1])) + if task in ["create", "delete"]: + fail("Command %s failed in %s" % (task, nodes[num][1])) +- elif task == "pre" and args.disable_partial: +- sys.exit(1) ++ elif task == "pre" or task == "query": ++ if args.disable_partial: ++ sys.exit(1) ++ else: ++ del g_pid_nodefile_map[p.pid] + + + @cache_output +@@ -512,16 +515,10 @@ def write_output(outfile, outfilemerger, field_separator): + continue + + if row_2_rep and row_2_rep != "": +- f.write(u"{0}{1}{2}{3}{4}\n".format(row[0], +- field_separator, +- p_rep, +- field_separator, +- row_2_rep).encode()) +- else: +- f.write(u"{0}{1}{2}\n".format(row[0], +- field_separator, +- p_rep).encode()) ++ gfind_write_row(f, row[0], field_separator, p_rep, field_separator, row_2_rep) + ++ else: ++ gfind_write(f, row[0], field_separator, p_rep) + + def mode_create(session_dir, args): + logger.debug("Init is called - Session: %s, Volume: %s" +@@ -571,6 +568,7 @@ def mode_create(session_dir, args): + + def mode_query(session_dir, args): + global gtmpfilename ++ global g_pid_nodefile_map + + # Verify volume status + cmd = ["gluster", 'volume', 'info', args.volume, "--xml"] +@@ -634,14 +632,20 @@ def mode_query(session_dir, args): + + # Merger + if args.full: +- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile] +- execute(cmd, +- exit_msg="Failed to merge output files " +- "collected from nodes", logger=logger) ++ if len(g_pid_nodefile_map) > 0: ++ cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ ["-o", args.outfile] ++ execute(cmd, ++ exit_msg="Failed to merge output files " ++ "collected from nodes", logger=logger) ++ else: ++ 
fail("Failed to collect any output files from peers. " ++ "Looks like all bricks are offline.", logger=logger) + else: + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) +- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) ++ outfilemerger = OutputMerger(args.outfile + ".db", ++ g_pid_nodefile_map.values()) + write_output(args.outfile, outfilemerger, args.field_separator) + + try: +@@ -656,6 +660,7 @@ def mode_query(session_dir, args): + + def mode_pre(session_dir, args): + global gtmpfilename ++ global g_pid_nodefile_map + + """ + Read from Session file and write to session.pre file +@@ -696,14 +701,20 @@ def mode_pre(session_dir, args): + + # Merger + if args.full: +- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile] +- execute(cmd, +- exit_msg="Failed to merge output files " +- "collected from nodes", logger=logger) ++ if len(g_pid_nodefile_map) > 0: ++ cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ ["-o", args.outfile] ++ execute(cmd, ++ exit_msg="Failed to merge output files " ++ "collected from nodes", logger=logger) ++ else: ++ fail("Failed to collect any output files from peers. 
" ++ "Looks like all bricks are offline.", logger=logger) + else: + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) +- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) ++ outfilemerger = OutputMerger(args.outfile + ".db", ++ g_pid_nodefile_map.values()) + write_output(args.outfile, outfilemerger, args.field_separator) + + try: +-- +1.8.3.1 + diff --git a/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch b/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch new file mode 100644 index 0000000..e1f89f9 --- /dev/null +++ b/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch @@ -0,0 +1,113 @@ +From 1ca8a545833e0a6e674984245338b8675ddc58bc Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Fri, 10 Jan 2020 16:48:14 +0530 +Subject: [PATCH 348/349] glusterfind: Fix py2/py3 issues + +1. In dictionary values(), returns list in py2 and not in py3. + So explicitly convert it into list. +2. xattr module returns values in bytes. So explicitly convert + them to str to work both with py2 and py3 + +Backport of: + > Patch: https://review.gluster.org/23993 + > fixes: bz#1789439 + > Change-Id: I27a639cda4f7a4ece9744a97c3d16e247906bd94 + > Signed-off-by: Kotresh HR + +BUG: 1789447 +Change-Id: I27a639cda4f7a4ece9744a97c3d16e247906bd94 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/189215 +Reviewed-by: Shwetha Acharya +Tested-by: RHGS Build Bot +Reviewed-by: Hari Gowtham Gopal +Reviewed-by: Sunny Kumar +--- + tools/glusterfind/src/changelog.py | 14 +++++++++----- + tools/glusterfind/src/main.py | 8 ++++---- + 2 files changed, 13 insertions(+), 9 deletions(-) + +diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py +index d8f97e0..d972fb5 100644 +--- a/tools/glusterfind/src/changelog.py ++++ b/tools/glusterfind/src/changelog.py +@@ -14,6 +14,7 @@ import sys + import time + import xattr + import logging ++from gfind_py2py3 import bytearray_to_str + 
from argparse import ArgumentParser, RawDescriptionHelpFormatter + import hashlib + try: +@@ -105,9 +106,10 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): + changelog_data.inodegfid_add(os.stat(p).st_ino, gfid) + file_xattrs = xattr.list(p) + for x in file_xattrs: +- if x.startswith("trusted.pgfid."): ++ x_str = bytearray_to_str(x) ++ if x_str.startswith("trusted.pgfid."): + # PGFID in pgfid table +- changelog_data.pgfid_add(x.split(".")[-1]) ++ changelog_data.pgfid_add(x_str.split(".")[-1]) + except (IOError, OSError): + # All OS Errors ignored, since failures will be logged + # in End. All GFIDs present in gfidpath table +@@ -122,10 +124,12 @@ def enum_hard_links_using_gfid2path(brick, gfid, args): + try: + file_xattrs = xattr.list(p) + for x in file_xattrs: +- if x.startswith("trusted.gfid2path."): ++ x_str = bytearray_to_str(x) ++ if x_str.startswith("trusted.gfid2path."): + # get the value for the xattr i.e. / +- v = xattr.getxattr(p, x) +- pgfid, bn = v.split(os.sep) ++ v = xattr.getxattr(p, x_str) ++ v_str = bytearray_to_str(v) ++ pgfid, bn = v_str.split(os.sep) + try: + path = symlink_gfid_to_path(brick, pgfid) + fullpath = os.path.join(path, bn) +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index fefe4a3..dfc9d07 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -633,7 +633,7 @@ def mode_query(session_dir, args): + # Merger + if args.full: + if len(g_pid_nodefile_map) > 0: +- cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \ + ["-o", args.outfile] + execute(cmd, + exit_msg="Failed to merge output files " +@@ -645,7 +645,7 @@ def mode_query(session_dir, args): + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) + outfilemerger = OutputMerger(args.outfile + ".db", +- g_pid_nodefile_map.values()) ++ list(g_pid_nodefile_map.values())) + 
write_output(args.outfile, outfilemerger, args.field_separator) + + try: +@@ -702,7 +702,7 @@ def mode_pre(session_dir, args): + # Merger + if args.full: + if len(g_pid_nodefile_map) > 0: +- cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \ + ["-o", args.outfile] + execute(cmd, + exit_msg="Failed to merge output files " +@@ -714,7 +714,7 @@ def mode_pre(session_dir, args): + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) + outfilemerger = OutputMerger(args.outfile + ".db", +- g_pid_nodefile_map.values()) ++ list(g_pid_nodefile_map.values())) + write_output(args.outfile, outfilemerger, args.field_separator) + + try: +-- +1.8.3.1 + diff --git a/SOURCES/0349-glusterfind-python3-compatibility.patch b/SOURCES/0349-glusterfind-python3-compatibility.patch new file mode 100644 index 0000000..7f1c274 --- /dev/null +++ b/SOURCES/0349-glusterfind-python3-compatibility.patch @@ -0,0 +1,56 @@ +From 1354a492cbc758f9801568153380ca896fab7765 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Fri, 10 Jan 2020 14:28:35 +0000 +Subject: [PATCH 349/349] glusterfind: python3 compatibility + +Problem: +While we delete gluster volume the hook script 'S57glusterfind-delete-post.py' +is failed to execute and error message can be observed in glusterd log. 
+ +Traceback: + File "/var/lib/glusterd/hooks/1/delete/post/S57glusterfind-delete-post", line 69, in + main() + File "/var/lib/glusterd/hooks/1/delete/post/S57glusterfind-delete-post", line 39, in main + glusterfind_dir = os.path.join(get_glusterd_workdir(), "glusterfind") + File "/usr/lib64/python3.7/posixpath.py", line 94, in join + genericpath._check_arg_types('join', a, *p) + File "/usr/lib64/python3.7/genericpath.py", line 155, in _check_arg_types + raise TypeError("Can't mix strings and bytes in path components") from None +TypeError: Can't mix strings and bytes in path components + +Solution: + +Added the 'universal_newlines' flag to Popen to support backward compatibility. + +Backport of: + > Patch: https://review.gluster.org/23994 + > Change-Id: Ie5655b11b55535c5ad2338108d0448e6fdaacf4f + > Fixes: bz#1789478 + > Signed-off-by: Sunny Kumar + +Change-Id: Ie5655b11b55535c5ad2338108d0448e6fdaacf4f +BUG: 1789447 +Signed-off-by: Sunny Kumar +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/189216 +Tested-by: RHGS Build Bot +--- + tools/glusterfind/S57glusterfind-delete-post.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py +index 5b5142d..5beece2 100755 +--- a/tools/glusterfind/S57glusterfind-delete-post.py ++++ b/tools/glusterfind/S57glusterfind-delete-post.py +@@ -18,7 +18,7 @@ def handle_rm_error(func, path, exc_info): + + def get_glusterd_workdir(): + p = Popen(["gluster", "system::", "getwd"], +- stdout=PIPE, stderr=PIPE) ++ stdout=PIPE, stderr=PIPE, universal_newlines=True) + + out, _ = p.communicate() + +-- +1.8.3.1 + diff --git a/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch b/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch new file mode 100644 index 0000000..08f70a7 --- /dev/null +++ b/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch @@ -0,0 +1,37 @@ +From 
6c06ac0571fb6bf0734b173cc3a75badc7554601 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya +Date: Tue, 14 Jan 2020 10:51:06 +0530 +Subject: [PATCH 350/350] tools/glusterfind: Remove an extra argument + +Backport of: +> Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24011/ +> fixes: bz#1790748 +> Change-Id: I1cb12c975142794139456d0f8e99fbdbb03c53a1 +> Signed-off-by: Shwetha K Acharya + +Change-Id: I1cb12c975142794139456d0f8e99fbdbb03c53a1 +BUG: 1789447 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/189363 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tools/glusterfind/src/main.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index dfc9d07..5ca1fec 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -515,7 +515,7 @@ def write_output(outfile, outfilemerger, field_separator): + continue + + if row_2_rep and row_2_rep != "": +- gfind_write_row(f, row[0], field_separator, p_rep, field_separator, row_2_rep) ++ gfind_write_row(f, row[0], field_separator, p_rep, row_2_rep) + + else: + gfind_write(f, row[0], field_separator, p_rep) +-- +1.8.3.1 + diff --git a/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch b/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch new file mode 100644 index 0000000..51dc3bb --- /dev/null +++ b/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch @@ -0,0 +1,131 @@ +From f38f0988eb6c0d72677abceba5ebeb51ea8d44ad Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 21 Jan 2020 21:09:56 +0530 +Subject: [PATCH 351/351] server: Mount fails after reboot 1/3 gluster nodes + +Problem: At the time of coming up one server node(1x3) after reboot +client is unmounted.The client is unmounted because a client +is getting AUTH_FAILED event and client call fini for the graph.The +client is getting 
AUTH_FAILED because brick is not attached with a +graph at that moment + +Solution: To avoid the unmounting the client graph throw ENOENT error + from server in case if brick is not attached with server at + the time of authenticate clients. + +> Credits: Xavi Hernandez +> Change-Id: Ie6fbd73cbcf23a35d8db8841b3b6036e87682f5e +> Fixes: bz#1793852 +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit e4f776308d5ee7ffeb07de0fd9e1edae6944030d) +> (Reviewd on upstream link https://review.gluster.org/#/c/glusterfs/+/24053/) + +Change-Id: Ie6fbd73cbcf23a35d8db8841b3b6036e87682f5e +BUG: 1793035 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/190042 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/protocol/bug-1433815-auth-allow.t | 1 + + xlators/protocol/client/src/client-handshake.c | 3 +- + xlators/protocol/server/src/server-handshake.c | 41 +++++++++++++++++--------- + 3 files changed, 29 insertions(+), 16 deletions(-) + +diff --git a/tests/bugs/protocol/bug-1433815-auth-allow.t b/tests/bugs/protocol/bug-1433815-auth-allow.t +index fa22ad8..a78c0eb 100644 +--- a/tests/bugs/protocol/bug-1433815-auth-allow.t ++++ b/tests/bugs/protocol/bug-1433815-auth-allow.t +@@ -17,6 +17,7 @@ TEST $CLI volume create $V0 $H0:$B0/$V0 + # Set auth.allow so it *doesn't* include ourselves. + TEST $CLI volume set $V0 auth.allow 1.2.3.4 + TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + + # "System getspec" will include the username and password if the request comes + # from a server (which we are). 
Unfortunately, this will cause authentication +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index c43756a..0002361 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -1031,8 +1031,7 @@ client_setvolume_cbk(struct rpc_req *req, struct iovec *iov, int count, + "SETVOLUME on remote-host failed: %s", remote_error); + + errno = op_errno; +- if (remote_error && +- (strcmp("Authentication failed", remote_error) == 0)) { ++ if (remote_error && (op_errno == EACCES)) { + auth_fail = _gf_true; + op_ret = 0; + } +diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c +index 382f241..1d1177d 100644 +--- a/xlators/protocol/server/src/server-handshake.c ++++ b/xlators/protocol/server/src/server-handshake.c +@@ -250,6 +250,7 @@ server_setvolume(rpcsvc_request_t *req) + char *subdir_mount = NULL; + char *client_name = NULL; + gf_boolean_t cleanup_starting = _gf_false; ++ gf_boolean_t xlator_in_graph = _gf_true; + + params = dict_new(); + reply = dict_new(); +@@ -311,8 +312,10 @@ server_setvolume(rpcsvc_request_t *req) + LOCK(&ctx->volfile_lock); + { + xl = get_xlator_by_name(this, name); +- if (!xl) ++ if (!xl) { ++ xlator_in_graph = _gf_false; + xl = this; ++ } + } + UNLOCK(&ctx->volfile_lock); + if (xl == NULL) { +@@ -568,20 +571,30 @@ server_setvolume(rpcsvc_request_t *req) + "failed to set error " + "msg"); + } else { +- gf_event(EVENT_CLIENT_AUTH_REJECT, +- "client_uid=%s;" +- "client_identifier=%s;server_identifier=%s;" +- "brick_path=%s", +- client->client_uid, req->trans->peerinfo.identifier, +- req->trans->myinfo.identifier, name); +- gf_msg(this->name, GF_LOG_ERROR, EACCES, PS_MSG_AUTHENTICATE_ERROR, +- "Cannot authenticate client" +- " from %s %s", +- client->client_uid, (clnt_version) ? 
clnt_version : "old"); +- + op_ret = -1; +- op_errno = EACCES; +- ret = dict_set_str(reply, "ERROR", "Authentication failed"); ++ if (!xlator_in_graph) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOENT, PS_MSG_AUTHENTICATE_ERROR, ++ "Cannot authenticate client" ++ " from %s %s because brick is not attached in graph", ++ client->client_uid, (clnt_version) ? clnt_version : "old"); ++ ++ op_errno = ENOENT; ++ ret = dict_set_str(reply, "ERROR", "Brick not found"); ++ } else { ++ gf_event(EVENT_CLIENT_AUTH_REJECT, ++ "client_uid=%s;" ++ "client_identifier=%s;server_identifier=%s;" ++ "brick_path=%s", ++ client->client_uid, req->trans->peerinfo.identifier, ++ req->trans->myinfo.identifier, name); ++ gf_msg(this->name, GF_LOG_ERROR, EACCES, PS_MSG_AUTHENTICATE_ERROR, ++ "Cannot authenticate client" ++ " from %s %s", ++ client->client_uid, (clnt_version) ? clnt_version : "old"); ++ ++ op_errno = EACCES; ++ ret = dict_set_str(reply, "ERROR", "Authentication failed"); ++ } + if (ret < 0) + gf_msg_debug(this->name, 0, + "failed to set error " +-- +1.8.3.1 + diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec index 4e2fa5b..84a0141 100644 --- a/SPECS/glusterfs.spec +++ b/SPECS/glusterfs.spec @@ -231,7 +231,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 12%{?dist} +Release: 29%{?dist} ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ @@ -585,6 +585,81 @@ Patch0273: 0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch Patch0274: 0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch Patch0275: 0275-cluster-ec-Create-heal-task-with-heal-process-id.patch Patch0276: 0276-features-utime-always-update-ctime-at-setattr.patch +Patch0277: 0277-geo-rep-Fix-Config-Get-Race.patch +Patch0278: 0278-geo-rep-Fix-worker-connection-issue.patch +Patch0279: 0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch +Patch0280: 0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch +Patch0281: 
0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch +Patch0282: 0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch +Patch0283: 0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch +Patch0284: 0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch +Patch0285: 0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch +Patch0286: 0286-glusterfs.spec.in-added-script-files-for-machine-com.patch +Patch0287: 0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch +Patch0288: 0288-cluster-ec-Fix-coverity-issues.patch +Patch0289: 0289-cluster-ec-quorum-count-implementation.patch +Patch0290: 0290-glusterd-tag-disperse.quorum-count-for-31306.patch +Patch0291: 0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch +Patch0292: 0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch +Patch0293: 0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch +Patch0294: 0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch +Patch0295: 0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch +Patch0296: 0296-glusterfind-pre-command-failure-on-a-modify.patch +Patch0297: 0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch +Patch0298: 0298-geo-rep-fix-sub-command-during-worker-connection.patch +Patch0299: 0299-geo-rep-performance-improvement-while-syncing-rename.patch +Patch0300: 0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch +Patch0301: 0301-posix-Brick-is-going-down-unexpectedly.patch +Patch0302: 0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch +Patch0303: 0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch +Patch0304: 0304-cluster-dht-Correct-fd-processing-loop.patch +Patch0305: 0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch +Patch0306: 0306-cli-fix-distCount-value.patch +Patch0307: 0307-ssl-fix-RHEL8-regression-failure.patch +Patch0308: 0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch +Patch0309: 
0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch +Patch0310: 0310-tests-test-case-for-non-root-geo-rep-setup.patch +Patch0311: 0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch +Patch0312: 0312-Scripts-quota_fsck-script-KeyError-contri_size.patch +Patch0313: 0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch +Patch0314: 0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch +Patch0315: 0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch +Patch0316: 0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch +Patch0317: 0317-Update-rfc.sh-to-rhgs-3.5.1.patch +Patch0318: 0318-Update-rfc.sh-to-rhgs-3.5.1.patch +Patch0319: 0319-features-snapview-server-obtain-the-list-of-snapshot.patch +Patch0320: 0320-gf-event-Handle-unix-volfile-servers.patch +Patch0321: 0321-Adding-white-spaces-to-description-of-set-group.patch +Patch0322: 0322-glusterd-display-correct-rebalance-data-size-after-g.patch +Patch0323: 0323-cli-display-detailed-rebalance-info.patch +Patch0324: 0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch +Patch0325: 0325-extras-hooks-Install-and-package-newly-added-post-ad.patch +Patch0326: 0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch +Patch0327: 0327-glusterfind-integrate-with-gfid2path.patch +Patch0328: 0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch +Patch0329: 0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch +Patch0330: 0330-mount.glusterfs-change-the-error-message.patch +Patch0331: 0331-features-locks-Do-special-handling-for-op-version-3..patch +Patch0332: 0332-Removing-one-top-command-from-gluster-v-help.patch +Patch0333: 0333-rpc-Synchronize-slot-allocation-code.patch +Patch0334: 0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch +Patch0335: 0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch +Patch0336: 0336-spec-check-and-return-exit-code-in-rpm-scripts.patch +Patch0337: 0337-fuse-Set-limit-on-invalidate-queue-size.patch +Patch0338: 
0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch +Patch0339: 0339-geo-rep-fix-integer-config-validation.patch +Patch0340: 0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch +Patch0341: 0341-socket-fix-error-handling.patch +Patch0342: 0342-Revert-hooks-remove-selinux-hooks.patch +Patch0343: 0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch +Patch0344: 0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch +Patch0345: 0345-read-ahead-io-cache-turn-off-by-default.patch +Patch0346: 0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch +Patch0347: 0347-tools-glusterfind-handle-offline-bricks.patch +Patch0348: 0348-glusterfind-Fix-py2-py3-issues.patch +Patch0349: 0349-glusterfind-python3-compatibility.patch +Patch0350: 0350-tools-glusterfind-Remove-an-extra-argument.patch +Patch0351: 0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -984,7 +1059,7 @@ This package provides the translators needed on any GlusterFS client. 
%package events Summary: GlusterFS Events Requires: %{name}-server%{?_isa} = %{version}-%{release} -Requires: python%{_pythonver} python%{_pythonver}-prettytable +Requires: python%{_pythonver} Requires: python%{_pythonver}-gluster = %{version}-%{release} %if ( 0%{?rhel} && 0%{?rhel} < 8 ) Requires: python-requests @@ -992,7 +1067,10 @@ Requires: python-requests Requires: python%{_pythonver}-requests %endif %if ( 0%{?rhel} && 0%{?rhel} < 7 ) +Requires: python-prettytable Requires: python-argparse +%else +Requires: python%{_pythonver}-prettytable %endif %if ( 0%{?_with_systemd:1} ) %{?systemd_requires} @@ -1458,6 +1536,9 @@ exit 0 %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh %endif +%{_datadir}/glusterfs/scripts/identify-hangs.sh +%{_datadir}/glusterfs/scripts/collect-system-stats.sh +%{_datadir}/glusterfs/scripts/log_accounting.sh # xlators that are needed on the client- and on the server-side %dir %{_libdir}/glusterfs %dir %{_libdir}/glusterfs/%{version}%{?prereltag} @@ -1703,6 +1784,8 @@ exit 0 %if ( 0%{!?_without_server:1} ) %files server %doc extras/clear_xattrs.sh +%{_datadir}/glusterfs/scripts/xattr_analysis.py* +%{_datadir}/glusterfs/scripts/quota_fsck.py* # sysconf %config(noreplace) %{_sysconfdir}/glusterfs %exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol @@ -1914,8 +1997,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -1948,8 +2032,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -1982,8 +2067,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 
0) then + error("Detected running glusterfs processes", rc) end @@ -2016,8 +2102,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -2049,8 +2136,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -2082,8 +2170,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -2117,8 +2206,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end %endif @@ -2152,8 +2242,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -2187,8 +2278,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end %endif @@ -2223,8 +2315,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end %endif @@ -2258,8 +2351,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end %posttrans server @@ -2293,8 +2387,59 @@ fi %endif 
%changelog -* Tue Oct 29 2019 CentOS Sources - 6.0-12.el7.centos -- remove vendor and/or packager lines +* Thu Jan 23 2020 Rinku Kothiya - 6.0-29 +- fixes bugs bz#1793035 + +* Tue Jan 14 2020 Rinku Kothiya - 6.0-28 +- fixes bugs bz#1789447 + +* Mon Jan 13 2020 Rinku Kothiya - 6.0-27 +- fixes bugs bz#1789447 + +* Fri Jan 10 2020 Rinku Kothiya - 6.0-26 +- fixes bugs bz#1763208 bz#1788656 + +* Mon Dec 23 2019 Rinku Kothiya - 6.0-25 +- fixes bugs bz#1686800 bz#1763208 bz#1779696 bz#1781444 bz#1782162 + +* Thu Nov 28 2019 Rinku Kothiya - 6.0-24 +- fixes bugs bz#1768786 + +* Thu Nov 21 2019 Rinku Kothiya - 6.0-23 +- fixes bugs bz#1344758 bz#1599802 bz#1685406 bz#1686800 bz#1724021 + bz#1726058 bz#1727755 bz#1731513 bz#1741193 bz#1758923 bz#1761326 bz#1761486 + bz#1762180 bz#1764095 bz#1766640 + +* Thu Nov 14 2019 Rinku Kothiya - 6.0-22 +- fixes bugs bz#1771524 bz#1771614 + +* Fri Oct 25 2019 Rinku Kothiya - 6.0-21 +- fixes bugs bz#1765555 + +* Wed Oct 23 2019 Rinku Kothiya - 6.0-20 +- fixes bugs bz#1719171 bz#1763412 bz#1764202 + +* Thu Oct 17 2019 Rinku Kothiya - 6.0-19 +- fixes bugs bz#1760939 + +* Wed Oct 16 2019 Rinku Kothiya - 6.0-18 +- fixes bugs bz#1758432 + +* Fri Oct 11 2019 Rinku Kothiya - 6.0-17 +- fixes bugs bz#1704562 bz#1758618 bz#1760261 + +* Wed Oct 09 2019 Rinku Kothiya - 6.0-16 +- fixes bugs bz#1752713 bz#1756325 + +* Fri Sep 27 2019 Rinku Kothiya - 6.0-15 +- fixes bugs bz#1726000 bz#1731826 bz#1754407 bz#1754790 bz#1755227 + +* Fri Sep 20 2019 Sunil Kumar Acharya - 6.0-14 +- fixes bugs bz#1719171 bz#1728673 bz#1731896 bz#1732443 bz#1733970 + bz#1745107 bz#1746027 bz#1748688 bz#1750241 bz#1572163 + +* Fri Aug 23 2019 Rinku Kothiya - 6.0-13 +- fixes bugs bz#1729915 bz#1732376 bz#1743611 bz#1743627 bz#1743634 bz#1744518 * Fri Aug 09 2019 Sunil Kumar Acharya - 6.0-12 - fixes bugs bz#1730914 bz#1731448 bz#1732770 bz#1732792 bz#1733531