From 5c188a0f989c50985584272dc5cef39177832708 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Thu, 17 Mar 2016 19:42:00 +0530 Subject: [PATCH 73/80] cluster/afr: Fix partial heals in 3-way replication Problem: When there are 2 sources and one sink and if two self-heal daemons try to acquire locks at the same time, there is a chance that it gets a lock on one source and sink leading to a partial heal. This will need one more heal from the remaining source to sink for the complete self-heal. This is not optimal. Fix: Upgrade non-blocking locks to blocking lock on all the subvolumes, if the number of locks acquired is majority and there were eagains. >BUG: 1318751 >Change-Id: Iae10b8d3402756c4164b98cc49876056ff7a61e5 >Signed-off-by: Pranith Kumar K >Reviewed-on: http://review.gluster.org/13766 >Smoke: Gluster Build System >NetBSD-regression: NetBSD Build System >CentOS-regression: Gluster Build System >Reviewed-by: Ravishankar N >(cherry picked from commit 8deedef565df49def75083678f8d1558c7b1f7d3) >Change-Id: Ia164360dc1474a717f63633f5deb2c39cc15017c >BUG: 1327863 >Signed-off-by: Pranith Kumar K >Reviewed-on: http://review.gluster.org/14008 >Smoke: Gluster Build System >NetBSD-regression: NetBSD Build System >CentOS-regression: Gluster Build System BUG: 1314724 Change-Id: I9ddea45d827a1cedba3170913b7190a9564449a0 Signed-off-by: Pranith Kumar K Reviewed-on: https://code.engineering.redhat.com/gerrit/72371 --- xlators/cluster/afr/src/afr-self-heal-common.c | 121 ++++++++++++++++++++- xlators/cluster/afr/src/afr-self-heal-data.c | 10 +- xlators/cluster/afr/src/afr-self-heal-entry.c | 7 +- xlators/cluster/afr/src/afr-self-heal-metadata.c | 2 +- xlators/cluster/afr/src/afr-self-heal.h | 13 ++- 5 files changed, 138 insertions(+), 15 deletions(-) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b3dbc95..130388d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ 
b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -727,11 +727,14 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, void afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, - int source, unsigned char *healed_sinks) + int source, unsigned char *sources, + unsigned char *healed_sinks) { char *status = NULL; char *sinks_str = NULL; char *p = NULL; + char *sources_str = NULL; + char *q = NULL; afr_private_t *priv = NULL; gf_loglevel_t loglevel = GF_LOG_NONE; int i = 0; @@ -739,10 +742,18 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, priv = this->private; sinks_str = alloca0 (priv->child_count * 8); p = sinks_str; + sources_str = alloca0 (priv->child_count * 8); + q = sources_str; for (i = 0; i < priv->child_count; i++) { - if (!healed_sinks[i]) - continue; - p += sprintf (p, "%d ", i); + if (healed_sinks[i]) + p += sprintf (p, "%d ", i); + if (sources[i]) { + if (source == i) { + q += sprintf (q, "[%d] ", i); + } else { + q += sprintf (q, "%d ", i); + } + } } if (ret < 0) { @@ -755,8 +766,8 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, gf_msg (this->name, loglevel, 0, AFR_MSG_SELF_HEAL_INFO, "%s %s selfheal on %s. 
" - "source=%d sinks=%s", status, type, uuid_utoa (gfid), - source, sinks_str); + "sources=%s sinks=%s", status, type, uuid_utoa (gfid), + sources_str, sinks_str); } int @@ -1012,6 +1023,67 @@ afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, return afr_locked_fill (frame, this, locked_on); } +static void +afr_get_lock_and_eagain_counts (afr_private_t *priv, struct afr_reply *replies, + int *lock_count, int *eagain_count) +{ + int i = 0; + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + if (replies[i].op_ret == 0) { + (*lock_count)++; + } else if (replies[i].op_ret == -1 && + replies[i].op_errno == EAGAIN) { + (*eagain_count)++; + } + } +} + +/*Do blocking locks if number of locks acquired is majority and there were some + * EAGAINs. Useful for odd-way replication*/ +int +afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this, + inode_t *inode, char *dom, off_t off, + size_t size, unsigned char *locked_on) +{ + loc_t loc = {0,}; + struct gf_flock flock = {0, }; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int lock_count = 0; + int eagain_count = 0; + + priv = this->private; + local = frame->local; + + loc.inode = inode_ref (inode); + gf_uuid_copy (loc.gfid, inode->gfid); + + flock.l_type = F_WRLCK; + flock.l_start = off; + flock.l_len = size; + + AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom, + &loc, F_SETLK, &flock, NULL); + + afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count, + &eagain_count); + + if (lock_count > priv->child_count/2 && eagain_count) { + afr_locked_fill (frame, this, locked_on); + afr_selfheal_uninodelk (frame, this, inode, dom, off, + size, locked_on); + + AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom, + &loc, F_SETLKW, &flock, NULL); + } + + loc_wipe (&loc); + + return afr_locked_fill (frame, this, locked_on); +} int afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, @@ -1092,6 +1164,43 @@ 
afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, return afr_locked_fill (frame, this, locked_on); } +int +afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this, + inode_t *inode, char *dom, const char *name, + unsigned char *locked_on) +{ + loc_t loc = {0,}; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int lock_count = 0; + int eagain_count = 0; + + priv = this->private; + local = frame->local; + + loc.inode = inode_ref (inode); + gf_uuid_copy (loc.gfid, inode->gfid); + + AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, + name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); + + afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count, + &eagain_count); + + if (lock_count > priv->child_count/2 && eagain_count) { + afr_locked_fill (frame, this, locked_on); + afr_selfheal_unentrylk (frame, this, inode, dom, name, + locked_on); + + AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom, + &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + } + + loc_wipe (&loc); + + return afr_locked_fill (frame, this, locked_on); +} + int afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 332471c..67bb8e6 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -786,7 +786,7 @@ out: if (did_sh) afr_log_selfheal (fd->inode->gfid, this, ret, "data", source, - healed_sinks); + sources, healed_sinks); else ret = 1; @@ -844,8 +844,9 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode) locked_on = alloca0 (priv->child_count); - ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0, - locked_on); + ret = afr_selfheal_tie_breaker_inodelk (frame, this, inode, + priv->sh_domain, 0, 0, + locked_on); { if (ret < AFR_SH_MIN_PARTICIPANTS) { gf_msg_debug (this->name, 0, "%s: Skipping " @@ -864,7 +865,8 @@ 
afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode) ret = __afr_selfheal_data (frame, this, fd, locked_on); } unlock: - afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, locked_on); + afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, + locked_on); if (fd) fd_unref (fd); diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 0837e5a..a93c640 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -704,7 +704,7 @@ postop_unlock: out: if (did_sh) afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source, - healed_sinks); + sources, healed_sinks); else ret = 1; @@ -759,8 +759,9 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode) locked_on = alloca0 (priv->child_count); long_name_locked = alloca0 (priv->child_count); - ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL, - locked_on); + ret = afr_selfheal_tie_breaker_entrylk (frame, this, inode, + priv->sh_domain, NULL, + locked_on); { if (ret < AFR_SH_MIN_PARTICIPANTS) { gf_msg_debug (this->name, 0, "%s: Skipping " diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 778f2a1..d6daadc 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -427,7 +427,7 @@ unlock: if (did_sh) afr_log_selfheal (inode->gfid, this, ret, "metadata", source, - healed_sinks); + sources, healed_sinks); else ret = 1; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index b0f545f..e0a3341 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -113,6 +113,11 @@ afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on); int +afr_selfheal_tie_breaker_inodelk 
(call_frame_t *frame, xlator_t *this, + inode_t *inode, char *dom, off_t off, + size_t size, unsigned char *locked_on); + +int afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, char *dom, off_t off, size_t size, const unsigned char *locked_on); @@ -126,6 +131,11 @@ afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, char *dom, const char *name, unsigned char *locked_on); int +afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this, + inode_t *inode, char *dom, const char *name, + unsigned char *locked_on); + +int afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, char *dom, const char *name, unsigned char *locked_on); @@ -197,7 +207,8 @@ afr_success_count (struct afr_reply *replies, unsigned int count); void afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, - int source, unsigned char *healed_sinks); + int source, unsigned char *sources, + unsigned char *healed_sinks); void afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources, -- 1.7.1