From 2faa19e96c5c1a249c0e6e8095e3e8eeaa3954a5 Mon Sep 17 00:00:00 2001 From: Poornima G Date: Wed, 17 Aug 2016 20:19:59 +0530 Subject: [PATCH 131/141] io-stats: Add stats for upcall notifications With this patch, there will be additional entries seen in the profile info: UPCALL : Total number of upcall events that were sent from the brick(in brick profile), and number of upcall notifications recieved by client(in client profile) Cache invalidation events: ------------------------- CI_IATT : Number of upcalls that were cache invalidation and had one of the IATT_UPDATE_FLAGS set. This indicates that one of the iatt value was changed. CI_XATTR : Number of upcalls that were cache invalidation, and had one of the UP_XATTR or UP_XATTR_RM set. This indicates that an xattr was updated or deleted. CI_RENAME : Number of upcalls that were cache invalidation, resulted by the renaming of a file or directory CI_UNLINK : Number of upcalls that were cache invalidation, resulted by the unlink of a file. CI_FORGET : Number of upcalls that were cache invalidation, resulted by the forget of inode on the server side. Lease events: ------------ LEASE_RECALL : Number of lease recalls sent by the brick (in brick profile), and number of lease recalls recieved by client(in client profile) Note that the sum of CI_IATT, CI_XATTR, CI_RENAME, CI_UNLINK, CI_FORGET, LEASE_RECALL may not be equal to UPCALL. This is because, each cache invalidation can carry multiple flags. Eg: - Every CI_XATTR will have CI_IATT - Every CI_UNLINK will also increment CI_IATT as link count is an iatt attribute. Also UP_PARENT_DENTRY_FLAGS is currently not accounted for, as CI_RENAME and CI_UNLINK will always have the flag UP_PARENT_DENTRY_FLAGS Change-Id: Ieb8cd21dde2c4c7618f12d025a5e5156f9cc0fe9 BUG: 1284873 Signed-off-by: Poornima G Reviewed-on: http://review.gluster.org/15193 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System Reviewed-by: Rajesh Joseph CentOS-regression: Gluster Build System Reviewed-by: Pranith Kumar Karampuri Reviewed-on: https://code.engineering.redhat.com/gerrit/87044 Tested-by: Rajesh Joseph --- cli/src/cli-rpc-ops.c | 25 +++++++++ cli/src/cli-xml-output.c | 44 ++++++++++++++++- libglusterfs/src/globals.c | 13 +++++ libglusterfs/src/globals.h | 1 + rpc/xdr/src/glusterfs-fops.x | 12 +++++ tests/bugs/upcall/bug-upcall-stat.t | 38 ++++++++++++++ xlators/debug/io-stats/src/io-stats.c | 87 +++++++++++++++++++++++++++++++++ 7 files changed, 219 insertions(+), 1 deletions(-) create mode 100755 tests/bugs/upcall/bug-upcall-stat.t diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index e8fc658..58884a2 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -30,6 +30,7 @@ #include "protocol-common.h" #include "cli-mem-types.h" #include "compat.h" +#include "upcall-utils.h" #include "syscall.h" #include "glusterfs3.h" @@ -6293,6 +6294,7 @@ cmd_profile_volume_brick_out (dict_t *dict, int count, int interval) uint64_t rb_counts[32] = {0}; uint64_t wb_counts[32] = {0}; cli_profile_info_t profile_info[GF_FOP_MAXVALUE] = {{0}}; + cli_profile_info_t upcall_info[GF_UPCALL_FLAGS_MAXVALUE] = {{0},}; char output[128] = {0}; int per_line = 0; char read_blocks[128] = {0}; @@ -6316,6 +6318,13 @@ cmd_profile_volume_brick_out (dict_t *dict, int count, int interval) ret = dict_get_uint64 (dict, key, &wb_counts[i]); } + for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) { + snprintf (key, sizeof (key), "%d-%d-%d-upcall-hits", count, + interval, i); + ret = dict_get_uint64 (dict, key, &upcall_info[i].fop_hits); + upcall_info[i].fop_name = (char *)gf_upcall_list[i]; + } + for (i = 0; i < GF_FOP_MAXVALUE; i++) { memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%d-%d-%d-hits", count, @@ -6440,6 +6449,22 @@ cmd_profile_volume_brick_out (dict_t *dict, int count, int interval) profile_info[i].fop_name); } } + + for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) { + if (upcall_info[i].fop_hits == 0) + continue; + if (upcall_info[i].fop_hits) { + cli_out ("%10.2lf %10.2lf us %10.2lf us %10.2lf us" + " %14"PRId64" %11s", + upcall_info[i].percentage_avg_latency, + upcall_info[i].avg_latency, + upcall_info[i].min_latency, + upcall_info[i].max_latency, + upcall_info[i].fop_hits, + upcall_info[i].fop_name); + } + } + cli_out (" "); cli_out ("%12s: %"PRId64" seconds", "Duration", sec); cli_out ("%12s: %"PRId64" bytes", "Data Read", r_count); diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c index 6d1e617..f249e40 100644 --- a/cli/src/cli-xml-output.c +++ b/cli/src/cli-xml-output.c @@ -13,7 +13,7 @@ #include "run.h" #include "compat.h" #include "syscall.h" - +#include "upcall-utils.h" enum gf_task_types { GF_TASK_TYPE_REBALANCE, @@ -2200,6 +2200,48 @@ cont: max_latency = 0.0; } + for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) { + hits = 0; + avg_latency = 0.0; + min_latency = 0.0; + max_latency = 0.0; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%d-%d-%d-upcall-hits", brick_index, + interval, i); + ret = dict_get_uint64 (dict, key, &hits); + if (ret) + continue; + + /* */ + ret = xmlTextWriterStartElement (writer, (xmlChar *)"fop"); + XML_RET_CHECK_AND_GOTO (ret, out); + + ret = xmlTextWriterWriteFormatElement + (writer, (xmlChar *)"name", "%s", gf_fop_list[i]); + XML_RET_CHECK_AND_GOTO (ret, out); + + ret = xmlTextWriterWriteFormatElement + (writer, (xmlChar *)"hits", "%"PRIu64, hits); + XML_RET_CHECK_AND_GOTO (ret, out); + + ret = xmlTextWriterWriteFormatElement + (writer, (xmlChar *)"avgLatency", "%f", avg_latency); + XML_RET_CHECK_AND_GOTO (ret, out); + + ret = xmlTextWriterWriteFormatElement + (writer, (xmlChar *)"minLatency", "%f", min_latency); + XML_RET_CHECK_AND_GOTO (ret, out); + + ret = xmlTextWriterWriteFormatElement + (writer, (xmlChar *)"maxLatency", "%f", max_latency); + XML_RET_CHECK_AND_GOTO (ret, out); + + /* */ + ret = xmlTextWriterEndElement (writer); + XML_RET_CHECK_AND_GOTO (ret, out); + } + /* */ ret = xmlTextWriterEndElement (writer); XML_RET_CHECK_AND_GOTO (ret, out); diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index 50ef4db..c537971 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -16,6 +16,7 @@ #include "mem-pool.h" #include "syncop.h" #include "libglusterfs-messages.h" +#include "upcall-utils.h" const char *gf_fop_list[GF_FOP_MAXVALUE] = { [GF_FOP_NULL] = "NULL", @@ -74,6 +75,18 @@ const char *gf_fop_list[GF_FOP_MAXVALUE] = { [GF_FOP_GETACTIVELK] = "GETACTIVELK", [GF_FOP_SETACTIVELK] = "SETACTIVELK", }; + +const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = { + [GF_UPCALL_NULL] = "NULL", + [GF_UPCALL] = "UPCALL", + [GF_UPCALL_CI_STAT] = "CI_IATT", + [GF_UPCALL_CI_XATTR] = "CI_XATTR", + [GF_UPCALL_CI_RENAME] = "CI_RENAME", + [GF_UPCALL_CI_NLINK] = "CI_UNLINK", + [GF_UPCALL_CI_FORGET] = "CI_FORGET", + [GF_UPCALL_LEASE_RECALL] = "LEASE_RECALL", +}; + /* THIS */ /* This global ctx is a bad hack to prevent some of the libgfapi crashes. diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h index 514109a..b12b0ad 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/globals.h @@ -129,6 +129,7 @@ char *glusterfs_leaseid_buf_get (); int glusterfs_globals_init (glusterfs_ctx_t *ctx); extern const char *gf_fop_list[]; +extern const char *gf_upcall_list[]; /* mem acct enable/disable */ int gf_global_mem_acct_enable_get (void); diff --git a/rpc/xdr/src/glusterfs-fops.x b/rpc/xdr/src/glusterfs-fops.x index 3b6b649..4bfdd4b 100644 --- a/rpc/xdr/src/glusterfs-fops.x +++ b/rpc/xdr/src/glusterfs-fops.x @@ -197,3 +197,15 @@ enum gf_seek_what_t { GF_SEEK_DATA, GF_SEEK_HOLE }; + +enum gf_upcall_flags_t { + GF_UPCALL_NULL, + GF_UPCALL, + GF_UPCALL_CI_STAT, + GF_UPCALL_CI_XATTR, + GF_UPCALL_CI_RENAME, + GF_UPCALL_CI_NLINK, + GF_UPCALL_CI_FORGET, + GF_UPCALL_LEASE_RECALL, + GF_UPCALL_FLAGS_MAXVALUE +}; diff --git a/tests/bugs/upcall/bug-upcall-stat.t b/tests/bugs/upcall/bug-upcall-stat.t new file mode 100755 index 0000000..b229c13 --- /dev/null +++ b/tests/bugs/upcall/bug-upcall-stat.t @@ -0,0 +1,38 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; +TEST glusterd; + +TEST $CLI volume create $V0 $H0:$B0/${V0}{1}; + +TEST $CLI volume set $V0 performance.md-cache-timeout 600 +TEST $CLI volume set $V0 performance.cache-samba-metadata on +TEST $CLI volume set $V0 features.cache-invalidation on +TEST $CLI volume set $V0 features.cache-invalidation-timeout 600 + +#TEST $CLI volume stop $V0 +TEST $CLI volume start $V0 + +## 5. Create two gluster mounts +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1 + +TEST $CLI volume profile $V0 start + +## 8. Create a file +TEST touch $M0/file1 + +TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1" +TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc" +TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1" +sleep 2; #There can be a very very small window where the next getxattr + #reaches md-cache, before the cache-invalidation caused by previous + #setxattr, reaches md-cache. Hence sleeping for 2 sec. + #Also it should not be > 600. +TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q xyz" + +$CLI volume profile $V0 info | grep -q CI_XATTR +EXPECT '0' echo $? diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 43fba57..c2375d9 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -37,6 +37,7 @@ #include "statedump.h" #include #include +#include "upcall-utils.h" #define MAX_LIST_MEMBERS 100 #define DEFAULT_PWD_BUF_SZ 16384 @@ -119,6 +120,7 @@ struct ios_global_stats { uint64_t block_count_write[32]; uint64_t block_count_read[32]; uint64_t fop_hits[GF_FOP_MAXVALUE]; + uint64_t upcall_hits[GF_UPCALL_FLAGS_MAXVALUE]; struct timeval started_at; struct ios_lat latency[GF_FOP_MAXVALUE]; uint64_t nr_opens; @@ -365,6 +367,19 @@ is_fop_latency_started (call_frame_t *frame) throughput, iosstat); \ } while (0) +#define BUMP_UPCALL(event) \ + do { \ + struct ios_conf *conf = NULL; \ + \ + conf = this->private; \ + if (!conf) \ + break; \ + if (conf->count_fop_hits) { \ + conf->cumulative.upcall_hits[event]++; \ + conf->incremental.upcall_hits[event]++; \ + } \ + } while (0) + int ios_fd_ctx_get (fd_t *fd, xlator_t *this, struct ios_fd **iosfd) { @@ -883,6 +898,26 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, "\"%s.%s.fop.%s.latency_max_usec\": \"%0.2lf\",", key_prefix, str_prefix, lc_fop_name, fop_lat_max); } + + for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) { + lc_fop_name = strdupa (gf_upcall_list[i]); + for (j = 0; lc_fop_name[j]; j++) { + lc_fop_name[j] = tolower (lc_fop_name[j]); + } + fop_hits = stats->upcall_hits[i]; + if (interval == -1) { + ios_log (this, logfp, + "\"%s.%s.fop.%s.count\": \"%"PRId64"\",", + key_prefix, str_prefix, lc_fop_name, + fop_hits); + } else { + ios_log (this, logfp, + "\"%s.%s.fop.%s.per_sec\": \"%0.2lf\",", + key_prefix, str_prefix, lc_fop_name, + (double)(fop_hits / interval_sec)); + } + } + if (interval == -1) { ios_log (this, logfp, "\"%s.%s.uptime\": \"%"PRId64"\",", key_prefix, str_prefix, @@ -1233,6 +1268,14 @@ io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats, stats->fop_hits[i], stats->latency[i].avg, stats->latency[i].min, stats->latency[i].max); } + + for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) { + if (stats->upcall_hits[i]) + ios_log (this, logfp, "%-13s %10"PRId64" %11s " + "us %11s us %11s us", gf_upcall_list[i], + stats->upcall_hits[i], "0", "0", "0"); + } + ios_log (this, logfp, "------ ----- ----- ----- ----- ----- ----- ----- " " ----- ----- ----- -----\n"); @@ -1415,6 +1458,19 @@ io_stats_dump_global_to_dict (xlator_t *this, struct ios_global_stats *stats, goto out; } } + for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) { + if (stats->upcall_hits[i] == 0) + continue; + snprintf (key, sizeof (key), "%d-%d-upcall-hits", interval, i); + ret = dict_set_uint64 (dict, key, stats->upcall_hits[i]); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "set %s-upcall-hits: %"PRIu64, + gf_upcall_list[i], + stats->upcall_hits[i]); + goto out; + } + } out: gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); return ret; @@ -3780,6 +3836,8 @@ notify (xlator_t *this, int32_t event, void *data, ...) double time = 0; gf_boolean_t is_peek = _gf_false; va_list ap; + struct gf_upcall *up_data = NULL; + struct gf_upcall_cache_invalidation *up_ci = NULL; dict = data; va_start (ap, data); @@ -3875,6 +3933,35 @@ notify (xlator_t *this, int32_t event, void *data, ...) } } break; + case GF_EVENT_UPCALL: + up_data = (struct gf_upcall *)data; + BUMP_UPCALL (GF_UPCALL); + + switch (up_data->event_type) { + case GF_UPCALL_RECALL_LEASE: + BUMP_UPCALL (GF_UPCALL_LEASE_RECALL); + break; + case GF_UPCALL_CACHE_INVALIDATION: + up_ci = (struct gf_upcall_cache_invalidation *)up_data->data; + if (up_ci->flags & (UP_XATTR | UP_XATTR_RM)) + BUMP_UPCALL (GF_UPCALL_CI_XATTR); + if (up_ci->flags & IATT_UPDATE_FLAGS) + BUMP_UPCALL (GF_UPCALL_CI_STAT); + if (up_ci->flags & UP_RENAME_FLAGS) + BUMP_UPCALL (GF_UPCALL_CI_RENAME); + if (up_ci->flags & UP_FORGET) + BUMP_UPCALL (GF_UPCALL_CI_FORGET); + if (up_ci->flags & UP_NLINK) + BUMP_UPCALL (GF_UPCALL_CI_NLINK); + break; + default: + gf_msg_debug (this->name, 0, "Unknown upcall event " + "type :%d", up_data->event_type); + break; + } + + default_notify (this, event, data); + break; default: default_notify (this, event, data); break; -- 1.7.1