a3470f
From ea20e0a38c9f150d9e96076e04f4b77109e41663 Mon Sep 17 00:00:00 2001
a3470f
From: Mohit Agrawal <moagrawa@redhat.com>
a3470f
Date: Wed, 27 Sep 2017 11:37:28 +0530
a3470f
Subject: [PATCH 091/128] extras: scripts to control CPU/MEMORY for any gluster
a3470f
 daemon during runtime
a3470f
a3470f
Problem: Sometimes gluster daemons like glustershd can consume a lot of cpu and/
a3470f
or memory if there is a large amount of data/entries to be healed.
a3470f
a3470f
Solution: Until we have some form of throttling/QoS mechanism built into
a3470f
gluster, we can use control groups for regulating cpu and memory of any gluster
a3470f
daemon using control-cpu-load.sh and control-mem.sh scripts respectively.
a3470f
a3470f
Test:    To test the control-cpu-load.sh script, follow the procedure below:
a3470f
         1) Set up a distributed-replicate environment
a3470f
         2) Turn the self-heal daemon off
a3470f
         3) Bring down one of the replica nodes
a3470f
         4) Create millions of files from the mount point
a3470f
         5) Start the node that was brought down
a3470f
         6) Check CPU usage of the shd process with the top command
a3470f
         7) Run the script, providing the shd pid and a CPU quota value
a3470f
         8) Check CPU usage of the shd process again with the top command
a3470f
a3470f
Note: control-mem.sh script can cap the memory usage of the process to the set
a3470f
limit, beyond which the process gets blocked. It resumes either when the memory
a3470f
usage comes down or if the limit is increased.
a3470f
a3470f
> BUG: 1496335
a3470f
> Change-Id: Id73c36b73ca600fa9f7905d84053d1e8633c996f
a3470f
> Reviewed on https://review.gluster.org/#/c/18404
a3470f
> (cherry picked from commit 2c066c4c365e77421d1009851144efae0b028628)
a3470f
a3470f
BUG: 1484446
a3470f
Change-Id: Id73c36b73ca600fa9f7905d84053d1e8633c996f
a3470f
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
a3470f
Reviewed-on: https://code.engineering.redhat.com/gerrit/124875
a3470f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
a3470f
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
a3470f
---
a3470f
 extras/Makefile.am         |   6 ++-
a3470f
 extras/control-cpu-load.sh | 116 ++++++++++++++++++++++++++++++++++++++++
a3470f
 extras/control-mem.sh      | 128 +++++++++++++++++++++++++++++++++++++++++++++
a3470f
 glusterfs.spec.in          |   5 ++
a3470f
 4 files changed, 254 insertions(+), 1 deletion(-)
a3470f
 create mode 100755 extras/control-cpu-load.sh
a3470f
 create mode 100755 extras/control-mem.sh
a3470f
a3470f
diff --git a/extras/Makefile.am b/extras/Makefile.am
a3470f
index 2812a4c..d9572ac 100644
a3470f
--- a/extras/Makefile.am
a3470f
+++ b/extras/Makefile.am
a3470f
@@ -19,6 +19,10 @@ vol_DATA = glusterd.vol
a3470f
 scriptsdir = $(datadir)/glusterfs/scripts
a3470f
 scripts_SCRIPTS = post-upgrade-script-for-quota.sh \
a3470f
 	pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh
a3470f
+if USE_SYSTEMD
a3470f
+scripts_SCRIPTS += control-cpu-load.sh
a3470f
+scripts_SCRIPTS += control-mem.sh
a3470f
+endif
a3470f
 
a3470f
 EXTRA_DIST = $(conf_DATA) specgen.scm glusterfs-mode.el glusterfs.vim \
a3470f
 	migrate-unify-to-distribute.sh backend-xattr-sanitize.sh backend-cleanup.sh \
a3470f
@@ -26,7 +30,7 @@ EXTRA_DIST = $(conf_DATA) specgen.scm glusterfs-mode.el glusterfs.vim \
a3470f
 	post-upgrade-script-for-quota.sh pre-upgrade-script-for-quota.sh \
a3470f
 	command-completion/gluster.bash command-completion/Makefile \
a3470f
 	command-completion/README stop-all-gluster-processes.sh clang-checker.sh \
a3470f
-	mount-shared-storage.sh
a3470f
+	mount-shared-storage.sh control-cpu-load.sh control-mem.sh
a3470f
 
a3470f
 install-data-local:
a3470f
 	if [ -n "$(tmpfilesdir)" ]; then \
a3470f
diff --git a/extras/control-cpu-load.sh b/extras/control-cpu-load.sh
a3470f
new file mode 100755
a3470f
index 0000000..b739c82
a3470f
--- /dev/null
a3470f
+++ b/extras/control-cpu-load.sh
a3470f
@@ -0,0 +1,116 @@
a3470f
+#!/bin/bash
a3470f
+
a3470f
+USAGE="This script provides a utility to control CPU utilization for any
a3470f
+gluster daemon.In this, we use cgroup framework to configure CPU quota
a3470f
+for a process(like selfheal daemon). Before running this script, make
a3470f
+sure that daemon is running.Every time daemon restarts, it is required
a3470f
+to rerun this command to set CPU quota on new daemon process id.
a3470f
+User can enter any value between 10 to 100 for CPU quota.
a3470f
+Recommended value of quota period is 25. 25 means, kernel will allocate
a3470f
+25 ms period to this group of tasks in every 100 ms period. This 25ms
a3470f
+could be considered as the maximum percentage of CPU quota daemon can take.
a3470f
+This value will be reflected on CPU usage of "top" command.If provided pid
a3470f
+is the only process and no other process is in competition to get CPU, more
a3470f
+ than 25% could be allocated to daemon to speed up the process."
a3470f
+
a3470f
+if [  $# -ge 1 ]; then
a3470f
+  case $1 in
a3470f
+    -h|--help) echo " " "$USAGE" | sed -r -e 's/^[ ]+//g'
a3470f
+               exit 0;
a3470f
+               ;;
a3470f
+  *) echo "Please Provide correct input for script."
a3470f
+     echo "For help correct options are -h or --help."
a3470f
+     exit 1;
a3470f
+               ;;
a3470f
+  esac
a3470f
+fi
a3470f
+
a3470f
+DIR_EXIST=0
a3470f
+LOC="/sys/fs/cgroup/cpu,cpuacct/system.slice/glusterd.service"
a3470f
+echo "Enter gluster daemon pid for which you want to control CPU."
a3470f
+read daemon_pid
a3470f
+
a3470f
+if expr ${daemon_pid} + 0 > /dev/null 2>&1 ;then
a3470f
+  CHECK_PID=$(pgrep -f gluster | grep ${daemon_pid})
a3470f
+  if [ -z "${CHECK_PID}" ]; then
a3470f
+    echo "No daemon is running or pid ${daemon_pid} does not match."
a3470f
+    echo "with running gluster processes."
a3470f
+    exit 1
a3470f
+  fi
a3470f
+else
a3470f
+  echo "Entered daemon_pid is not numeric so Rerun the script."
a3470f
+  exit 1
a3470f
+fi
a3470f
+
a3470f
+
a3470f
+if [ -f ${LOC}/tasks ];then
a3470f
+  CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/tasks)
a3470f
+  if [ ${CHECK_CGROUP} ]; then
a3470f
+    echo "pid ${daemon_pid} is attached with glusterd.service cgroup."
a3470f
+  fi
a3470f
+fi
a3470f
+
a3470f
+cgroup_name=cgroup_gluster_${daemon_pid}
a3470f
+if [ -f ${LOC}/${cgroup_name}/tasks ]; then
a3470f
+  CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/${cgroup_name}/tasks)
a3470f
+  if [ ${CHECK_CGROUP} ]; then
a3470f
+    val=`cat ${LOC}/${cgroup_name}/cpu.cfs_quota_us`
a3470f
+    qval=$((val / 1000))
a3470f
+    echo "pid ${daemon_pid} is already attached ${cgroup_name} with quota value ${qval}."
a3470f
+    echo "Press n if you don't want to reassign ${daemon_pid} with new quota value."
a3470f
+    DIR_EXIST=1
a3470f
+  else
a3470f
+    echo "pid ${daemon_pid} is not attached with ${cgroup_name}."
a3470f
+  fi
a3470f
+fi
a3470f
+
a3470f
+read -p "If you want to continue the script to attach ${daemon_pid} with new ${cgroup_name} cgroup Press (y/n)?" choice
a3470f
+case "$choice" in
a3470f
+  y|Y ) echo "yes";;
a3470f
+  n|N ) echo "no";exit;;
a3470f
+  * ) echo "invalid";exit;;
a3470f
+esac
a3470f
+
a3470f
+systemctl set-property glusterd.service CPUShares=1024
a3470f
+
a3470f
+if [ ${DIR_EXIST} -eq 0 ];then
a3470f
+  echo "Creating child cgroup directory '${cgroup_name} cgroup' for glusterd.service."
a3470f
+  mkdir -p ${LOC}/${cgroup_name}
a3470f
+  if [ ! -f ${LOC}/${cgroup_name}/tasks ];then
a3470f
+    echo "Not able to create ${cgroup_name} directory so exit."
a3470f
+    exit 1
a3470f
+  fi
a3470f
+fi
a3470f
+
a3470f
+echo "Enter quota value in range [10,100]:  "
a3470f
+
a3470f
+read quota_value
a3470f
+if expr ${quota_value} + 0 > /dev/null 2>&1 ;then
a3470f
+  if [ ${quota_value} -lt 10 ] || [ ${quota_value} -gt 100 ]; then
a3470f
+    echo "Entered quota value is not correct,it should be in the range ."
a3470f
+    echo "10-100. Ideal value is 25."
a3470f
+    echo "Rerun the sript with correct value."
a3470f
+    exit 1
a3470f
+  else
a3470f
+    echo "Entered quota value is $quota_value"
a3470f
+  fi
a3470f
+else
a3470f
+  echo "Entered quota value is not numeric so Rerun the script."
a3470f
+  exit 1
a3470f
+fi
a3470f
+
a3470f
+quota_value=$((quota_value * 1000))
a3470f
+echo "Setting $quota_value to cpu.cfs_quota_us for gluster_cgroup."
a3470f
+echo ${quota_value} > ${LOC}/${cgroup_name}/cpu.cfs_quota_us
a3470f
+
a3470f
+if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
a3470f
+  for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
a3470f
+    do
a3470f
+      echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
a3470f
+    done
a3470f
+  if cat /proc/${daemon_pid}/cgroup | grep -w ${cgroup_name} > /dev/null; then
a3470f
+    echo "Tasks are attached successfully specific to ${daemon_pid} to ${cgroup_name}."
a3470f
+  else
a3470f
+    echo "Tasks are not attached successfully."
a3470f
+  fi
a3470f
+fi
a3470f
diff --git a/extras/control-mem.sh b/extras/control-mem.sh
a3470f
new file mode 100755
a3470f
index 0000000..38aa2a0
a3470f
--- /dev/null
a3470f
+++ b/extras/control-mem.sh
a3470f
@@ -0,0 +1,128 @@
a3470f
+#!/bin/bash
a3470f
+
a3470f
+USAGE="This commands provides a utility to control MEMORY utilization for any
a3470f
+gluster daemon.In this, we use cgroup framework to configure MEMORY limit for
a3470f
+a process. Before running this script, make sure that daemon is running.Every
a3470f
+time daemon restarts, it is required to rerun this command to set memory limit
a3470f
+(in bytes) on new daemon process id.User can enter any value between 100
a3470f
+(in Mega bytes) to 8000000000000 for Memory limit in Mega bytes.
a3470f
+Memory limit value is depends on how much maximum memory user wants to restrict
a3470f
+for specific daemon process.If a process will try to consume memore more than
a3470f
+configured value then cgroup will hang/sleep this task and to resume the task
a3470f
+rerun the script with new increase memory limit value ."
a3470f
+
a3470f
+if [  $# -ge 1 ]; then
a3470f
+  case $1 in
a3470f
+    -h|--help) echo " " "$USAGE" | sed -r -e 's/^[ ]+//g'
a3470f
+               exit 0;
a3470f
+               ;;
a3470f
+    *) echo "Please Provide correct input for script."
a3470f
+       echo "For help correct options are -h of --help."
a3470f
+       exit 1;
a3470f
+               ;;
a3470f
+  esac
a3470f
+fi
a3470f
+
a3470f
+DIR_EXIST=0
a3470f
+LOC="/sys/fs/cgroup/memory/system.slice/glusterd.service"
a3470f
+echo "Enter Any gluster daemon pid for that you want to control MEMORY."
a3470f
+read daemon_pid
a3470f
+
a3470f
+if expr ${daemon_pid} + 0 > /dev/null 2>&1 ;then
a3470f
+  CHECK_PID=$(pgrep -f gluster | grep ${daemon_pid})
a3470f
+  if [ -z "${CHECK_PID}" ]; then
a3470f
+    echo "No daemon is running or pid ${daemon_pid} does not match."
a3470f
+    echo "with running gluster processes."
a3470f
+    exit 1
a3470f
+  fi
a3470f
+else
a3470f
+  echo "Entered daemon_pid is not numeric so Rerun the script."
a3470f
+  exit 1
a3470f
+fi
a3470f
+
a3470f
+
a3470f
+if [ -f ${LOC}/tasks ]; then
a3470f
+  CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/tasks)
a3470f
+  if [ ${CHECK_CGROUP} ] ;then
a3470f
+    echo "pid ${daemon_pid} is attached with default glusterd.service cgroup."
a3470f
+  fi
a3470f
+fi
a3470f
+
a3470f
+cgroup_name=cgroup_gluster_${daemon_pid}
a3470f
+if [ -f ${LOC}/${cgroup_name}/tasks ];then
a3470f
+  CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/${cgroup_name}/tasks)
a3470f
+  if [ ${CHECK_CGROUP} ]; then
a3470f
+    val=`cat ${LOC}/${cgroup_name}/memory.limit_in_bytes`
a3470f
+    mval=$((val / 1024 / 1024))
a3470f
+    echo "pid ${daemon_pid} is already attached ${cgroup_name} with mem value ${mval}."
a3470f
+    echo "Press n if you don't want to reassign ${daemon_pid} with new mem value."
a3470f
+    DIR_EXIST=1
a3470f
+  else
a3470f
+    echo "pid ${daemon_pid} is not attached with ${cgroup_name}."
a3470f
+  fi
a3470f
+fi
a3470f
+
a3470f
+read -p "If you want to continue the script to attach daeomon with new cgroup. Press (y/n)?" choice
a3470f
+case "$choice" in
a3470f
+  y|Y ) echo "yes";;
a3470f
+  n|N ) echo "no";exit;;
a3470f
+  * ) echo "invalid";exit;;
a3470f
+esac
a3470f
+
a3470f
+systemctl set-property glusterd.service CPUShares=1024
a3470f
+
a3470f
+if [ ${DIR_EXIST} -eq 0 ];then
a3470f
+  echo "Creating child cgroup directory '${cgroup_name} cgroup' for glusterd.service."
a3470f
+  mkdir -p ${LOC}/${cgroup_name}
a3470f
+  if [ ! -f ${LOC}/${cgroup_name}/tasks ];then
a3470f
+    echo "Not able to create ${LOC}/${cgroup_name} directory so exit."
a3470f
+    exit 1
a3470f
+  fi
a3470f
+fi
a3470f
+
a3470f
+echo "Enter Memory value in Mega bytes [100,8000000000000]:  "
a3470f
+
a3470f
+read mem_value
a3470f
+if expr ${mem_value} + 0 > /dev/null 2>&1 ;then
a3470f
+  if [ ${mem_value} -lt 100 ] || [ ${mem_value} -gt 8000000000000 ]; then
a3470f
+    echo "Entered memory value is not correct,it should be in the range ."
a3470f
+    echo "100-8000000000000, Rerun the script with correct value ."
a3470f
+    exit 1
a3470f
+  else
a3470f
+    echo "Entered memory limit value is ${mem_value}."
a3470f
+  fi
a3470f
+else
a3470f
+  echo "Entered memory value is not numeric so Rerun the script."
a3470f
+  exit 1
a3470f
+fi
a3470f
+
a3470f
+mem_value=$(($mem_value * 1024 * 1024))
a3470f
+if [ ${DIR_EXIST} -eq 0 ];then
a3470f
+  echo "Setting ${mem_value} to memory.limit_in_bytes for ${LOC}/${cgroup_name}."
a3470f
+  echo ${mem_value} > ${LOC}/${cgroup_name}/memory.limit_in_bytes
a3470f
+  #Set memory value to memory.memsw.limit_in_bytes
a3470f
+  echo ${mem_value} > ${LOC}/${cgroup_name}/memory.memsw.limit_in_bytes
a3470f
+  # disable oom_control so that kernel will not send kill signal to the
a3470f
+  # task once limit has reached
a3470f
+  echo 1 > ${LOC}/${cgroup_name}/memory.oom_control
a3470f
+else
a3470f
+  #Increase mem_value to memory.memsw.limit_in_bytes
a3470f
+  echo ${mem_value} > ${LOC}/${cgroup_name}/memory.memsw.limit_in_bytes
a3470f
+  echo "Increase ${mem_value} to memory.limit_in_bytes for ${LOC}/${cgroup_name}."
a3470f
+  echo ${mem_value} > ${LOC}/${cgroup_name}/memory.limit_in_bytes
a3470f
+  # disable oom_control so that kernel will not send kill signal to the
a3470f
+  # task once limit has reached
a3470f
+  echo 1 > ${LOC}/${cgroup_name}/memory.oom_control
a3470f
+fi
a3470f
+
a3470f
+if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
a3470f
+  for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
a3470f
+    do
a3470f
+      echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
a3470f
+    done
a3470f
+  if cat /proc/${daemon_pid}/cgroup | grep -iw ${cgroup_name} > /dev/null; then
a3470f
+    echo "Tasks are attached successfully specific to ${daemon_pid} to ${cgroup_name}."
a3470f
+  else
a3470f
+    echo "Tasks are not attached successfully."
a3470f
+  fi
a3470f
+fi
a3470f
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
a3470f
index da8a3e5..56a62a9 100644
a3470f
--- a/glusterfs.spec.in
a3470f
+++ b/glusterfs.spec.in
a3470f
@@ -1553,6 +1553,8 @@ exit 0
a3470f
      %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
a3470f
 %if ( 0%{?_with_systemd:1} )
a3470f
      %{_libexecdir}/glusterfs/mount-shared-storage.sh
a3470f
+     %{_datadir}/glusterfs/scripts/control-cpu-load.sh
a3470f
+     %{_datadir}/glusterfs/scripts/control-mem.sh
a3470f
 %endif
a3470f
 
a3470f
 # Incrementalapi
a3470f
@@ -2178,6 +2180,9 @@ fi
a3470f
 %endif
a3470f
 
a3470f
 %changelog
a3470f
+* Fri Dec 01 2017 Mohit Agrawal <moagrawa@redhat.com>
a3470f
+- Added control-cpu-load.sh and control-mem.sh scripts to glusterfs-server section(#1484446)
a3470f
+
a3470f
 * Mon Nov 13 2017 Jiffin Tony Thottan <jthottan@redhat.com>
a3470f
 - Adding ganesha bits back in gluster repository #1499784
a3470f
 
a3470f
-- 
a3470f
1.8.3.1
a3470f