|
|
3b7bb0 |
From 9535680992da4509b22ba0bc9e3da58a21248e71 Mon Sep 17 00:00:00 2001
|
|
|
3b7bb0 |
From: =?UTF-8?q?Ond=C5=99ej=20Lyson=C4=9Bk?= <olysonek@redhat.com>
|
|
|
3b7bb0 |
Date: Thu, 6 Feb 2020 17:39:22 +0100
|
|
|
3b7bb0 |
Subject: [PATCH] Add accelerator-performance profile
|
|
|
3b7bb0 |
MIME-Version: 1.0
|
|
|
3b7bb0 |
Content-Type: text/plain; charset=UTF-8
|
|
|
3b7bb0 |
Content-Transfer-Encoding: 8bit
|
|
|
3b7bb0 |
|
|
|
3b7bb0 |
Resolves: rhbz#1795604
|
|
|
3b7bb0 |
|
|
|
3b7bb0 |
Signed-off-by: Ondřej Lysoněk <olysonek@redhat.com>
|
|
|
3b7bb0 |
---
|
|
|
3b7bb0 |
man/tuned-profiles.7 | 6 +++
|
|
|
3b7bb0 |
profiles/accelerator-performance/tuned.conf | 60 +++++++++++++++++++++
|
|
|
3b7bb0 |
2 files changed, 66 insertions(+)
|
|
|
3b7bb0 |
create mode 100644 profiles/accelerator-performance/tuned.conf
|
|
|
3b7bb0 |
|
|
|
3b7bb0 |
diff --git a/man/tuned-profiles.7 b/man/tuned-profiles.7
|
|
|
3b7bb0 |
index 60a5dc9..aaf140b 100644
|
|
|
3b7bb0 |
--- a/man/tuned-profiles.7
|
|
|
3b7bb0 |
+++ b/man/tuned-profiles.7
|
|
|
3b7bb0 |
@@ -77,6 +77,12 @@ mechanisms and enables sysctl settings that improve the throughput performance
|
|
|
3b7bb0 |
of your disk and network IO. CPU governor is set to performance and CPU energy
|
|
|
3b7bb0 |
performance bias is set to performance. Disk readahead values are increased.
|
|
|
3b7bb0 |
|
|
|
3b7bb0 |
+.TP
|
|
|
3b7bb0 |
+.BI "accelerator\-performance"
|
|
|
3b7bb0 |
+This profile contains the same tuning as the throughput\-performance profile.
|
|
|
3b7bb0 |
+Additionally, it locks the CPU to low C states so that the latency is less than
|
|
|
3b7bb0 |
+100us. This improves the performance of certain accelerators, such as GPUs.
|
|
|
3b7bb0 |
+
|
|
|
3b7bb0 |
.TP
|
|
|
3b7bb0 |
.BI "latency\-performance"
|
|
|
3b7bb0 |
Profile for low latency performance tuning. Disables power saving mechanisms.
|
|
|
3b7bb0 |
diff --git a/profiles/accelerator-performance/tuned.conf b/profiles/accelerator-performance/tuned.conf
|
|
|
3b7bb0 |
new file mode 100644
|
|
|
3b7bb0 |
index 0000000..8068b3b
|
|
|
3b7bb0 |
--- /dev/null
|
|
|
3b7bb0 |
+++ b/profiles/accelerator-performance/tuned.conf
|
|
|
3b7bb0 |
@@ -0,0 +1,60 @@
|
|
|
3b7bb0 |
+#
|
|
|
3b7bb0 |
+# tuned configuration
|
|
|
3b7bb0 |
+#
|
|
|
3b7bb0 |
+
|
|
|
3b7bb0 |
+[main]
|
|
|
3b7bb0 |
+summary=Throughput performance based tuning with disabled higher latency STOP states
|
|
|
3b7bb0 |
+
|
|
|
3b7bb0 |
+[cpu]
|
|
|
3b7bb0 |
+governor=performance
|
|
|
3b7bb0 |
+energy_perf_bias=performance
|
|
|
3b7bb0 |
+min_perf_pct=100
|
|
|
3b7bb0 |
+force_latency=99
|
|
|
3b7bb0 |
+
|
|
|
3b7bb0 |
+[disk]
|
|
|
3b7bb0 |
+readahead=>4096
|
|
|
3b7bb0 |
+
|
|
|
3b7bb0 |
+[sysctl]
|
|
|
3b7bb0 |
+# ktune sysctl settings for rhel6 servers, maximizing i/o throughput
|
|
|
3b7bb0 |
+#
|
|
|
3b7bb0 |
+# Minimal preemption granularity for CPU-bound tasks:
|
|
|
3b7bb0 |
+# (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
|
|
3b7bb0 |
+kernel.sched_min_granularity_ns = 10000000
|
|
|
3b7bb0 |
+
|
|
|
3b7bb0 |
+# SCHED_OTHER wake-up granularity.
|
|
|
3b7bb0 |
+# (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
|
|
3b7bb0 |
+#
|
|
|
3b7bb0 |
+# This option delays the preemption effects of decoupled workloads
|
|
|
3b7bb0 |
+# and reduces their over-scheduling. Synchronous workloads will still
|
|
|
3b7bb0 |
+# have immediate wakeup/sleep latencies.
|
|
|
3b7bb0 |
+kernel.sched_wakeup_granularity_ns = 15000000
|
|
|
3b7bb0 |
+
|
|
|
3b7bb0 |
+# If a workload mostly uses anonymous memory and it hits this limit, the entire
|
|
|
3b7bb0 |
+# working set is buffered for I/O, and any more write buffering would require
|
|
|
3b7bb0 |
+# swapping, so it's time to throttle writes until I/O can catch up. Workloads
|
|
|
3b7bb0 |
+# that mostly use file mappings may be able to use even higher values.
|
|
|
3b7bb0 |
+#
|
|
|
3b7bb0 |
+# The generator of dirty data starts writeback at this percentage (system default
|
|
|
3b7bb0 |
+# is 20%)
|
|
|
3b7bb0 |
+vm.dirty_ratio = 40
|
|
|
3b7bb0 |
+
|
|
|
3b7bb0 |
+# Start background writeback (via writeback threads) at this percentage (system
|
|
|
3b7bb0 |
+# default is 10%)
|
|
|
3b7bb0 |
+vm.dirty_background_ratio = 10
|
|
|
3b7bb0 |
+
|
|
|
3b7bb0 |
+# PID allocation wrap value. When the kernel's next PID value
|
|
|
3b7bb0 |
+# reaches this value, it wraps back to a minimum PID value.
|
|
|
3b7bb0 |
+# PIDs of value pid_max or larger are not allocated.
|
|
|
3b7bb0 |
+#
|
|
|
3b7bb0 |
+# A suggested value for pid_max is 1024 * <# of cpu cores/threads in system>
|
|
|
3b7bb0 |
+# e.g., a box with 32 cpus, the default of 32768 is reasonable, for 64 cpus,
|
|
|
3b7bb0 |
+# 65536, for 4096 cpus, 4194304 (which is the upper limit possible).
|
|
|
3b7bb0 |
+#kernel.pid_max = 65536
|
|
|
3b7bb0 |
+
|
|
|
3b7bb0 |
+# The swappiness parameter controls the tendency of the kernel to move
|
|
|
3b7bb0 |
+# processes out of physical memory and onto the swap disk.
|
|
|
3b7bb0 |
+# 0 tells the kernel to avoid swapping processes out of physical memory
|
|
|
3b7bb0 |
+# for as long as possible
|
|
|
3b7bb0 |
+# 100 tells the kernel to aggressively swap processes out of physical memory
|
|
|
3b7bb0 |
+# and move them to swap cache
|
|
|
3b7bb0 |
+vm.swappiness=10
|
|
|
3b7bb0 |
--
|
|
|
3b7bb0 |
2.24.1
|
|
|
3b7bb0 |
|