Blame SOURCES/0009-Improve-the-ps-performance-for-vmcores-with-large-nu.patch

e039e7
From d52cccfaa96ed6f61ff9d53da88715296e31db80 Mon Sep 17 00:00:00 2001
e039e7
From: Tao Liu <ltao@redhat.com>
e039e7
Date: Fri, 21 Jan 2022 13:43:09 +0800
e039e7
Subject: [PATCH 09/11] Improve the ps performance for vmcores with large
e039e7
 number of threads
e039e7
e039e7
Previously, the ps command iterated over all threads which
e039e7
have the same tgid, to accumulate their rss value, in order to
e039e7
get a thread/process's final rss value as part of the final output.
e039e7
e039e7
For non-live systems, the rss accumulation values are identical for
e039e7
threads which have the same tgid, so there is no need to do the
e039e7
iteration and accumulation repeatedly, thus a lot of readmem calls are
e039e7
skipped. Otherwise it will be the performance bottleneck if the
e039e7
vmcores have a large number of threads.
e039e7
e039e7
In this patch, the rss accumulation value will be stored in a cache,
e039e7
so the next thread with the same tgid will take it directly without
e039e7
the iteration.
e039e7
e039e7
For example, we can observe the performance issue when a vmcore has
e039e7
~65k processes, most of which are threads for several specific
e039e7
processes. Without the patch, it takes ~7h for the ps command
e039e7
to finish. With the patch, the ps command finishes in 1min.
e039e7
e039e7
Signed-off-by: Tao Liu <ltao@redhat.com>
e039e7
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
e039e7
---
e039e7
 defs.h   |  1 +
e039e7
 memory.c | 70 +++++++++++++++++++++++++++++++-------------------------
e039e7
 task.c   |  1 +
e039e7
 3 files changed, 41 insertions(+), 31 deletions(-)
e039e7
e039e7
diff --git a/defs.h b/defs.h
e039e7
index 41b6cbc6cc85..77e76f27cddb 100644
e039e7
--- a/defs.h
e039e7
+++ b/defs.h
e039e7
@@ -830,6 +830,7 @@ struct task_context {                     /* context stored for each task */
e039e7
 struct tgid_context {               /* tgid and task stored for each task */
e039e7
 	ulong tgid;
e039e7
 	ulong task;
e039e7
+	long rss_cache;
e039e7
 };
e039e7
 
e039e7
 struct task_table {                      /* kernel/local task table data */
e039e7
diff --git a/memory.c b/memory.c
e039e7
index 5af45fd7d834..e80c59ea4534 100644
e039e7
--- a/memory.c
e039e7
+++ b/memory.c
e039e7
@@ -4665,7 +4665,7 @@ void
e039e7
 get_task_mem_usage(ulong task, struct task_mem_usage *tm)
e039e7
 {
e039e7
 	struct task_context *tc;
e039e7
-	long rss = 0;
e039e7
+	long rss = 0, rss_cache = 0;
e039e7
 
e039e7
 	BZERO(tm, sizeof(struct task_mem_usage));
e039e7
 
e039e7
@@ -4730,38 +4730,46 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
e039e7
 					(last->tgid == (last + 1)->tgid))
e039e7
 					last++;
e039e7
 
e039e7
-				while (first <= last)
e039e7
-				{
e039e7
-					/* count 0 -> filepages */
e039e7
-					if (!readmem(first->task +
e039e7
-						OFFSET(task_struct_rss_stat) +
e039e7
-						OFFSET(task_rss_stat_count), KVADDR,
e039e7
-						&sync_rss,
e039e7
-						sizeof(int),
e039e7
-						"task_struct rss_stat MM_FILEPAGES",
e039e7
-						RETURN_ON_ERROR))
e039e7
-							continue;
e039e7
-
e039e7
-					rss += sync_rss;
e039e7
-
e039e7
-					/* count 1 -> anonpages */
e039e7
-					if (!readmem(first->task +
e039e7
-						OFFSET(task_struct_rss_stat) +
e039e7
-						OFFSET(task_rss_stat_count) +
e039e7
-						sizeof(int),
e039e7
-						KVADDR, &sync_rss,
e039e7
-						sizeof(int),
e039e7
-						"task_struct rss_stat MM_ANONPAGES",
e039e7
-						RETURN_ON_ERROR))
e039e7
-							continue;
e039e7
-
e039e7
-					rss += sync_rss;
e039e7
-
e039e7
-					if (first == last)
e039e7
-						break;
e039e7
-					first++;
e039e7
+				/*
e039e7
+				 * Using rss cache for dumpfile is more beneficial than live debug
e039e7
+				 * because its value never changes in dumpfile.
e039e7
+				 */
e039e7
+				if (ACTIVE() || last->rss_cache == UNINITIALIZED) {
e039e7
+					while (first <= last)
e039e7
+					{
e039e7
+						/* count 0 -> filepages */
e039e7
+						if (!readmem(first->task +
e039e7
+							OFFSET(task_struct_rss_stat) +
e039e7
+							OFFSET(task_rss_stat_count), KVADDR,
e039e7
+							&sync_rss,
e039e7
+							sizeof(int),
e039e7
+							"task_struct rss_stat MM_FILEPAGES",
e039e7
+							RETURN_ON_ERROR))
e039e7
+								continue;
e039e7
+
e039e7
+						rss_cache += sync_rss;
e039e7
+
e039e7
+						/* count 1 -> anonpages */
e039e7
+						if (!readmem(first->task +
e039e7
+							OFFSET(task_struct_rss_stat) +
e039e7
+							OFFSET(task_rss_stat_count) +
e039e7
+							sizeof(int),
e039e7
+							KVADDR, &sync_rss,
e039e7
+							sizeof(int),
e039e7
+							"task_struct rss_stat MM_ANONPAGES",
e039e7
+							RETURN_ON_ERROR))
e039e7
+								continue;
e039e7
+
e039e7
+						rss_cache += sync_rss;
e039e7
+
e039e7
+						if (first == last)
e039e7
+							break;
e039e7
+						first++;
e039e7
+					}
e039e7
+					last->rss_cache = rss_cache;
e039e7
 				}
e039e7
 
e039e7
+				rss += last->rss_cache;
e039e7
 				tt->last_tgid = last;
e039e7
 			}
e039e7
 		}
e039e7
diff --git a/task.c b/task.c
e039e7
index a79ed0d96fb5..864c838637ee 100644
e039e7
--- a/task.c
e039e7
+++ b/task.c
e039e7
@@ -2947,6 +2947,7 @@ add_context(ulong task, char *tp)
e039e7
 	tg = tt->tgid_array + tt->running_tasks;
e039e7
 	tg->tgid = *tgid_addr;
e039e7
 	tg->task = task;
e039e7
+	tg->rss_cache = UNINITIALIZED;
e039e7
 
e039e7
         if (do_verify && !verify_task(tc, do_verify)) {
e039e7
 		error(INFO, "invalid task address: %lx\n", tc->task);
e039e7
-- 
e039e7
2.20.1
e039e7