|
|
765b01 |
From 20ecc0827e7837c52f3903638a59959f8bf17f9e Mon Sep 17 00:00:00 2001
|
|
|
765b01 |
From: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
|
|
|
765b01 |
Date: Tue, 5 Nov 2013 00:29:35 +0900
|
|
|
765b01 |
Subject: [PATCH] [PATCH v2] Improve progress information for huge memory
|
|
|
765b01 |
system.
|
|
|
765b01 |
|
|
|
765b01 |
On system with huge memory, percentage in progress information is
|
|
|
765b01 |
updated at very slow interval, because 1 percent on 1 TiB memory is
|
|
|
765b01 |
about 10 GiB, which looks like as if system has freezed. Then,
|
|
|
765b01 |
confused users might get tempted to push a reset button to recover the
|
|
|
765b01 |
system. We want to avoid such situation as much as possible.
|
|
|
765b01 |
|
|
|
765b01 |
To address the issue, this patch:
|
|
|
765b01 |
|
|
|
765b01 |
- increases the number of calling print_progress() from once in
|
|
|
765b01 |
(written pages / 100)-pages to once in (written pages /
|
|
|
765b01 |
10000)-pages,
|
|
|
765b01 |
|
|
|
765b01 |
- extends precision in progress information by adding 1 digit to its
|
|
|
765b01 |
fractional part, and
|
|
|
765b01 |
|
|
|
765b01 |
- adds spinner that rotates in the order of /, |, \ and - in next to
|
|
|
765b01 |
the progress indicator in percentage,
|
|
|
765b01 |
|
|
|
765b01 |
which help users to get aware that system is still active and crash
|
|
|
765b01 |
dump process is still in progress now.
|
|
|
765b01 |
|
|
|
765b01 |
The spinner code is borrowed from diskdump code.
|
|
|
765b01 |
|
|
|
765b01 |
The ouput is changed from:
|
|
|
765b01 |
|
|
|
765b01 |
Copying data : [ 0 %] /
|
|
|
765b01 |
Copying data : [ 8 %] |
|
|
|
765b01 |
Copying data : [ 11 %] \
|
|
|
765b01 |
Copying data : [ 14 %] -
|
|
|
765b01 |
Copying data : [ 16 %] /
|
|
|
765b01 |
...
|
|
|
765b01 |
Copying data : [ 99 %] /
|
|
|
765b01 |
Copying data : [100 %] |
|
|
|
765b01 |
|
|
|
765b01 |
to:
|
|
|
765b01 |
|
|
|
765b01 |
Copying data : [ 0.1 %] /
|
|
|
765b01 |
Copying data : [ 8.9 %] |
|
|
|
765b01 |
Copying data : [ 11.6 %] \
|
|
|
765b01 |
Copying data : [ 14.3 %] -
|
|
|
765b01 |
Copying data : [ 16.4 %] /
|
|
|
765b01 |
...
|
|
|
765b01 |
Copying data : [ 99.2 %] /
|
|
|
765b01 |
Copying data : [100.0 %] |
|
|
|
765b01 |
|
|
|
765b01 |
This patch doesn't adopt purely time-based approach that records the
|
|
|
765b01 |
time when print_progress() is called at each invocation and print the
|
|
|
765b01 |
recorded time if it is strictly larger than the previous time value.
|
|
|
765b01 |
|
|
|
765b01 |
The problem is that calling time() system call amounts to
|
|
|
765b01 |
considertably long time in total on huge memory system. For example,
|
|
|
765b01 |
here is a simple bench that measures total execution time of time()
|
|
|
765b01 |
system call for (1TiB / 4KiB)-times:
|
|
|
765b01 |
|
|
|
765b01 |
$ ./bench
|
|
|
765b01 |
total: 18.360503
|
|
|
765b01 |
total: 34.910297
|
|
|
765b01 |
|
|
|
765b01 |
the result of which reveals that it amounts to about 20 seconds with
|
|
|
765b01 |
vDSO optimization and about 35 seconds without.
|
|
|
765b01 |
|
|
|
765b01 |
BTW, on our 12 TiB memory system, we collect about 300 GiB crash dump
|
|
|
765b01 |
in about 40 minutes with dump level 31. On 12TiB, the benchmark result
|
|
|
765b01 |
corresponds to about 4 minutes and 7 minutes respectively, both of which
|
|
|
765b01 |
affects a whole performance.
|
|
|
765b01 |
|
|
|
765b01 |
==bench.c
|
|
|
765b01 |
static inline double getdtime(void)
|
|
|
765b01 |
{
|
|
|
765b01 |
struct timeval tv;
|
|
|
765b01 |
gettimeofday(&tv, NULL);
|
|
|
765b01 |
return (double)tv.tv_sec + (double)tv.tv_usec * 1.0e-6;
|
|
|
765b01 |
}
|
|
|
765b01 |
|
|
|
765b01 |
int main(int argc, char **argv)
|
|
|
765b01 |
{
|
|
|
765b01 |
unsigned long i;
|
|
|
765b01 |
time_t t;
|
|
|
765b01 |
double t1, t2, total;
|
|
|
765b01 |
const int NR_time = 201;
|
|
|
765b01 |
const unsigned long nr_repeat = (1UL << 40) / 4096;
|
|
|
765b01 |
|
|
|
765b01 |
total = 0;
|
|
|
765b01 |
for (i = 0; i < nr_repeat; ++i) {
|
|
|
765b01 |
t1 = getdtime();
|
|
|
765b01 |
time(&t);
|
|
|
765b01 |
t2 = getdtime();
|
|
|
765b01 |
total += t2 - t1;
|
|
|
765b01 |
}
|
|
|
765b01 |
printf("total: %lf\n", total);
|
|
|
765b01 |
|
|
|
765b01 |
total = 0;
|
|
|
765b01 |
for (i = 0; i < nr_repeat; ++i) {
|
|
|
765b01 |
t1 = getdtime();
|
|
|
765b01 |
syscall(NR_time, &t);
|
|
|
765b01 |
t2 = getdtime();
|
|
|
765b01 |
total += t2 - t1;
|
|
|
765b01 |
}
|
|
|
765b01 |
printf("total: %lf\n", total);
|
|
|
765b01 |
|
|
|
765b01 |
return 0;
|
|
|
765b01 |
}
|
|
|
765b01 |
==
|
|
|
765b01 |
|
|
|
765b01 |
Signed-off-by: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
|
|
|
765b01 |
---
|
|
|
765b01 |
makedumpfile.c | 8 ++++----
|
|
|
765b01 |
print_info.c | 15 +++++++++------
|
|
|
765b01 |
2 files changed, 13 insertions(+), 10 deletions(-)
|
|
|
765b01 |
|
|
|
765b01 |
diff --git a/makedumpfile-1.5.4/makedumpfile.c b/makedumpfile-1.5.4/makedumpfile.c
|
|
|
765b01 |
index dafe83b..3746cf6 100644
|
|
|
765b01 |
--- a/makedumpfile-1.5.4/makedumpfile.c
|
|
|
765b01 |
+++ b/makedumpfile-1.5.4/makedumpfile.c
|
|
|
765b01 |
@@ -5573,7 +5573,7 @@ write_elf_pages(struct cache_data *cd_header, struct cache_data *cd_page)
|
|
|
765b01 |
initialize_2nd_bitmap(&bitmap2);
|
|
|
765b01 |
|
|
|
765b01 |
num_dumpable = get_num_dumpable();
|
|
|
765b01 |
- per = num_dumpable / 100;
|
|
|
765b01 |
+ per = num_dumpable / 10000;
|
|
|
765b01 |
|
|
|
765b01 |
off_seg_load = info->offset_load_dumpfile;
|
|
|
765b01 |
cd_page->offset = info->offset_load_dumpfile;
|
|
|
765b01 |
@@ -5858,7 +5858,7 @@ write_elf_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_page)
|
|
|
765b01 |
return FALSE;
|
|
|
765b01 |
|
|
|
765b01 |
num_dumpable = info->num_dumpable;
|
|
|
765b01 |
- per = num_dumpable / 100;
|
|
|
765b01 |
+ per = num_dumpable / 10000;
|
|
|
765b01 |
|
|
|
765b01 |
off_seg_load = info->offset_load_dumpfile;
|
|
|
765b01 |
cd_page->offset = info->offset_load_dumpfile;
|
|
|
765b01 |
@@ -6116,7 +6116,7 @@ write_kdump_pages(struct cache_data *cd_header, struct cache_data *cd_page)
|
|
|
765b01 |
}
|
|
|
765b01 |
|
|
|
765b01 |
num_dumpable = get_num_dumpable();
|
|
|
765b01 |
- per = num_dumpable / 100;
|
|
|
765b01 |
+ per = num_dumpable / 10000;
|
|
|
765b01 |
|
|
|
765b01 |
/*
|
|
|
765b01 |
* Calculate the offset of the page data.
|
|
|
765b01 |
@@ -6317,7 +6317,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
|
|
|
765b01 |
goto out;
|
|
|
765b01 |
}
|
|
|
765b01 |
|
|
|
765b01 |
- per = info->num_dumpable / 100;
|
|
|
765b01 |
+ per = info->num_dumpable / 10000;
|
|
|
765b01 |
|
|
|
765b01 |
/*
|
|
|
765b01 |
* Set a fileoffset of Physical Address 0x0.
|
|
|
765b01 |
diff --git a/makedumpfile-1.5.4/print_info.c b/makedumpfile-1.5.4/print_info.c
|
|
|
765b01 |
index 3527970..d7a8600 100644
|
|
|
765b01 |
--- a/makedumpfile-1.5.4/print_info.c
|
|
|
765b01 |
+++ b/makedumpfile-1.5.4/print_info.c
|
|
|
765b01 |
@@ -283,27 +283,30 @@ print_usage(void)
|
|
|
765b01 |
void
|
|
|
765b01 |
print_progress(const char *msg, unsigned long current, unsigned long end)
|
|
|
765b01 |
{
|
|
|
765b01 |
- int progress;
|
|
|
765b01 |
+ float progress;
|
|
|
765b01 |
time_t tm;
|
|
|
765b01 |
static time_t last_time = 0;
|
|
|
765b01 |
+ static unsigned int lapse = 0;
|
|
|
765b01 |
+ static const char *spinner = "/|\\-";
|
|
|
765b01 |
|
|
|
765b01 |
if (current < end) {
|
|
|
765b01 |
tm = time(NULL);
|
|
|
765b01 |
if (tm - last_time < 1)
|
|
|
765b01 |
return;
|
|
|
765b01 |
last_time = tm;
|
|
|
765b01 |
- progress = current * 100 / end;
|
|
|
765b01 |
+ progress = (float)current * 100 / end;
|
|
|
765b01 |
} else
|
|
|
765b01 |
progress = 100;
|
|
|
765b01 |
|
|
|
765b01 |
if (flag_ignore_r_char) {
|
|
|
765b01 |
- PROGRESS_MSG("%-" PROGRESS_MAXLEN "s: [%3d %%]\n",
|
|
|
765b01 |
- msg, progress);
|
|
|
765b01 |
+ PROGRESS_MSG("%-" PROGRESS_MAXLEN "s: [%5.1f %%] %c\n",
|
|
|
765b01 |
+ msg, progress, spinner[lapse % 4]);
|
|
|
765b01 |
} else {
|
|
|
765b01 |
PROGRESS_MSG("\r");
|
|
|
765b01 |
- PROGRESS_MSG("%-" PROGRESS_MAXLEN "s: [%3d %%] ",
|
|
|
765b01 |
- msg, progress);
|
|
|
765b01 |
+ PROGRESS_MSG("%-" PROGRESS_MAXLEN "s: [%5.1f %%] %c",
|
|
|
765b01 |
+ msg, progress, spinner[lapse % 4]);
|
|
|
765b01 |
}
|
|
|
765b01 |
+ lapse++;
|
|
|
765b01 |
}
|
|
|
765b01 |
|
|
|
765b01 |
void
|
|
|
765b01 |
--
|
|
|
765b01 |
1.8.3.1
|
|
|
765b01 |
|