diff -Naurp pcp-4.3.2.orig/qa/1280 pcp-4.3.2/qa/1280
--- pcp-4.3.2.orig/qa/1280 1970-01-01 10:00:00.000000000 +1000
+++ pcp-4.3.2/qa/1280 2020-11-04 10:58:30.575985524 +1100
@@ -0,0 +1,86 @@
+#!/bin/sh
+# PCP QA Test No. 1280
+# segv in __pmGetArchiveEnd_ctx()
+#
+# Copyright (c) 2020 Ken McDonell. All Rights Reserved.
+#
+
+if [ $# -eq 0 ]  # no arguments: name the test after this script
+then
+ seq=`basename $0`
+ echo "QA output created by $seq"
+else
+ # use $seq from caller, unless not set
+ [ -n "$seq" ] || seq=`basename $0`
+ echo "QA output created by `basename $0` $*"  # banner also records the arguments
+fi
+
+# get standard environment, filters and checks
+. ./common.product
+. ./common.filter
+. ./common.check
+
+do_valgrind=false  # read by _doit to choose how pmdumplog is run
+if [ "$1" = "--valgrind" ]
+then
+ _check_valgrind  # defined outside this script, presumably in common.check
+ do_valgrind=true
+fi
+
+if which unxz >/dev/null  # the test body needs unxz to unpack *.xz archive files
+then
+ :
+else
+ _notrun "need unxz application installed"
+fi
+
+_cleanup()  # invoked from the exit/signal trap set after this definition
+{
+ cd $here  # leave $tmp (the test body cd's into it); $here is set outside this script
+ $sudo rm -rf $tmp $tmp.*  # remove the scratch directory and any $tmp.* files
+}
+
+status=0 # success is the default!
+$sudo rm -rf $tmp $tmp.* $seq.full  # start clean: no stale scratch files or $seq.full
+trap "_cleanup; exit \$status" 0 1 2 3 15  # \$status is expanded when the trap fires, not here
+
+_doit()  # dump the label of archive 20180416.10.00 in the current directory
+{
+ if $do_valgrind  # executes the command "true" or "false" held in the variable
+ then
+ _run_valgrind pmdumplog -z -Dlog -L 20180416.10.00  # helper defined outside this script
+ else
+ pmdumplog -z -Dlog -L 20180416.10.00 2>>$here/$seq.full  # stderr appended to $seq.full, kept out of stdout
+ fi
+}
+
+# real QA test starts here
+mkdir $tmp  # scratch directory; removed again by _cleanup
+cp archives/20180416.10.00* $tmp  # work on a private copy so the archive can be damaged below
+cd $tmp
+for f in *.xz  # unpack every xz-compressed file that was copied
+do
+ unxz $f
+done
+
+echo "=== full archive ==="  # case 1: archive as copied
+_doit
+
+echo
+echo "=== volume 3 missing ==="  # case 2: file 20180416.10.00.3 deleted
+rm 20180416.10.00.3
+_doit
+
+echo
+echo "=== volume 2 truncated ==="  # case 3: also cut 8 bytes off the end of file .2
+size=`stat 20180416.10.00.2 | sed -n -e '/Size:/{
+s/.*Size: *//
+s/ .*//
+p
+}'`
+size=`expr $size - 8`  # size was parsed from the "Size:" line of stat output; drop 8 bytes
+truncate -s $size 20180416.10.00.2  # file .3 is still missing at this point
+_doit
+
+# success, all done
+exit
diff -Naurp pcp-4.3.2.orig/qa/1280.out pcp-4.3.2/qa/1280.out
--- pcp-4.3.2.orig/qa/1280.out 1970-01-01 10:00:00.000000000 +1000
+++ pcp-4.3.2/qa/1280.out 2020-11-04 10:58:30.575985524 +1100
@@ -0,0 +1,30 @@
+QA output created by 1280
+=== full archive ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+ commencing Mon Apr 16 10:01:25.325401 2018
+ ending Mon Apr 16 14:32:47.409469 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
+
+=== volume 3 missing ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+ commencing Mon Apr 16 10:01:25.325401 2018
+ ending Mon Apr 16 14:26:25.396920 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
+
+=== volume 2 truncated ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+ commencing Mon Apr 16 10:01:25.325401 2018
+ ending Mon Apr 16 14:26:25.380430 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
diff -Naurp pcp-4.3.2.orig/qa/1281 pcp-4.3.2/qa/1281
--- pcp-4.3.2.orig/qa/1281 1970-01-01 10:00:00.000000000 +1000
+++ pcp-4.3.2/qa/1281 2020-11-04 10:58:30.575985524 +1100
@@ -0,0 +1,52 @@
+#!/bin/sh
+# PCP QA Test No. 1281
+# segv in __pmGetArchiveEnd_ctx()
+#
+# -- valgrind version of qa/1280
+#
+# Copyright (c) 2020 Ken McDonell. All Rights Reserved.
+#
+
+if [ $# -eq 0 ]  # no arguments: name the test after this script
+then
+ seq=`basename $0`
+ echo "QA output created by $seq"
+else
+ # use $seq from caller, unless not set
+ [ -n "$seq" ] || seq=`basename $0`
+ echo "QA output created by `basename $0` $*"  # banner also records the arguments
+fi
+
+# get standard environment, filters and checks
+. ./common.product
+. ./common.filter
+. ./common.check
+
+do_valgrind=false  # NOTE(review): set here but never read in this script
+if [ "$1" = "--valgrind" ]
+then
+ _check_valgrind  # defined outside this script, presumably in common.check
+ do_valgrind=true
+fi
+
+_cleanup()  # invoked from the exit/signal trap set after this definition
+{
+ cd $here  # $here is set outside this script
+ $sudo rm -rf $tmp $tmp.*  # remove $tmp and any $tmp.* scratch files
+}
+
+status=0 # success is the default!
+$sudo rm -rf $tmp $tmp.* $seq.full  # start clean: no stale scratch files or $seq.full
+trap "_cleanup; exit \$status" 0 1 2 3 15  # \$status is expanded when the trap fires, not here
+
+# real QA test starts here
+export seq  # so ./1280 reuses this value: it keeps a non-empty $seq when given arguments
+./1280 --valgrind \
+| $PCP_AWK_PROG '
+skip == 1 && $1 == "===" { skip = 0 }  # next "=== ..." header ends the diverted section
+/^=== std err ===/ { skip = 1 }  # divert from the std err header onwards
+skip == 0 { print }  # everything else stays on stdout
+skip == 1 { print >"'$here/$seq.full'" }'  # diverted lines are written to $seq.full
+
+# success, all done
+exit
diff -Naurp pcp-4.3.2.orig/qa/1281.out pcp-4.3.2/qa/1281.out
--- pcp-4.3.2.orig/qa/1281.out 1970-01-01 10:00:00.000000000 +1000
+++ pcp-4.3.2/qa/1281.out 2020-11-04 10:58:30.575985524 +1100
@@ -0,0 +1,55 @@
+QA output created by 1281
+QA output created by 1280 --valgrind
+=== full archive ===
+=== std out ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+ commencing Mon Apr 16 10:01:25.325401 2018
+ ending Mon Apr 16 14:32:47.409469 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
+=== filtered valgrind report ===
+Memcheck, a memory error detector
+Command: pmdumplog -z -Dlog -L 20180416.10.00
+LEAK SUMMARY:
+definitely lost: 0 bytes in 0 blocks
+indirectly lost: 0 bytes in 0 blocks
+ERROR SUMMARY: 0 errors from 0 contexts ...
+
+=== volume 3 missing ===
+=== std out ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+ commencing Mon Apr 16 10:01:25.325401 2018
+ ending Mon Apr 16 14:26:25.396920 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
+=== filtered valgrind report ===
+Memcheck, a memory error detector
+Command: pmdumplog -z -Dlog -L 20180416.10.00
+LEAK SUMMARY:
+definitely lost: 0 bytes in 0 blocks
+indirectly lost: 0 bytes in 0 blocks
+ERROR SUMMARY: 0 errors from 0 contexts ...
+
+=== volume 2 truncated ===
+=== std out ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+ commencing Mon Apr 16 10:01:25.325401 2018
+ ending Mon Apr 16 14:26:25.380430 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
+=== filtered valgrind report ===
+Memcheck, a memory error detector
+Command: pmdumplog -z -Dlog -L 20180416.10.00
+LEAK SUMMARY:
+definitely lost: 0 bytes in 0 blocks
+indirectly lost: 0 bytes in 0 blocks
+ERROR SUMMARY: 0 errors from 0 contexts ...
diff -Naurp pcp-4.3.2.orig/qa/417.out pcp-4.3.2/qa/417.out
--- pcp-4.3.2.orig/qa/417.out 2018-06-19 09:19:05.000000000 +1000
+++ pcp-4.3.2/qa/417.out 2020-11-04 10:44:15.722280608 +1100
@@ -253,6 +253,11 @@ Temporal Index
=== pmdumplog -t, no index ===
Note: timezone set to local timezone of host "moomba" from archive
+pmdumplog: Cannot locate end of archive bad: Corrupted record in a PCP archive log
+
+WARNING: This archive is sufficiently damaged that it may not be possible to
+ produce complete information. Continuing and hoping for the best.
+
Temporal Index
Log Vol end(meta) end(log)
diff -Naurp pcp-4.3.2.orig/qa/group pcp-4.3.2/qa/group
--- pcp-4.3.2.orig/qa/group 2020-11-04 10:43:55.184527945 +1100
+++ pcp-4.3.2/qa/group 2020-11-04 10:58:43.746826907 +1100
@@ -1614,6 +1614,8 @@ BAD
1269 libpcp local kernel
1274 pmlogextract pmdumplog labels help local sanity
1276 pmmgr containers local
+1280 archive pmdumplog local
+1281 archive pmdumplog valgrind local
1287 pmda.install pmda.prometheus local python
1289 pmval archive multi-archive decompress-xz local pmlogextract
1294 libpcp_mmv labels local valgrind
diff -Naurp pcp-4.3.2.orig/src/libpcp/src/logutil.c pcp-4.3.2/src/libpcp/src/logutil.c
--- pcp-4.3.2.orig/src/libpcp/src/logutil.c 2019-02-26 07:30:38.000000000 +1100
+++ pcp-4.3.2/src/libpcp/src/logutil.c 2020-11-04 10:44:15.722280608 +1100
@@ -2558,6 +2558,11 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
found = 0;
sts = PM_ERR_LOGREC; /* default error condition */
f = NULL;
+
+ /*
+ * start at last volume and work backwards until success or
+ * failure
+ */
for (vol = lcp->l_maxvol; vol >= lcp->l_minvol; vol--) {
if (acp->ac_curvol == vol) {
f = acp->ac_mfp;
@@ -2571,11 +2576,7 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
if (__pmFstat(f, &sbuf) < 0) {
/* if we can't stat() this one, then try previous volume(s) */
- if (f != acp->ac_mfp) {
- __pmFclose(f);
- f = NULL;
- }
- continue;
+ goto prior_vol;
}
if (vol == lcp->l_maxvol && sbuf.st_size == lcp->l_physend) {
@@ -2588,16 +2589,14 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
/* if this volume is empty, try previous volume */
if (sbuf.st_size <= (int)sizeof(__pmLogLabel) + 2*(int)sizeof(int)) {
- if (f != acp->ac_mfp) {
- __pmFclose(f);
- f = NULL;
- }
- continue;
+ goto prior_vol;
}
physend = (__pm_off_t)sbuf.st_size;
if (sizeof(off_t) > sizeof(__pm_off_t)) {
+ /* 64-bit off_t */
if (physend != sbuf.st_size) {
+ /* oops, 32-bit offset not the same */
pmNotifyErr(LOG_ERR, "pmGetArchiveEnd: PCP archive file"
" (meta) too big (%"PRIi64" bytes)\n",
(uint64_t)sbuf.st_size);
@@ -2621,18 +2620,17 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
*/
logend = (int)sizeof(__pmLogLabel) + 2*(int)sizeof(int);
for (i = lcp->l_numti - 1; i >= 0; i--) {
- if (lcp->l_ti[i].ti_vol != vol) {
- if (f != acp->ac_mfp) {
- __pmFclose(f);
- f = NULL;
- }
+ if (lcp->l_ti[i].ti_vol != vol)
continue;
- }
if (lcp->l_ti[i].ti_log <= physend) {
logend = lcp->l_ti[i].ti_log;
break;
}
}
+ if (i < 0) {
+ /* no dice in the temporal index, try previous volume */
+ goto prior_vol;
+ }
/*
* Now chase it forwards from the last index entry ...
@@ -2642,6 +2640,7 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
* valid record, so if not at start of volume, back up one
* record, then scan forwards.
*/
+ assert(f != NULL);
__pmFseek(f, (long)logend, SEEK_SET);
if (logend > (int)sizeof(__pmLogLabel) + 2*(int)sizeof(int)) {
if (paranoidLogRead(ctxp, PM_MODE_BACK, f, &rp) < 0) {
@@ -2681,10 +2680,17 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
if (found)
break;
+prior_vol:
/*
* this probably means this volume contains no useful records,
* try the previous volume
*/
+ if (f != acp->ac_mfp) {
+ /* f comes from _logpeek(), close it */
+ __pmFclose(f);
+ f = NULL;
+ }
+
}/*for*/
if (f == acp->ac_mfp)