Fix segv in __pmGetArchiveEnd_ctx() (libpcp logutil.c) and add QA tests 1280 and 1281 (pcp-4.3.2).
diff -Naurp pcp-4.3.2.orig/qa/1280 pcp-4.3.2/qa/1280
--- pcp-4.3.2.orig/qa/1280	1970-01-01 10:00:00.000000000 +1000
+++ pcp-4.3.2/qa/1280	2020-11-04 10:58:30.575985524 +1100
@@ -0,0 +1,90 @@
+#!/bin/sh
+# PCP QA Test No. 1280
+# segv in __pmGetArchiveEnd_ctx()
+#
+# Runs pmdumplog -L on archive 20180416.10.00 three times: intact, with
+# volume 3 removed, and then with volume 2 shortened by 8 bytes.
+#
+# Copyright (c) 2020 Ken McDonell.  All Rights Reserved.
+#
+
+if [ $# -eq 0 ]
+then
+    seq=`basename $0`
+    echo "QA output created by $seq"
+else
+    # use $seq from caller, unless not set
+    [ -n "$seq" ] || seq=`basename $0`
+    echo "QA output created by `basename $0` $*"
+fi
+
+# get standard environment, filters and checks
+. ./common.product
+. ./common.filter
+. ./common.check
+
+# --valgrind (as passed by qa/1281) runs pmdumplog via _run_valgrind
+do_valgrind=false
+if [ "$1" = "--valgrind" ]
+then
+    _check_valgrind
+    do_valgrind=true
+fi
+
+# the archive files are copied and uncompressed with unxz below
+if which unxz >/dev/null
+then
+    :
+else
+    _notrun "need unxz application installed"
+fi
+
+# go back to $here and remove the temporary files
+_cleanup()
+{
+    cd $here
+    $sudo rm -rf $tmp $tmp.*
+}
+
+status=0	# success is the default!
+$sudo rm -rf $tmp $tmp.* $seq.full
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# dump the label of the archive in the current directory; when not run
+# under valgrind, stderr is appended to $here/$seq.full
+_doit()
+{
+    if $do_valgrind
+    then
+	_run_valgrind pmdumplog -z -Dlog -L 20180416.10.00
+    else
+	pmdumplog -z -Dlog -L 20180416.10.00 2>>$here/$seq.full
+    fi
+}
+
+# real QA test starts here
+mkdir $tmp
+cp archives/20180416.10.00* $tmp
+cd $tmp
+for f in *.xz
+do
+    unxz $f
+done
+
+echo "=== full archive ==="
+_doit
+
+echo
+echo "=== volume 3 missing ==="
+rm 20180416.10.00.3
+_doit
+
+echo
+echo "=== volume 2 truncated ==="
+# chop the last 8 bytes off volume 2; the relative "-s -N" form means
+# the size does not have to be scraped from stat(1) output
+truncate -s -8 20180416.10.00.2
+_doit
+
+# success, all done
+exit
diff -Naurp pcp-4.3.2.orig/qa/1280.out pcp-4.3.2/qa/1280.out
--- pcp-4.3.2.orig/qa/1280.out	1970-01-01 10:00:00.000000000 +1000
+++ pcp-4.3.2/qa/1280.out	2020-11-04 10:58:30.575985524 +1100
@@ -0,0 +1,30 @@
+QA output created by 1280
+=== full archive ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+    commencing Mon Apr 16 10:01:25.325401 2018
+    ending     Mon Apr 16 14:32:47.409469 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
+
+=== volume 3 missing ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+    commencing Mon Apr 16 10:01:25.325401 2018
+    ending     Mon Apr 16 14:26:25.396920 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
+
+=== volume 2 truncated ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+    commencing Mon Apr 16 10:01:25.325401 2018
+    ending     Mon Apr 16 14:26:25.380430 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
diff -Naurp pcp-4.3.2.orig/qa/1281 pcp-4.3.2/qa/1281
--- pcp-4.3.2.orig/qa/1281	1970-01-01 10:00:00.000000000 +1000
+++ pcp-4.3.2/qa/1281	2020-11-04 10:58:30.575985524 +1100
@@ -0,0 +1,52 @@
+#!/bin/sh
+# PCP QA Test No. 1281
+# segv in __pmGetArchiveEnd_ctx()
+#
+# -- valgrind version of qa/1280: runs ./1280 --valgrind and filters its output
+#
+# Copyright (c) 2020 Ken McDonell.  All Rights Reserved.
+#
+
+if [ $# -eq 0 ]
+then
+    seq=`basename $0`
+    echo "QA output created by $seq"
+else
+    # use $seq from caller, unless not set
+    [ -n "$seq" ] || seq=`basename $0`
+    echo "QA output created by `basename $0` $*"
+fi
+
+# get standard environment, filters and checks
+. ./common.product
+. ./common.filter
+. ./common.check
+
+do_valgrind=false	# NOTE(review): set here but never read in this script
+if [ "$1" = "--valgrind" ]
+then
+    _check_valgrind
+    do_valgrind=true
+fi
+
+_cleanup()	# go back to $here and remove the temporary files
+{
+    cd $here
+    $sudo rm -rf $tmp $tmp.*
+}
+
+status=0	# success is the default!
+$sudo rm -rf $tmp $tmp.* $seq.full
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# real QA test starts here; each "=== std err ===" section goes to $seq.full
+export seq	# 1280 keeps $seq from its caller when it is set
+./1280 --valgrind \
+| $PCP_AWK_PROG '
+skip == 1 && $1 == "==="	{ skip = 0 }
+/^=== std err ===/		{ skip = 1 }
+skip == 0		{ print }
+skip == 1		{ print >"'$here/$seq.full'" }'
+
+# success, all done
+exit
diff -Naurp pcp-4.3.2.orig/qa/1281.out pcp-4.3.2/qa/1281.out
--- pcp-4.3.2.orig/qa/1281.out	1970-01-01 10:00:00.000000000 +1000
+++ pcp-4.3.2/qa/1281.out	2020-11-04 10:58:30.575985524 +1100
@@ -0,0 +1,55 @@
+QA output created by 1281
+QA output created by 1280 --valgrind
+=== full archive ===
+=== std out ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+    commencing Mon Apr 16 10:01:25.325401 2018
+    ending     Mon Apr 16 14:32:47.409469 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
+=== filtered valgrind report ===
+Memcheck, a memory error detector
+Command: pmdumplog -z -Dlog -L 20180416.10.00
+LEAK SUMMARY:
+definitely lost: 0 bytes in 0 blocks
+indirectly lost: 0 bytes in 0 blocks
+ERROR SUMMARY: 0 errors from 0 contexts ...
+
+=== volume 3 missing ===
+=== std out ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+    commencing Mon Apr 16 10:01:25.325401 2018
+    ending     Mon Apr 16 14:26:25.396920 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
+=== filtered valgrind report ===
+Memcheck, a memory error detector
+Command: pmdumplog -z -Dlog -L 20180416.10.00
+LEAK SUMMARY:
+definitely lost: 0 bytes in 0 blocks
+indirectly lost: 0 bytes in 0 blocks
+ERROR SUMMARY: 0 errors from 0 contexts ...
+
+=== volume 2 truncated ===
+=== std out ===
+Note: timezone set to local timezone of host "brolley-t530" from archive
+
+Log Label (Log Format Version 2)
+Performance metrics from host brolley-t530
+    commencing Mon Apr 16 10:01:25.325401 2018
+    ending     Mon Apr 16 14:26:25.380430 2018
+Archive timezone: EDT+4
+PID for pmlogger: 7047
+=== filtered valgrind report ===
+Memcheck, a memory error detector
+Command: pmdumplog -z -Dlog -L 20180416.10.00
+LEAK SUMMARY:
+definitely lost: 0 bytes in 0 blocks
+indirectly lost: 0 bytes in 0 blocks
+ERROR SUMMARY: 0 errors from 0 contexts ...
diff -Naurp pcp-4.3.2.orig/qa/417.out pcp-4.3.2/qa/417.out
--- pcp-4.3.2.orig/qa/417.out	2018-06-19 09:19:05.000000000 +1000
+++ pcp-4.3.2/qa/417.out	2020-11-04 10:44:15.722280608 +1100
@@ -253,6 +253,11 @@ Temporal Index
 === pmdumplog -t, no index ===
 Note: timezone set to local timezone of host "moomba" from archive
 
+pmdumplog: Cannot locate end of archive bad: Corrupted record in a PCP archive log
+
+WARNING: This archive is sufficiently damaged that it may not be possible to
+         produce complete information.  Continuing and hoping for the best.
+
 
 Temporal Index
 		Log Vol    end(meta)     end(log)
diff -Naurp pcp-4.3.2.orig/qa/group pcp-4.3.2/qa/group
--- pcp-4.3.2.orig/qa/group	2020-11-04 10:43:55.184527945 +1100
+++ pcp-4.3.2/qa/group	2020-11-04 10:58:43.746826907 +1100
@@ -1614,6 +1614,8 @@ BAD
 1269 libpcp local kernel
 1274 pmlogextract pmdumplog labels help local sanity
 1276 pmmgr containers local
+1280 archive pmdumplog local
+1281 archive pmdumplog valgrind local
 1287 pmda.install pmda.prometheus local python
 1289 pmval archive multi-archive decompress-xz local pmlogextract
 1294 libpcp_mmv labels local valgrind
diff -Naurp pcp-4.3.2.orig/src/libpcp/src/logutil.c pcp-4.3.2/src/libpcp/src/logutil.c
--- pcp-4.3.2.orig/src/libpcp/src/logutil.c	2019-02-26 07:30:38.000000000 +1100
+++ pcp-4.3.2/src/libpcp/src/logutil.c	2020-11-04 10:44:15.722280608 +1100
@@ -2558,6 +2558,11 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
     found = 0;
     sts = PM_ERR_LOGREC;	/* default error condition */
     f = NULL;
+
+    /*
+     * start at last volume and work backwards until success or
+     * failure
+     */
     for (vol = lcp->l_maxvol; vol >= lcp->l_minvol; vol--) {
 	if (acp->ac_curvol == vol) {
 	    f = acp->ac_mfp;
@@ -2571,11 +2576,7 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
 
 	if (__pmFstat(f, &sbuf) < 0) {
 	    /* if we can't stat() this one, then try previous volume(s) */
-	    if (f != acp->ac_mfp) {
-		__pmFclose(f);
-		f = NULL;
-	    }
-	    continue;
+	    goto prior_vol;
 	}
 
 	if (vol == lcp->l_maxvol && sbuf.st_size == lcp->l_physend) {
@@ -2588,16 +2589,14 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
 
 	/* if this volume is empty, try previous volume */
 	if (sbuf.st_size <= (int)sizeof(__pmLogLabel) + 2*(int)sizeof(int)) {
-	    if (f != acp->ac_mfp) {
-		__pmFclose(f);
-		f = NULL;
-	    }
-	    continue;
+	    goto prior_vol;
 	}
 
 	physend = (__pm_off_t)sbuf.st_size;
 	if (sizeof(off_t) > sizeof(__pm_off_t)) {
+	    /* 64-bit off_t */
 	    if (physend != sbuf.st_size) {
+		/* oops, st_size does not fit in __pm_off_t */
 		pmNotifyErr(LOG_ERR, "pmGetArchiveEnd: PCP archive file"
 			" (meta) too big (%"PRIi64" bytes)\n",
 			(uint64_t)sbuf.st_size);
@@ -2621,18 +2620,17 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
 	 */
 	logend = (int)sizeof(__pmLogLabel) + 2*(int)sizeof(int);
 	for (i = lcp->l_numti - 1; i >= 0; i--) {
-	    if (lcp->l_ti[i].ti_vol != vol) {
-		if (f != acp->ac_mfp) {
-		    __pmFclose(f);
-		    f = NULL;
-		}
+	    if (lcp->l_ti[i].ti_vol != vol)
 		continue;
-	    }
 	    if (lcp->l_ti[i].ti_log <= physend) {
 		logend = lcp->l_ti[i].ti_log;
 		break;
 	    }
 	}
+	if (i < 0) {
+	    /* no usable temporal index entry, try previous volume */
+	    goto prior_vol;
+	}
 
 	/*
 	 * Now chase it forwards from the last index entry ...
@@ -2642,6 +2640,7 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
 	 *	valid record, so if not at start of volume, back up one
 	 *	record, then scan forwards.
 	 */
+	assert(f != NULL);
 	__pmFseek(f, (long)logend, SEEK_SET);
 	if (logend > (int)sizeof(__pmLogLabel) + 2*(int)sizeof(int)) {
 	    if (paranoidLogRead(ctxp, PM_MODE_BACK, f, &rp) < 0) {
@@ -2681,10 +2680,17 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp,
 	if (found)
 	    break;
 
+prior_vol:
 	/*
 	 * this probably means this volume contains no useful records,
 	 * try the previous volume
 	 */
+	if (f != acp->ac_mfp) {
+	    /* f comes from _logpeek(), close it */
+	    __pmFclose(f);
+	    f = NULL;
+	}
+
     }/*for*/
 
     if (f == acp->ac_mfp)