diff -Naurp pcp-4.3.2.orig/qa/1280 pcp-4.3.2/qa/1280 --- pcp-4.3.2.orig/qa/1280 1970-01-01 10:00:00.000000000 +1000 +++ pcp-4.3.2/qa/1280 2020-11-04 10:58:30.575985524 +1100 @@ -0,0 +1,86 @@ +#!/bin/sh +# PCP QA Test No. 1280 +# segv in __pmGetArchiveEnd_ctx() +# +# Copyright (c) 2020 Ken McDonell. All Rights Reserved. +# + +if [ $# -eq 0 ] +then + seq=`basename $0` + echo "QA output created by $seq" +else + # use $seq from caller, unless not set + [ -n "$seq" ] || seq=`basename $0` + echo "QA output created by `basename $0` $*" +fi + +# get standard environment, filters and checks +. ./common.product +. ./common.filter +. ./common.check + +do_valgrind=false +if [ "$1" = "--valgrind" ] +then + _check_valgrind + do_valgrind=true +fi + +if which unxz >/dev/null +then + : +else + _notrun "need unxz application installed" +fi + +_cleanup() +{ + cd $here + $sudo rm -rf $tmp $tmp.* +} + +status=0 # success is the default! +$sudo rm -rf $tmp $tmp.* $seq.full +trap "_cleanup; exit \$status" 0 1 2 3 15 + +_doit() +{ + if $do_valgrind + then + _run_valgrind pmdumplog -z -Dlog -L 20180416.10.00 + else + pmdumplog -z -Dlog -L 20180416.10.00 2>>$here/$seq.full + fi +} + +# real QA test starts here +mkdir $tmp +cp archives/20180416.10.00* $tmp +cd $tmp +for f in *.xz +do + unxz $f +done + +echo "=== full archive ===" +_doit + +echo +echo "=== volume 3 missing ===" +rm 20180416.10.00.3 +_doit + +echo +echo "=== volume 2 truncated ===" +size=`stat 20180416.10.00.2 | sed -n -e '/Size:/{ +s/.*Size: *// +s/ .*// +p +}'` +size=`expr $size - 8` +truncate -s $size 20180416.10.00.2 +_doit + +# success, all done +exit diff -Naurp pcp-4.3.2.orig/qa/1280.out pcp-4.3.2/qa/1280.out --- pcp-4.3.2.orig/qa/1280.out 1970-01-01 10:00:00.000000000 +1000 +++ pcp-4.3.2/qa/1280.out 2020-11-04 10:58:30.575985524 +1100 @@ -0,0 +1,30 @@ +QA output created by 1280 +=== full archive === +Note: timezone set to local timezone of host "brolley-t530" from archive + +Log Label (Log Format Version 2) +Performance metrics from host brolley-t530 + commencing Mon Apr 16 10:01:25.325401 2018 + ending Mon Apr 16 14:32:47.409469 2018 +Archive timezone: EDT+4 +PID for pmlogger: 7047 + +=== volume 3 missing === +Note: timezone set to local timezone of host "brolley-t530" from archive + +Log Label (Log Format Version 2) +Performance metrics from host brolley-t530 + commencing Mon Apr 16 10:01:25.325401 2018 + ending Mon Apr 16 14:26:25.396920 2018 +Archive timezone: EDT+4 +PID for pmlogger: 7047 + +=== volume 2 truncated === +Note: timezone set to local timezone of host "brolley-t530" from archive + +Log Label (Log Format Version 2) +Performance metrics from host brolley-t530 + commencing Mon Apr 16 10:01:25.325401 2018 + ending Mon Apr 16 14:26:25.380430 2018 +Archive timezone: EDT+4 +PID for pmlogger: 7047 diff -Naurp pcp-4.3.2.orig/qa/1281 pcp-4.3.2/qa/1281 --- pcp-4.3.2.orig/qa/1281 1970-01-01 10:00:00.000000000 +1000 +++ pcp-4.3.2/qa/1281 2020-11-04 10:58:30.575985524 +1100 @@ -0,0 +1,52 @@ +#!/bin/sh +# PCP QA Test No. 1281 +# segv in __pmGetArchiveEnd_ctx() +# +# -- valgrind version of qa/1280 +# +# Copyright (c) 2020 Ken McDonell. All Rights Reserved. +# + +if [ $# -eq 0 ] +then + seq=`basename $0` + echo "QA output created by $seq" +else + # use $seq from caller, unless not set + [ -n "$seq" ] || seq=`basename $0` + echo "QA output created by `basename $0` $*" +fi + +# get standard environment, filters and checks +. ./common.product +. ./common.filter +. ./common.check + +do_valgrind=false +if [ "$1" = "--valgrind" ] +then + _check_valgrind + do_valgrind=true +fi + +_cleanup() +{ + cd $here + $sudo rm -rf $tmp $tmp.* +} + +status=0 # success is the default! +$sudo rm -rf $tmp $tmp.* $seq.full +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# real QA test starts here +export seq +./1280 --valgrind \ +| $PCP_AWK_PROG ' +skip == 1 && $1 == "===" { skip = 0 } +/^=== std err ===/ { skip = 1 } +skip == 0 { print } +skip == 1 { print >"'$here/$seq.full'" }' + +# success, all done +exit diff -Naurp pcp-4.3.2.orig/qa/1281.out pcp-4.3.2/qa/1281.out --- pcp-4.3.2.orig/qa/1281.out 1970-01-01 10:00:00.000000000 +1000 +++ pcp-4.3.2/qa/1281.out 2020-11-04 10:58:30.575985524 +1100 @@ -0,0 +1,55 @@ +QA output created by 1281 +QA output created by 1280 --valgrind +=== full archive === +=== std out === +Note: timezone set to local timezone of host "brolley-t530" from archive + +Log Label (Log Format Version 2) +Performance metrics from host brolley-t530 + commencing Mon Apr 16 10:01:25.325401 2018 + ending Mon Apr 16 14:32:47.409469 2018 +Archive timezone: EDT+4 +PID for pmlogger: 7047 +=== filtered valgrind report === +Memcheck, a memory error detector +Command: pmdumplog -z -Dlog -L 20180416.10.00 +LEAK SUMMARY: +definitely lost: 0 bytes in 0 blocks +indirectly lost: 0 bytes in 0 blocks +ERROR SUMMARY: 0 errors from 0 contexts ... + +=== volume 3 missing === +=== std out === +Note: timezone set to local timezone of host "brolley-t530" from archive + +Log Label (Log Format Version 2) +Performance metrics from host brolley-t530 + commencing Mon Apr 16 10:01:25.325401 2018 + ending Mon Apr 16 14:26:25.396920 2018 +Archive timezone: EDT+4 +PID for pmlogger: 7047 +=== filtered valgrind report === +Memcheck, a memory error detector +Command: pmdumplog -z -Dlog -L 20180416.10.00 +LEAK SUMMARY: +definitely lost: 0 bytes in 0 blocks +indirectly lost: 0 bytes in 0 blocks +ERROR SUMMARY: 0 errors from 0 contexts ... + +=== volume 2 truncated === +=== std out === +Note: timezone set to local timezone of host "brolley-t530" from archive + +Log Label (Log Format Version 2) +Performance metrics from host brolley-t530 + commencing Mon Apr 16 10:01:25.325401 2018 + ending Mon Apr 16 14:26:25.380430 2018 +Archive timezone: EDT+4 +PID for pmlogger: 7047 +=== filtered valgrind report === +Memcheck, a memory error detector +Command: pmdumplog -z -Dlog -L 20180416.10.00 +LEAK SUMMARY: +definitely lost: 0 bytes in 0 blocks +indirectly lost: 0 bytes in 0 blocks +ERROR SUMMARY: 0 errors from 0 contexts ... diff -Naurp pcp-4.3.2.orig/qa/417.out pcp-4.3.2/qa/417.out --- pcp-4.3.2.orig/qa/417.out 2018-06-19 09:19:05.000000000 +1000 +++ pcp-4.3.2/qa/417.out 2020-11-04 10:44:15.722280608 +1100 @@ -253,6 +253,11 @@ Temporal Index === pmdumplog -t, no index === Note: timezone set to local timezone of host "moomba" from archive +pmdumplog: Cannot locate end of archive bad: Corrupted record in a PCP archive log + +WARNING: This archive is sufficiently damaged that it may not be possible to + produce complete information. Continuing and hoping for the best. + Temporal Index Log Vol end(meta) end(log) diff -Naurp pcp-4.3.2.orig/qa/group pcp-4.3.2/qa/group --- pcp-4.3.2.orig/qa/group 2020-11-04 10:43:55.184527945 +1100 +++ pcp-4.3.2/qa/group 2020-11-04 10:58:43.746826907 +1100 @@ -1614,6 +1614,8 @@ BAD 1269 libpcp local kernel 1274 pmlogextract pmdumplog labels help local sanity 1276 pmmgr containers local +1280 archive pmdumplog local +1281 archive pmdumplog valgrind local 1287 pmda.install pmda.prometheus local python 1289 pmval archive multi-archive decompress-xz local pmlogextract 1294 libpcp_mmv labels local valgrind diff -Naurp pcp-4.3.2.orig/src/libpcp/src/logutil.c pcp-4.3.2/src/libpcp/src/logutil.c --- pcp-4.3.2.orig/src/libpcp/src/logutil.c 2019-02-26 07:30:38.000000000 +1100 +++ pcp-4.3.2/src/libpcp/src/logutil.c 2020-11-04 10:44:15.722280608 +1100 @@ -2558,6 +2558,11 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp, found = 0; sts = PM_ERR_LOGREC; /* default error condition */ f = NULL; + + /* + * start at last volume and work backwards until success or + * failure + */ for (vol = lcp->l_maxvol; vol >= lcp->l_minvol; vol--) { if (acp->ac_curvol == vol) { f = acp->ac_mfp; @@ -2571,11 +2576,7 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp, if (__pmFstat(f, &sbuf) < 0) { /* if we can't stat() this one, then try previous volume(s) */ - if (f != acp->ac_mfp) { - __pmFclose(f); - f = NULL; - } - continue; + goto prior_vol; } if (vol == lcp->l_maxvol && sbuf.st_size == lcp->l_physend) { @@ -2588,16 +2589,14 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp, /* if this volume is empty, try previous volume */ if (sbuf.st_size <= (int)sizeof(__pmLogLabel) + 2*(int)sizeof(int)) { - if (f != acp->ac_mfp) { - __pmFclose(f); - f = NULL; - } - continue; + goto prior_vol; } physend = (__pm_off_t)sbuf.st_size; if (sizeof(off_t) > sizeof(__pm_off_t)) { + /* 64-bit off_t */ if (physend != sbuf.st_size) { + /* oops, 32-bit offset not the same */ pmNotifyErr(LOG_ERR, "pmGetArchiveEnd: PCP archive file" " (meta) too big (%"PRIi64" bytes)\n", (uint64_t)sbuf.st_size); @@ -2621,18 +2620,17 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp, */ logend = (int)sizeof(__pmLogLabel) + 2*(int)sizeof(int); for (i = lcp->l_numti - 1; i >= 0; i--) { - if (lcp->l_ti[i].ti_vol != vol) { - if (f != acp->ac_mfp) { - __pmFclose(f); - f = NULL; - } + if (lcp->l_ti[i].ti_vol != vol) continue; - } if (lcp->l_ti[i].ti_log <= physend) { logend = lcp->l_ti[i].ti_log; break; } } + if (i < 0) { + /* no dice in the temporal index, try previous volume */ + goto prior_vol; + } /* * Now chase it forwards from the last index entry ... @@ -2642,6 +2640,7 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp, * valid record, so if not at start of volume, back up one * record, then scan forwards. */ + assert(f != NULL); __pmFseek(f, (long)logend, SEEK_SET); if (logend > (int)sizeof(__pmLogLabel) + 2*(int)sizeof(int)) { if (paranoidLogRead(ctxp, PM_MODE_BACK, f, &rp) < 0) { @@ -2681,10 +2680,17 @@ __pmGetArchiveEnd_ctx(__pmContext *ctxp, if (found) break; +prior_vol: /* * this probably means this volume contains no useful records, * try the previous volume */ + if (f != acp->ac_mfp) { + /* f comes from _logpeek(), close it */ + __pmFclose(f); + f = NULL; + } + }/*for*/ if (f == acp->ac_mfp)