Blame SOURCES/rsyslog-8.37.0-rhbz1627941-imfile-support-for-endmsg.regex.patch

c16178
From e8d64cbd15fa84907dc23f8b52d6f2f847b46fec Mon Sep 17 00:00:00 2001
c16178
From: Rich Megginson <rmeggins@redhat.com>
c16178
Date: Mon, 10 Sep 2018 17:25:38 -0600
c16178
Subject: [PATCH] imfile: support for endmsg.regex
c16178
c16178
This adds support for endmsg.regex.  It is similar to
c16178
startmsg.regex except that it matches the line that denotes
c16178
the end of the message, rather than the start of the next message.
c16178
This is primarily for container log file use cases such as this:
c16178
c16178
    date stdout P start of message
c16178
    date stdout P  middle of message
c16178
    date stdout F  end of message
c16178
c16178
The `F` means this is the line which contains the final part of
c16178
the message.  The fully assembled message should be
c16178
`start of message middle of message end of message`.
c16178
`endmsg.regex="^[^ ]+ stdout F "` will match the final line of such a message.
c16178
c16178
(cherry picked from commit c902a0938fe163b5351829d2b72001d024895c16)
c16178
(cherry picked from commit dd4a72c4d52d8da98ed6b86114868e1a450ccb41)
c16178
---
c16178
 plugins/imfile/imfile.c                      |  44 ++++--
c16178
 plugins/imptcp/imptcp.c                      |  10 +-
c16178
 runtime/stream.c                             |  28 +++-
c16178
 runtime/stream.h                             |   2 +-
c16178
 4 files changed, 62 insertions(+), 20 deletions(-)
c16178
c16178
diff --git a/plugins/imfile/imfile.c b/plugins/imfile/imfile.c
c16178
index 7767c9f02..87706082f 100644
c16178
--- a/plugins/imfile/imfile.c
c16178
+++ b/plugins/imfile/imfile.c
c16178
@@ -126,7 +126,9 @@ struct instanceConf_s {
c16178
 	sbool bRMStateOnDel;
c16178
 	uint8_t readMode;
c16178
 	uchar *startRegex;
c16178
-	regex_t end_preg;	/* compiled version of startRegex */
c16178
+	uchar *endRegex;
c16178
+	regex_t start_preg;	/* compiled version of startRegex */
c16178
+	regex_t end_preg;	/* compiled version of endRegex */
c16178
 	sbool discardTruncatedMsg;
c16178
 	sbool msgDiscardingError;
c16178
 	sbool escapeLF;
c16178
@@ -281,6 +283,7 @@ static struct cnfparamdescr inppdescr[] = {
c16178
 	{ "ruleset", eCmdHdlrString, 0 },
c16178
 	{ "readmode", eCmdHdlrInt, 0 },
c16178
 	{ "startmsg.regex", eCmdHdlrString, 0 },
c16178
+	{ "endmsg.regex", eCmdHdlrString, 0 },
c16178
 	{ "discardtruncatedmsg", eCmdHdlrBinary, 0 },
c16178
 	{ "msgdiscardingerror", eCmdHdlrBinary, 0 },
c16178
 	{ "escapelf", eCmdHdlrBinary, 0 },
c16178
@@ -1421,6 +1424,7 @@ pollFileReal(act_obj_t *act, cstr_t **pCStr)
c16178
 	int64 strtOffs;
c16178
 	DEFiRet;
c16178
 	int nProcessed = 0;
c16178
+	regex_t *start_preg = NULL, *end_preg = NULL;
c16178
 
c16178
 	DBGPRINTF("pollFileReal enter, pStrm %p, name '%s'\n", act->pStrm, act->name);
c16178
 	DBGPRINTF("pollFileReal enter, edge %p\n", act->edge);
c16178
@@ -1432,15 +1436,18 @@ pollFileReal(act_obj_t *act, cstr_t **pCStr)
c16178
 		CHKiRet(openFile(act)); /* open file */
c16178
 	}
c16178
 
c16178
+	start_preg = (inst->startRegex == NULL) ? NULL : &inst->start_preg;
c16178
+	end_preg = (inst->endRegex == NULL) ? NULL : &inst->end_preg;
c16178
+
c16178
 	/* loop below will be exited when strmReadLine() returns EOF */
c16178
 	while(glbl.GetGlobalInputTermState() == 0) {
c16178
 		if(inst->maxLinesAtOnce != 0 && nProcessed >= inst->maxLinesAtOnce)
c16178
 			break;
c16178
-		if(inst->startRegex == NULL) {
c16178
+		if((start_preg == NULL) && (end_preg == NULL)) {
c16178
 			CHKiRet(strm.ReadLine(act->pStrm, pCStr, inst->readMode, inst->escapeLF,
c16178
 				inst->trimLineOverBytes, &strtOffs));
c16178
 		} else {
c16178
-			CHKiRet(strmReadMultiLine(act->pStrm, pCStr, &inst->end_preg,
c16178
+			CHKiRet(strmReadMultiLine(act->pStrm, pCStr, start_preg, end_preg,
c16178
 				inst->escapeLF, inst->discardTruncatedMsg, inst->msgDiscardingError, &strtOffs));
c16178
 		}
c16178
 		++nProcessed;
c16178
@@ -1506,6 +1513,7 @@ createInstance(instanceConf_t **const pinst)
c16178
 	inst->iPersistStateInterval = 0;
c16178
 	inst->readMode = 0;
c16178
 	inst->startRegex = NULL;
c16178
+	inst->endRegex = NULL;
c16178
 	inst->discardTruncatedMsg = 0;
c16178
 	inst->msgDiscardingError = 1;
c16178
 	inst->bRMStateOnDel = 1;
c16178
@@ -1713,6 +1721,8 @@ CODESTARTnewInpInst
c16178
 			inst->readMode = (sbool) pvals[i].val.d.n;
c16178
 		} else if(!strcmp(inppblk.descr[i].name, "startmsg.regex")) {
c16178
 			inst->startRegex = (uchar*)es_str2cstr(pvals[i].val.d.estr, NULL);
c16178
+		} else if(!strcmp(inppblk.descr[i].name, "endmsg.regex")) {
c16178
+			inst->endRegex = (uchar*)es_str2cstr(pvals[i].val.d.estr, NULL);
c16178
 		} else if(!strcmp(inppblk.descr[i].name, "discardtruncatedmsg")) {
c16178
 			inst->discardTruncatedMsg = (sbool) pvals[i].val.d.n;
c16178
 		} else if(!strcmp(inppblk.descr[i].name, "msgdiscardingerror")) {
c16178
@@ -1753,19 +1763,31 @@ CODESTARTnewInpInst
c16178
 			  "param '%s'\n", inppblk.descr[i].name);
c16178
 		}
c16178
 	}
c16178
-	if(inst->readMode != 0 &&  inst->startRegex != NULL) {
c16178
+	i = (inst->readMode > 0) ? 1 : 0;
c16178
+	i = (NULL != inst->startRegex) ? (i+1) : i;
c16178
+	i = (NULL != inst->endRegex) ? (i+1) : i;
c16178
+	if(i > 1) {
c16178
 		LogError(0, RS_RET_PARAM_NOT_PERMITTED,
c16178
-			"readMode and startmsg.regex cannot be set "
c16178
-			"at the same time --- remove one of them");
c16178
+			"only one of readMode or startmsg.regex or endmsg.regex can be set "
c16178
+			"at the same time");
c16178
 			ABORT_FINALIZE(RS_RET_PARAM_NOT_PERMITTED);
c16178
 	}
c16178
 
c16178
 	if(inst->startRegex != NULL) {
c16178
-		const int errcode = regcomp(&inst->end_preg, (char*)inst->startRegex, REG_EXTENDED);
c16178
+		const int errcode = regcomp(&inst->start_preg, (char*)inst->startRegex, REG_EXTENDED);
c16178
+		if(errcode != 0) {
c16178
+			char errbuff[512];
c16178
+			regerror(errcode, &inst->start_preg, errbuff, sizeof(errbuff));
c16178
+			parser_errmsg("imfile: error in startmsg.regex expansion: %s", errbuff);
c16178
+			ABORT_FINALIZE(RS_RET_ERR);
c16178
+		}
c16178
+	}
c16178
+	if(inst->endRegex != NULL) {
c16178
+		const int errcode = regcomp(&inst->end_preg, (char*)inst->endRegex, REG_EXTENDED);
c16178
 		if(errcode != 0) {
c16178
 			char errbuff[512];
c16178
 			regerror(errcode, &inst->end_preg, errbuff, sizeof(errbuff));
c16178
-			parser_errmsg("imfile: error in regex expansion: %s", errbuff);
c16178
+			parser_errmsg("imfile: error in endmsg.regex expansion: %s", errbuff);
c16178
 			ABORT_FINALIZE(RS_RET_ERR);
c16178
 		}
c16178
 	}
c16178
@@ -1970,9 +1992,13 @@ CODESTARTfreeCnf
c16178
 		free(inst->pszStateFile);
c16178
 		free(inst->pszFileName_forOldStateFile);
c16178
 		if(inst->startRegex != NULL) {
c16178
-			regfree(&inst->end_preg);
c16178
+			regfree(&inst->start_preg);
c16178
 			free(inst->startRegex);
c16178
 		}
c16178
+		if(inst->endRegex != NULL) {
c16178
+			regfree(&inst->end_preg);
c16178
+			free(inst->endRegex);
c16178
+		}
c16178
 		del = inst;
c16178
 		inst = inst->next;
c16178
 		free(del);
c16178
diff --git a/plugins/imptcp/imptcp.c b/plugins/imptcp/imptcp.c
c16178
index 9b6be0f40..a94b97f41 100644
c16178
--- a/plugins/imptcp/imptcp.c
c16178
+++ b/plugins/imptcp/imptcp.c
c16178
@@ -162,7 +162,7 @@ struct instanceConf_s {
c16178
 	int ratelimitInterval;
c16178
 	int ratelimitBurst;
c16178
 	uchar *startRegex;
c16178
-	regex_t end_preg;	/* compiled version of startRegex */
c16178
+	regex_t start_preg;	/* compiled version of startRegex */
c16178
 	struct instanceConf_s *next;
c16178
 };
c16178
 
c16178
@@ -961,7 +961,7 @@ processDataRcvd_regexFraming(ptcpsess_t *const __restrict__ pThis,
c16178
 	if(c == '\n') {
c16178
 		pThis->iCurrLine = pThis->iMsg;
c16178
 	} else {
c16178
-		const int isMatch = !regexec(&inst->end_preg, (char*)pThis->pMsg+pThis->iCurrLine, 0, NULL, 0);
c16178
+		const int isMatch = !regexec(&inst->start_preg, (char*)pThis->pMsg+pThis->iCurrLine, 0, NULL, 0);
c16178
 		if(isMatch) {
c16178
 			DBGPRINTF("regex match (%d), framing line: %s\n", pThis->iCurrLine, pThis->pMsg);
c16178
 			strcpy((char*)pThis->pMsg_save, (char*) pThis->pMsg+pThis->iCurrLine);
c16178
@@ -2188,10 +2188,10 @@ CODESTARTnewInpInst
c16178
 	}
c16178
 
c16178
 	if(inst->startRegex != NULL) {
c16178
-		const int errcode = regcomp(&inst->end_preg, (char*)inst->startRegex, REG_EXTENDED);
c16178
+		const int errcode = regcomp(&inst->start_preg, (char*)inst->startRegex, REG_EXTENDED);
c16178
 		if(errcode != 0) {
c16178
 			char errbuff[512];
c16178
-			regerror(errcode, &inst->end_preg, errbuff, sizeof(errbuff));
c16178
+			regerror(errcode, &inst->start_preg, errbuff, sizeof(errbuff));
c16178
 			parser_errmsg("imptcp: error in framing.delimiter.regex expansion: %s", errbuff);
c16178
 			ABORT_FINALIZE(RS_RET_ERR);
c16178
 		}
c16178
@@ -2348,7 +2348,7 @@ CODESTARTfreeCnf
c16178
 		free(inst->pszInputName);
c16178
 		free(inst->dfltTZ);
c16178
 		if(inst->startRegex != NULL) {
c16178
-			regfree(&inst->end_preg);
c16178
+			regfree(&inst->start_preg);
c16178
 			free(inst->startRegex);
c16178
 		}
c16178
 		del = inst;
c16178
diff --git a/runtime/stream.c b/runtime/stream.c
c16178
index 6b7e7028e..0f4197103 100644
c16178
--- a/runtime/stream.c
c16178
+++ b/runtime/stream.c
c16178
@@ -942,12 +942,12 @@ strmReadMultiLine_isTimedOut(const strm_t *const __restrict__ pThis)
c16178
 
c16178
 /* read a multi-line message from a strm file.
c16178
  * The multi-line message is terminated based on the user-provided
c16178
- * startRegex (Posix ERE). For performance reasons, the regex
c16178
+ * startRegex or endRegex (Posix ERE). For performance reasons, the regex
c16178
  * must already have been compiled by the user.
c16178
  * added 2015-05-12 rgerhards
c16178
  */
c16178
 rsRetVal
c16178
-strmReadMultiLine(strm_t *pThis, cstr_t **ppCStr, regex_t *preg, const sbool bEscapeLF,
c16178
+strmReadMultiLine(strm_t *pThis, cstr_t **ppCStr, regex_t *start_preg, regex_t *end_preg, const sbool bEscapeLF,
c16178
 	const sbool discardTruncatedMsg, const sbool msgDiscardingError, int64 *const strtOffs)
c16178
 {
c16178
 	uchar c;
c16178
@@ -979,9 +979,14 @@ strmReadMultiLine(strm_t *pThis, cstr_t **ppCStr, regex_t *preg, const sbool bEs
c16178
 		cstrFinalize(thisLine);
c16178
 
c16178
 		/* we have a line, now let's assemble the message */
c16178
-		const int isMatch = !regexec(preg, (char*)rsCStrGetSzStrNoNULL(thisLine), 0, NULL, 0);
c16178
-
c16178
-		if(isMatch) {
c16178
+		const int isStartMatch = start_preg ?
c16178
+				!regexec(start_preg, (char*)rsCStrGetSzStrNoNULL(thisLine), 0, NULL, 0) :
c16178
+				0;
c16178
+		const int isEndMatch = end_preg ?
c16178
+				!regexec(end_preg, (char*)rsCStrGetSzStrNoNULL(thisLine), 0, NULL, 0) :
c16178
+				0;
c16178
+
c16178
+		if(isStartMatch) {
c16178
 			/* in this case, the *previous* message is complete and we are
c16178
 			 * at the start of a new one.
c16178
 			 */
c16178
@@ -1047,6 +1052,19 @@ strmReadMultiLine(strm_t *pThis, cstr_t **ppCStr, regex_t *preg, const sbool bEs
c16178
 				}
c16178
 			}
c16178
 		}
c16178
+		if(isEndMatch) {
c16178
+			/* in this case, the *current* message is complete and we are
c16178
+			 * at the end of it.
c16178
+			 */
c16178
+			if(pThis->ignoringMsg == 0) {
c16178
+				if(pThis->prevMsgSegment != NULL) {
c16178
+					finished = 1;
c16178
+					*ppCStr = pThis->prevMsgSegment;
c16178
+					pThis->prevMsgSegment= NULL;
c16178
+				}
c16178
+			}
c16178
+			pThis->ignoringMsg = 0;
c16178
+		}
c16178
 		cstrDestruct(&thisLine);
c16178
 	} while(finished == 0);
c16178
 
c16178
diff --git a/runtime/stream.h b/runtime/stream.h
c16178
index 71596879e..7dc597ff5 100644
c16178
--- a/runtime/stream.h
c16178
+++ b/runtime/stream.h
c16178
@@ -225,7 +225,7 @@ ENDinterface(strm)
c16178
 /* prototypes */
c16178
 PROTOTYPEObjClassInit(strm);
c16178
 rsRetVal strmMultiFileSeek(strm_t *pThis, unsigned int fileNum, off64_t offs, off64_t *bytesDel);
c16178
-rsRetVal strmReadMultiLine(strm_t *pThis, cstr_t **ppCStr, regex_t *preg,
c16178
+rsRetVal strmReadMultiLine(strm_t *pThis, cstr_t **ppCStr, regex_t *start_preg, regex_t *end_preg,
c16178
 	sbool bEscapeLF, sbool discardTruncatedMsg, sbool msgDiscardingError, int64 *const strtOffs);
c16178
 int strmReadMultiLine_isTimedOut(const strm_t *const __restrict__ pThis);
c16178
 void strmDebugOutBuf(const strm_t *const pThis);