|
|
a656d6 |
From: Jiri Vymazal <jvymazal@redhat.com>
|
|
|
a656d6 |
Date: Wed, 14 Mar 2018 09:05:01 -0500
|
|
|
a656d6 |
|
|
|
a656d6 |
modification and merge of below patches for RHEL consumers,
|
|
|
a656d6 |
also modified journal invalidate/rotation handling to keep the possibility
|
|
|
a656d6 |
to continue after a switch of the persistent journal
|
|
|
a656d6 |
original:
|
|
|
a656d6 |
%
|
|
|
a656d6 |
%From a99f9b4b42d261c384aee09306fc421df2cca7a5 Mon Sep 17 00:00:00 2001
|
|
|
a656d6 |
%From: Peter Portante <peter.a.portante@gmail.com>
|
|
|
a656d6 |
%Date: Wed, 24 Jan 2018 19:34:41 -0500
|
|
|
a656d6 |
%Subject: [PATCH] Proposed fix for handling journal correctly
|
|
|
a656d6 |
%
|
|
|
a656d6 |
%The fix is to immediately set up the inotify file descriptor via
|
|
|
a656d6 |
%`sd_journal_get_fd()` right after a journal open, and then
|
|
|
a656d6 |
%periodically call `sd_journal_process()` to give the client API
|
|
|
a656d6 |
%library a chance to detect deleted journal files on disk that need to
|
|
|
a656d6 |
%be closed so they can be properly erased by the file system.
|
|
|
a656d6 |
%
|
|
|
a656d6 |
%We remove the open/close dance and simplify that code as a result.
|
|
|
a656d6 |
%
|
|
|
a656d6 |
%Fixes issue #2436.
|
|
|
a656d6 |
and also:
|
|
|
a656d6 |
%From 27f96c84d34ee000fbb5d45b00233f2ec3cf2d8a Mon Sep 17 00:00:00 2001
|
|
|
a656d6 |
%From: Rainer Gerhards <rgerhards@adiscon.com>
|
|
|
a656d6 |
%Date: Tue, 24 Oct 2017 16:14:13 +0200
|
|
|
a656d6 |
%Subject: [PATCH] imjournal bugfix: do not disable itself on error
|
|
|
a656d6 |
%
|
|
|
a656d6 |
%If some function calls inside the main loop failed, imjournal exited
|
|
|
a656d6 |
%with an error code, actually disabling all logging from the journal.
|
|
|
a656d6 |
%This was probably never intended.
|
|
|
a656d6 |
%
|
|
|
a656d6 |
%This patch makes imjournal recover the situation instead.
|
|
|
a656d6 |
%
|
|
|
a656d6 |
%closes https://github.com/rsyslog/rsyslog/issues/1895
|
|
|
a656d6 |
---
|
|
|
a656d6 |
plugins/imjournal/imjournal.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------
|
|
|
a656d6 |
1 file changed, 104 insertions(+), 102 deletions(-)
|
|
|
a656d6 |
|
|
|
a656d6 |
--- a/plugins/imjournal/imjournal.c
|
|
|
a656d6 |
+++ b/plugins/imjournal/imjournal.c
|
|
|
a656d6 |
@@ -114,6 +114,10 @@ /* module-global parameters */
|
|
|
a656d6 |
static const char *pid_field_name; /* read-only after startup */
|
|
|
a656d6 |
static ratelimit_t *ratelimiter = NULL;
|
|
|
a656d6 |
static sd_journal *j;
|
|
|
a656d6 |
+static int j_inotify_fd;
|
|
|
a656d6 |
+static char *last_cursor = NULL;
|
|
|
a656d6 |
+
|
|
|
a656d6 |
+#define J_PROCESS_PERIOD 1024 /* Call sd_journal_process() every 1,024 records */
|
|
|
a656d6 |
|
|
|
a656d6 |
static rsRetVal persistJournalState(void);
|
|
|
a656d6 |
static rsRetVal loadJournalState(void);
|
|
|
a656d6 |
@@ -123,6 +127,14 @@ openJournal(sd_journal** jj)
|
|
|
a656d6 |
|
|
|
a656d6 |
if (sd_journal_open(jj, SD_JOURNAL_LOCAL_ONLY) < 0)
|
|
|
a656d6 |
iRet = RS_RET_IO_ERROR;
|
|
|
a656d6 |
+ int r;
|
|
|
a656d6 |
+
|
|
|
a656d6 |
+ if ((r = sd_journal_get_fd(j)) < 0) {
|
|
|
a656d6 |
+ errmsg.LogError(-r, RS_RET_IO_ERROR, "imjournal: sd_journal_get_fd() failed");
|
|
|
a656d6 |
+ iRet = RS_RET_IO_ERROR;
|
|
|
a656d6 |
+ } else {
|
|
|
a656d6 |
+ j_inotify_fd = r;
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
RETiRet;
|
|
|
a656d6 |
}
|
|
|
a656d6 |
|
|
|
a656d6 |
@@ -132,6 +144,7 @@ closeJournal(sd_journal** jj)
|
|
|
a656d6 |
persistJournalState();
|
|
|
a656d6 |
}
|
|
|
a656d6 |
sd_journal_close(*jj);
|
|
|
a656d6 |
+ j_inotify_fd = 0;
|
|
|
a656d6 |
}
|
|
|
a656d6 |
|
|
|
a656d6 |
|
|
|
a656d6 |
@@ -262,6 +275,7 @@ readjournal(void)
|
|
|
a656d6 |
char *message = NULL;
|
|
|
a656d6 |
char *sys_iden = NULL;
|
|
|
a656d6 |
char *sys_iden_help = NULL;
|
|
|
a656d6 |
+ char *c = NULL;
|
|
|
a656d6 |
|
|
|
a656d6 |
const void *get;
|
|
|
a656d6 |
const void *pidget;
|
|
|
a656d6 |
@@ -393,6 +407,12 @@ readjournal(void)
|
|
|
a656d6 |
tv.tv_usec = timestamp % 1000000;
|
|
|
a656d6 |
}
|
|
|
a656d6 |
|
|
|
a656d6 |
+ sd_journal_get_cursor(j, &c);
|
|
|
a656d6 |
+ if (c) {
|
|
|
a656d6 |
+ free(last_cursor);
|
|
|
a656d6 |
+ last_cursor = c;
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
+
|
|
|
a656d6 |
/* submit message */
|
|
|
a656d6 |
enqMsg((uchar *)message, (uchar *) sys_iden_help, facility, severity, &tv, json, 0);
|
|
|
a656d6 |
|
|
|
a656d6 |
@@ -413,44 +433,41 @@ persistJournalState (void)
|
|
|
a656d6 |
DEFiRet;
|
|
|
a656d6 |
FILE *sf; /* state file */
|
|
|
a656d6 |
char tmp_sf[MAXFNAME];
|
|
|
a656d6 |
- char *cursor;
|
|
|
a656d6 |
- int ret = 0;
|
|
|
a656d6 |
+ int r = 0;
|
|
|
a656d6 |
|
|
|
a656d6 |
- /* On success, sd_journal_get_cursor() returns 1 in systemd
|
|
|
a656d6 |
- 197 or older and 0 in systemd 198 or newer */
|
|
|
a656d6 |
- if ((ret = sd_journal_get_cursor(j, &cursor)) >= 0) {
|
|
|
a656d6 |
- /* we create a temporary name by adding a ".tmp"
|
|
|
a656d6 |
- * suffix to the end of our state file's name
|
|
|
a656d6 |
- */
|
|
|
a656d6 |
- snprintf(tmp_sf, sizeof(tmp_sf), "%s.tmp", cs.stateFile);
|
|
|
a656d6 |
- if ((sf = fopen(tmp_sf, "wb")) != NULL) {
|
|
|
a656d6 |
- if (fprintf(sf, "%s", cursor) < 0) {
|
|
|
a656d6 |
- iRet = RS_RET_IO_ERROR;
|
|
|
a656d6 |
- }
|
|
|
a656d6 |
- fclose(sf);
|
|
|
a656d6 |
- free(cursor);
|
|
|
a656d6 |
- /* change the name of the file to the configured one */
|
|
|
a656d6 |
- if (iRet == RS_RET_OK && rename(tmp_sf, cs.stateFile) == -1) {
|
|
|
a656d6 |
- char errStr[256];
|
|
|
a656d6 |
- rs_strerror_r(errno, errStr, sizeof(errStr));
|
|
|
a656d6 |
- iRet = RS_RET_IO_ERROR;
|
|
|
a656d6 |
- errmsg.LogError(0, iRet, "rename() failed: "
|
|
|
a656d6 |
- "'%s', new path: '%s'\n", errStr, cs.stateFile);
|
|
|
a656d6 |
- }
|
|
|
a656d6 |
+ if (!last_cursor)
|
|
|
a656d6 |
+ ABORT_FINALIZE(RS_RET_OK);
|
|
|
a656d6 |
|
|
|
a656d6 |
- } else {
|
|
|
a656d6 |
- char errStr[256];
|
|
|
a656d6 |
- rs_strerror_r(errno, errStr, sizeof(errStr));
|
|
|
a656d6 |
- errmsg.LogError(0, RS_RET_FOPEN_FAILURE, "fopen() failed: "
|
|
|
a656d6 |
- "'%s', path: '%s'\n", errStr, tmp_sf);
|
|
|
a656d6 |
- iRet = RS_RET_FOPEN_FAILURE;
|
|
|
a656d6 |
- }
|
|
|
a656d6 |
- } else {
|
|
|
a656d6 |
- char errStr[256];
|
|
|
a656d6 |
- rs_strerror_r(-(ret), errStr, sizeof(errStr));
|
|
|
a656d6 |
- errmsg.LogError(0, RS_RET_ERR, "sd_journal_get_cursor() failed: '%s'\n", errStr);
|
|
|
a656d6 |
- iRet = RS_RET_ERR;
|
|
|
a656d6 |
- }
|
|
|
a656d6 |
+ /* we create a temporary name by adding a ".tmp"
|
|
|
a656d6 |
+ * suffix to the end of our state file's name
|
|
|
a656d6 |
+ */
|
|
|
a656d6 |
+ snprintf(tmp_sf, sizeof(tmp_sf), "%s.tmp", cs.stateFile);
|
|
|
a656d6 |
+
|
|
|
a656d6 |
+ sf = fopen(tmp_sf, "wb");
|
|
|
a656d6 |
+ if (!sf) {
|
|
|
a656d6 |
+ errmsg.LogError(errno, RS_RET_FOPEN_FAILURE, "imjournal: fopen() failed for path: '%s'", tmp_sf);
|
|
|
a656d6 |
+ ABORT_FINALIZE(RS_RET_FOPEN_FAILURE);
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
+
|
|
|
a656d6 |
+ r = fprintf(sf, "%s", last_cursor);
|
|
|
a656d6 |
+ if (r < 0) {
|
|
|
a656d6 |
+ errmsg.LogError(errno, RS_RET_FOPEN_FAILURE, "imjournal: failed to save cursor to: '%s'", tmp_sf);
|
|
|
a656d6 |
+ ABORT_FINALIZE(RS_RET_IO_ERROR);
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
+
|
|
|
a656d6 |
+ r = fclose(sf);
|
|
|
a656d6 |
+ if (r < 0) {
|
|
|
a656d6 |
+ errmsg.LogError(errno, iRet, "imjournal: fclose() failed for path: '%s'", tmp_sf);
|
|
|
a656d6 |
+ ABORT_FINALIZE(RS_RET_IO_ERROR);
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
+
|
|
|
a656d6 |
+ r = rename(tmp_sf, cs.stateFile);
|
|
|
a656d6 |
+ if (r < 0) {
|
|
|
a656d6 |
+ errmsg.LogError(errno, iRet, "imjournal: rename() failed for new path: '%s'", cs.stateFile);
|
|
|
a656d6 |
+ ABORT_FINALIZE(RS_RET_IO_ERROR);
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
+
|
|
|
a656d6 |
+finalize_it:
|
|
|
a656d6 |
RETiRet;
|
|
|
a656d6 |
}
|
|
|
a656d6 |
|
|
|
a656d6 |
@@ -473,64 +473,29 @@
|
|
|
a656d6 |
* except for the special handling of EINTR.
|
|
|
a656d6 |
*/
|
|
|
a656d6 |
|
|
|
a656d6 |
-#define POLL_TIMEOUT 1000 /* timeout for poll is 1s */
|
|
|
a656d6 |
+#define POLL_TIMEOUT 900000 /* timeout for poll is 900ms */
|
|
|
a656d6 |
|
|
|
a656d6 |
static rsRetVal
|
|
|
a656d6 |
pollJournal(void)
|
|
|
a656d6 |
{
|
|
|
a656d6 |
DEFiRet;
|
|
|
a656d6 |
- struct pollfd pollfd;
|
|
|
a656d6 |
- int pr = 0;
|
|
|
a656d6 |
- int jr = 0;
|
|
|
a656d6 |
-
|
|
|
a656d6 |
- pollfd.fd = sd_journal_get_fd(j);
|
|
|
a656d6 |
- pollfd.events = sd_journal_get_events(j);
|
|
|
a656d6 |
- pr = poll(&pollfd, 1, POLL_TIMEOUT);
|
|
|
a656d6 |
- if (pr == -1) {
|
|
|
a656d6 |
- if (errno == EINTR) {
|
|
|
a656d6 |
- /* EINTR is also received during termination
|
|
|
a656d6 |
- * so return now to check the term state.
|
|
|
a656d6 |
- */
|
|
|
a656d6 |
- ABORT_FINALIZE(RS_RET_OK);
|
|
|
a656d6 |
- } else {
|
|
|
a656d6 |
- char errStr[256];
|
|
|
a656d6 |
-
|
|
|
a656d6 |
- rs_strerror_r(errno, errStr, sizeof(errStr));
|
|
|
a656d6 |
- errmsg.LogError(0, RS_RET_ERR,
|
|
|
a656d6 |
- "poll() failed: '%s'", errStr);
|
|
|
a656d6 |
- ABORT_FINALIZE(RS_RET_ERR);
|
|
|
a656d6 |
- }
|
|
|
a656d6 |
- }
|
|
|
a656d6 |
+ int r;
|
|
|
a656d6 |
|
|
|
a656d6 |
+ for (;;) {
|
|
|
a656d6 |
+ r = sd_journal_wait(j, POLL_TIMEOUT);
|
|
|
a656d6 |
+ break;
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
|
|
|
a656d6 |
- jr = sd_journal_process(j);
|
|
|
a656d6 |
-
|
|
|
a656d6 |
- if (pr == 1 && jr == SD_JOURNAL_INVALIDATE) {
|
|
|
a656d6 |
- /* do not persist stateFile sd_journal_get_cursor will fail! */
|
|
|
a656d6 |
- char* tmp = cs.stateFile;
|
|
|
a656d6 |
- cs.stateFile = NULL;
|
|
|
a656d6 |
+ if (r == SD_JOURNAL_INVALIDATE) {
|
|
|
a656d6 |
closeJournal(&j);
|
|
|
a656d6 |
- cs.stateFile = tmp;
|
|
|
a656d6 |
|
|
|
a656d6 |
iRet = openJournal(&j);
|
|
|
a656d6 |
- if (iRet != RS_RET_OK) {
|
|
|
a656d6 |
- char errStr[256];
|
|
|
a656d6 |
- rs_strerror_r(errno, errStr, sizeof(errStr));
|
|
|
a656d6 |
- errmsg.LogError(0, RS_RET_IO_ERROR,
|
|
|
a656d6 |
- "sd_journal_open() failed: '%s'", errStr);
|
|
|
a656d6 |
+ if (iRet != RS_RET_OK)
|
|
|
a656d6 |
ABORT_FINALIZE(RS_RET_ERR);
|
|
|
a656d6 |
- }
|
|
|
a656d6 |
|
|
|
a656d6 |
- if(cs.stateFile != NULL){
|
|
|
a656d6 |
+ if (cs.stateFile)
|
|
|
a656d6 |
iRet = loadJournalState();
|
|
|
a656d6 |
- }
|
|
|
a656d6 |
- LogMsg(0, RS_RET_OK, LOG_NOTICE, "imjournal: journal reloaded...");
|
|
|
a656d6 |
- } else if (jr < 0) {
|
|
|
a656d6 |
- char errStr[256];
|
|
|
a656d6 |
- rs_strerror_r(errno, errStr, sizeof(errStr));
|
|
|
a656d6 |
- errmsg.LogError(0, RS_RET_ERR,
|
|
|
a656d6 |
- "sd_journal_process() failed: '%s'", errStr);
|
|
|
a656d6 |
- ABORT_FINALIZE(RS_RET_ERR);
|
|
|
a656d6 |
+ errmsg.LogMsg(0, RS_RET_OK, LOG_NOTICE, "imjournal: journal reloaded...");
|
|
|
a656d6 |
}
|
|
|
a656d6 |
|
|
|
a656d6 |
finalize_it:
|
|
|
a656d6 |
@@ -631,8 +612,17 @@ loadJournalState(void)
|
|
|
a656d6 |
RETiRet;
|
|
|
a656d6 |
}
|
|
|
a656d6 |
|
|
|
a656d6 |
+static void
|
|
|
a656d6 |
+tryRecover(void) {
|
|
|
a656d6 |
+ errmsg.LogMsg(0, RS_RET_OK, LOG_INFO, "imjournal: trying to recover from unexpected "
|
|
|
a656d6 |
+ "journal error");
|
|
|
a656d6 |
+ closeJournal(&j);
|
|
|
a656d6 |
+ srSleep(10, 0); // do not hammer machine with too-frequent retries
|
|
|
a656d6 |
+ openJournal(&j);
|
|
|
a656d6 |
+}
|
|
|
a656d6 |
+
|
|
|
a656d6 |
BEGINrunInput
|
|
|
a656d6 |
- int count = 0;
|
|
|
a656d6 |
+ uint64_t count = 0;
|
|
|
a656d6 |
CODESTARTrunInput
|
|
|
a656d6 |
CHKiRet(ratelimitNew(&ratelimiter, "imjournal", NULL));
|
|
|
a656d6 |
dbgprintf("imjournal: ratelimiting burst %d, interval %d\n", cs.ratelimitBurst,
|
|
|
a656d6 |
@@ -665,26 +655,38 @@ CODESTARTrunInput
|
|
|
a656d6 |
|
|
|
a656d6 |
r = sd_journal_next(j);
|
|
|
a656d6 |
if (r < 0) {
|
|
|
a656d6 |
- char errStr[256];
|
|
|
a656d6 |
-
|
|
|
a656d6 |
- rs_strerror_r(errno, errStr, sizeof(errStr));
|
|
|
a656d6 |
- errmsg.LogError(0, RS_RET_ERR,
|
|
|
a656d6 |
- "sd_journal_next() failed: '%s'", errStr);
|
|
|
a656d6 |
- ABORT_FINALIZE(RS_RET_ERR);
|
|
|
a656d6 |
+ tryRecover();
|
|
|
a656d6 |
+ continue;
|
|
|
a656d6 |
}
|
|
|
a656d6 |
|
|
|
a656d6 |
if (r == 0) {
|
|
|
a656d6 |
/* No new messages, wait for activity. */
|
|
|
a656d6 |
- CHKiRet(pollJournal());
|
|
|
a656d6 |
+ if (pollJournal() != RS_RET_OK) {
|
|
|
a656d6 |
+ tryRecover();
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
continue;
|
|
|
a656d6 |
}
|
|
|
a656d6 |
|
|
|
a656d6 |
- CHKiRet(readjournal());
|
|
|
a656d6 |
+ if (readjournal() != RS_RET_OK) {
|
|
|
a656d6 |
+ tryRecover();
|
|
|
a656d6 |
+ continue;
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
+
|
|
|
a656d6 |
+ count++;
|
|
|
a656d6 |
+
|
|
|
a656d6 |
+ if ((count % J_PROCESS_PERIOD) == 0) {
|
|
|
a656d6 |
+ /* Give the journal a periodic chance to detect rotated journal files to be cleaned up. */
|
|
|
a656d6 |
+ r = sd_journal_process(j);
|
|
|
a656d6 |
+ if (r < 0) {
|
|
|
a656d6 |
+ errmsg.LogError(-r, RS_RET_ERR, "imjournal: sd_journal_process() failed");
|
|
|
a656d6 |
+ tryRecover();
|
|
|
a656d6 |
+ continue;
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
+ }
|
|
|
a656d6 |
+
|
|
|
a656d6 |
if (cs.stateFile) { /* can't persist without a state file */
|
|
|
a656d6 |
/* TODO: This could use some finer metric. */
|
|
|
a656d6 |
- count++;
|
|
|
a656d6 |
- if (count == cs.iPersistStateInterval) {
|
|
|
a656d6 |
- count = 0;
|
|
|
a656d6 |
+ if ((count % cs.iPersistStateInterval) == 0) {
|
|
|
a656d6 |
persistJournalState();
|
|
|
a656d6 |
}
|
|
|
a656d6 |
}
|