Blob Blame History Raw
From 70ea76962bfe9ffed2bd604898e43e0e3b17645d Mon Sep 17 00:00:00 2001
From: Rainer Gerhards <rgerhards@adiscon.com>
Date: Thu, 23 Aug 2018 10:15:21 +0200
Subject: [PATCH] omfile: implement file-id, used in state file

This ensures that files with the same inodes are not accidentally treated
as equal, at least within the limits of the file id hash (see doc for
details).

We use the siphash reference implementation to generate our non-cryptographic
hash.

State file handling was invalid. When a file was moved and re-created,
rsyslog could use the file_id of the new file to write the old file's
state file. This could make the file reader stuck until it reached the
previous offset. Depending on file sizes this could never happen AND
would cause large message loss. This situation was timing dependent
(a race) and most frequently occurred under log rotation. In polling
mode the bug was less likely, but could also occur.
---
 plugins/imfile/Makefile.am      |   2 +-
 plugins/imfile/imfile.c         | 190 ++++++++++++++++++-----
 plugins/imfile/siphash.c        | 185 ++++++++++++++++++++++++++++++++
 3 files changed, 381 insertions(+), 19 deletions(-)
 create mode 100644 plugins/imfile/siphash.c

diff --git a/plugins/imfile/Makefile.am b/plugins/imfile/Makefile.am
index f4df0ed687..9e137efdc8 100644
--- a/plugins/imfile/Makefile.am
+++ b/plugins/imfile/Makefile.am
@@ -1,6 +1,6 @@
 pkglib_LTLIBRARIES = imfile.la
 
-imfile_la_SOURCES = imfile.c
+imfile_la_SOURCES = imfile.c siphash.c
 imfile_la_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS)
 imfile_la_LDFLAGS = -module -avoid-version
 imfile_la_LIBADD = 
diff --git a/plugins/imfile/imfile.c b/plugins/imfile/imfile.c
index 4bc6078bda..14f4f1f495 100644
--- a/plugins/imfile/imfile.c
+++ b/plugins/imfile/imfile.c
@@ -66,6 +66,8 @@
 MODULE_CNFNAME("imfile")
 
 /* defines */
+#define FILE_ID_HASH_SIZE 20	/* max size of a file_id hash */
+#define FILE_ID_SIZE	512	/* how many bytes are used for file-id? */
 
 /* Module static data */
 DEF_IMOD_STATIC_DATA	/* must be present, starts static data */
@@ -75,6 +77,9 @@
 DEFobjCurrIf(prop)
 DEFobjCurrIf(ruleset)
 
+extern int rs_siphash(const uint8_t *in, const size_t inlen, const uint8_t *k,
+	uint8_t *out, const size_t outlen); /* see siphash.c */
+
 static int bLegacyCnfModGlobalsPermitted;/* are legacy module-global config parameters permitted? */
 
 #define NUM_MULTISUB 1024 /* default max number of submits */
@@ -155,8 +160,10 @@
 	int wd;
 	time_t timeoutBase; /* what time to calculate the timeout against? */
 	/* file dynamic data */
+	char file_id[FILE_ID_HASH_SIZE]; /* file id for this entry, once we could obtain it */
 	int in_move;	/* workaround for inotify move: if set, state file must not be deleted */
 	ino_t ino;	/* current inode nbr */
+	int fd;		/* fd to file in order to obtain file_id (needs to be preserved across move) */
 	strm_t *pStrm;	/* its stream (NULL if not assigned) */
 	int nRecords; /**< How many records did we process before persisting the stream? */
 	ratelimit_t *ratelimiter;
@@ -187,7 +194,7 @@
 static int getBasename(uchar *const __restrict__ basen, uchar *const __restrict__ path);
 static void act_obj_unlink(act_obj_t *act);
 static uchar * getStateFileName(const act_obj_t *, uchar *, const size_t);
-static int getFullStateFileName(const uchar *const, uchar *const pszout, const size_t ilenout);
+static int getFullStateFileName(const uchar *const, const char *const, uchar *const pszout, const size_t ilenout);
 
 
 #define OPMODE_POLLING 0
@@ -328,7 +335,7 @@
 		  act->name, statefn);
 
 	/* Get full path and file name */
-	lenSFNam = getFullStateFileName(statefn, pszSFNam, sizeof(pszSFNam));
+	lenSFNam = getFullStateFileName(statefn, "", pszSFNam, sizeof(pszSFNam));
 
 	/* check if the file exists */
 	if(stat((char*) pszSFNam, &stat_buf) == -1) {
@@ -561,16 +568,25 @@
                        }
 		}
 	}
+	DBGPRINTF("need to add new active object '%s' in '%s' - checking if accessible\n", name, edge->path);
+	const int fd = open(name, O_RDONLY | O_CLOEXEC);
+	if(fd < 0) {
+		if (is_file) { LogMsg(errno, RS_RET_ERR, LOG_WARNING, "imfile: error accessing file '%s'", name);
+		} else { DBGPRINTF("imfile: error accessing file '%s'", name); }
+		ABORT_FINALIZE(RS_RET_NO_FILE_ACCESS);
+	}
 	DBGPRINTF("add new active object '%s' in '%s'\n", name, edge->path);
 	CHKmalloc(act = calloc(sizeof(act_obj_t), 1));
 	CHKmalloc(act->name = strdup(name));
-       if (-1 == getBasename((uchar*)basename, (uchar*)name)) {
-               CHKmalloc(act->basename = strdup(name)); /* assume basename is same as name */
-       } else {
-               CHKmalloc(act->basename = strdup(basename));
-       }
+	if (-1 == getBasename((uchar*)basename, (uchar*)name)) {
+		CHKmalloc(act->basename = strdup(name)); /* assume basename is same as name */
+	} else {
+		CHKmalloc(act->basename = strdup(basename));
+	}
 	act->edge = edge;
 	act->ino = ino;
+	act->fd = fd;
+	act->file_id[0] = '\0';
 	act->is_symlink = is_symlink;
        if (source) { /* we are target of symlink */
                CHKmalloc(act->source_name = strdup(source));
@@ -813,7 +828,7 @@
 		pollFile(act); /* get any left-over data */
 		if(inst->bRMStateOnDel) {
 			statefn = getStateFileName(act, statefile, sizeof(statefile));
-			getFullStateFileName(statefn, toDel, sizeof(toDel));
+			getFullStateFileName(statefn, "", toDel, sizeof(toDel)); // TODO: check!
 			statefn = toDel;
 		}
 		persistStrmState(act);
@@ -832,6 +847,9 @@
 		wdmapDel(act->wd);
 	}
 	#endif
+	if(act->fd >= 0) {
+		close(act->fd);
+	}
 	#if defined(OS_SOLARIS) && defined (HAVE_PORT_SOURCE_FILE)
 	if(act->pfinf != NULL) {
 		free(act->pfinf->fobj.fo_name);
@@ -1029,7 +1047,7 @@
  * open or otherwise modify disk file state.
  */
 static int
-getFullStateFileName(const uchar *const pszstatefile, uchar *const pszout, const size_t ilenout)
+getFullStateFileName(const uchar *const pszstatefile, const char *const file_id, uchar *const pszout, const size_t ilenout)
 {
 	int lenout;
 	const uchar* pszworkdir;
@@ -1038,14 +1056,69 @@
 	pszworkdir = glblGetWorkDirRaw();
 
 	/* Construct file name */
-	lenout = snprintf((char*)pszout, ilenout, "%s/%s",
-			     (char*) (pszworkdir == NULL ? "." : (char*) pszworkdir), (char*)pszstatefile);
+	lenout = snprintf((char*)pszout, ilenout, "%s/%s%s%s",
+		(char*) (pszworkdir == NULL ? "." : (char*) pszworkdir), (char*)pszstatefile,
+		(*file_id == '\0') ? "" : ":", file_id);
 
 	/* return out length */
 	return lenout;
 }
 
 
+/* hash function for file-id
+ * Takes a block of data and writes its hash, as a hex string, to hash_str.
+ *
+ * The hash is computed via rs_siphash() (see siphash.c) with the fixed key
+ * 0x00..0x0f; the 8-byte result is rendered as 16 hex digits plus NUL, so
+ * len_hash_str (the size of hash_str) must be at least 17.
+ */
+#if defined(__clang__)
+#pragma GCC diagnostic ignored "-Wunknown-attributes"
+#endif
+static void __attribute__((nonnull(1,3)))
+#if defined(__clang__)
+__attribute__((no_sanitize("unsigned-integer-overflow")))
+#endif
+get_file_id_hash(const char *data, size_t lendata,
+	char *const hash_str, const size_t len_hash_str)
+{
+	assert(len_hash_str >= 17); /* 8 hash bytes -> 16 hex digits + terminating NUL */
+
+	size_t i;
+	uint8_t out[8], k[16];
+	for (i = 0; i < 16; ++i)
+		k[i] = i; /* fixed key: bytes 0..15 */
+	memset(out, 0, sizeof(out));
+	rs_siphash((const uint8_t *)data, lendata, k, out, 8);
+
+	for(i = 0 ; i < 8 ; ++i) {
+		if(2 * i+1 >= len_hash_str)
+			break;
+		snprintf(hash_str+(2*i), 3, "%2.2x", out[i]); /* two hex digits per byte, NUL-terminated */
+	}
+}
+
+
+
+/* this (re)computes act->file_id: the hash of the first FILE_ID_SIZE bytes read via act->fd.
+ * The previous id is saved in act->file_id_prev; file_id stays empty on a short or failed read. */
+static void getFileID(act_obj_t *const act)
+{
+	/* save the old id for cleaning purposes */
+	strncpy(act->file_id_prev, (const char*)act->file_id, FILE_ID_HASH_SIZE);
+	act->file_id[0] = '\0';
+	assert(act->fd >= 0); /* fd must have been opened at act_obj_t creation! */
+	char filedata[FILE_ID_SIZE];
+	lseek(act->fd, 0, SEEK_SET); /* Seek to beginning of file so we have correct id */
+	const int r = read(act->fd, filedata, FILE_ID_SIZE);
+	if(r == FILE_ID_SIZE) { /* only a full FILE_ID_SIZE block yields an id */
+		get_file_id_hash(filedata, sizeof(filedata), act->file_id, sizeof(act->file_id));
+	} else {
+		DBGPRINTF("getFileID partial or error read, ret %d\n", r);
+	}
+	DBGPRINTF("getFileID for '%s', file_id_hash '%s'\n", act->name, act->file_id);
+}
+
 /* this generates a state file name suitable for the given file. To avoid
  * malloc calls, it must be passed a buffer which should be MAXFNAME large.
  * Note: the buffer is not necessarily populated ... always ONLY use the
@@ -1060,7 +1135,7 @@
 {
 	DBGPRINTF("getStateFileName for '%s'\n", act->name);
 	snprintf((char*)buf, lenbuf - 1, "imfile-state:%lld", (long long) act->ino);
-	DBGPRINTF("getStateFileName:  stat file name now is %s\n", buf);
+	DBGPRINTF("getStateFileName:  state file name now is %s\n", buf);
 	return buf;
 }
 
@@ -1136,18 +1209,45 @@
 	const instanceConf_t *const inst = act->edge->instarr[0];// TODO: same file, multiple instances?
 
 	uchar *const statefn = getStateFileName(act, statefile, sizeof(statefile));
+	getFileID(act);
 
-	getFullStateFileName(statefn, pszSFNam, sizeof(pszSFNam));
+	getFullStateFileName(statefn, act->file_id, pszSFNam, sizeof(pszSFNam));
 	DBGPRINTF("trying to open state for '%s', state file '%s'\n", act->name, pszSFNam);
 
 	/* check if the file exists */
 	fd = open((char*)pszSFNam, O_CLOEXEC | O_NOCTTY | O_RDONLY, 0600);
 	if(fd < 0) {
 		if(errno == ENOENT) {
-			DBGPRINTF("NO state file (%s) exists for '%s' - trying to see if "
-				"old-style file exists\n", pszSFNam, act->name);
-			CHKiRet(OLD_openFileWithStateFile(act));
-			FINALIZE;
+			if(act->file_id[0] != '\0') {
+				const char *pszSFNamHash = strdup((const char*)pszSFNam);
+				CHKmalloc(pszSFNamHash);
+				DBGPRINTF("state file %s for %s does not exist - trying to see if "
+					"inode-only file exists\n", pszSFNam, act->name);
+				getFullStateFileName(statefn, "", pszSFNam, sizeof(pszSFNam));
+				fd = open((char*)pszSFNam, O_CLOEXEC | O_NOCTTY | O_RDONLY, 0600);
+				if(fd >= 0) {
+					dbgprintf("found inode-only state file, renaming it now that we "
+						"know the file_id, new name: %s\n", pszSFNamHash);
+					/* we now can use identify the file, so let's rename it */
+					if(rename((const char*)pszSFNam, pszSFNamHash) != 0) {
+						LogError(errno, RS_RET_IO_ERROR,
+							"imfile error trying to rename state file for '%s' - "
+							"ignoring this error, usually this means a file no "
+							"longer file is left over, but this may also cause "
+							"some real trouble. Still the best we can do ",
+							act->name);
+						free((void*) pszSFNamHash);
+						ABORT_FINALIZE(RS_RET_IO_ERROR);
+					}
+				}
+				free((void*) pszSFNamHash);
+			}
+			if(fd < 0) {
+				DBGPRINTF("state file %s for %s does not exist - trying to see if "
+					"old-style file exists\n", pszSFNam, act->name);
+				CHKiRet(OLD_openFileWithStateFile(act));
+				FINALIZE;
+			}
 		} else {
 			LogError(errno, RS_RET_IO_ERROR,
 				"imfile error trying to access state file for '%s'",
@@ -1156,6 +1256,7 @@
 		}
 	}
 
+	DBGPRINTF("opened state file %s for %s\n", pszSFNam, act->name);
 	CHKiRet(strm.Construct(&act->pStrm));
 
 	struct json_object *jval;
@@ -1289,6 +1390,7 @@
 {
 	int64 strtOffs;
 	DEFiRet;
+	int64_t startOffs = 0;
 	int nProcessed = 0;
 
 	DBGPRINTF("pollFileReal enter, pStrm %p, name '%s'\n", act->pStrm, act->name);
@@ -1301,6 +1403,7 @@
 		CHKiRet(openFile(act)); /* open file */
 	}
 
+	startOffs = act->pStrm->iCurrOffs;	
 	/* loop below will be exited when strmReadLine() returns EOF */
 	while(glbl.GetGlobalInputTermState() == 0) {
 		if(inst->maxLinesAtOnce != 0 && nProcessed >= inst->maxLinesAtOnce)
@@ -1313,6 +1416,11 @@
 				inst->escapeLF, &strtOffs));
 		}
 		++nProcessed;
+		if(startOffs < FILE_ID_SIZE && act->pStrm->iCurrOffs >= FILE_ID_SIZE) {
+			dbgprintf("initiating state file write as sufficient data is now present; file=%s\n", act->name);
+			persistStrmState(act);
+			startOffs = act->pStrm->iCurrOffs; /* disable check */
+		}
 		runModConf->bHadFileData = 1; /* this is just a flag, so set it and forget it */
 		CHKiRet(enqLine(act, *pCStr, strtOffs)); /* process line */
 		rsCStrDestruct(pCStr); /* discard string (must be done by us!) */
@@ -2122,7 +2230,8 @@
 	uchar statefname[MAXFNAME];
 
 	uchar *const statefn = getStateFileName(act, statefile, sizeof(statefile));
-	getFullStateFileName(statefn, statefname, sizeof(statefname));
+	getFileID(act);
+	getFullStateFileName(statefn, act->file_id, statefname, sizeof(statefname));
 	DBGPRINTF("persisting state for '%s', state file '%s'\n", act->name, statefname);
 
 	struct json_object *jval = NULL;
diff --git a/plugins/imfile/siphash.c b/plugins/imfile/siphash.c
new file mode 100644
index 0000000000..8d5fac7343
--- /dev/null
+++ b/plugins/imfile/siphash.c
@@ -0,0 +1,185 @@
+/* SipHash reference C implementation
+ *
+ * Copyright (c) 2012-2016 Jean-Philippe Aumasson
+ * <jeanphilippe.aumasson@gmail.com>
+ * Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
+ *
+ * Slightly adapted by rsyslog in regard to build system and code style
+ * check.
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide. This software is distributed without any warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication along
+ * with
+ * this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ *
+ * For details on siphash see https://131002.net/siphash/
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/* default: SipHash-2-4 */
+#define cROUNDS 2
+#define dROUNDS 4
+
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
+
+#define U32TO8_LE(p, v)                                                        \
+	(p)[0] = (uint8_t)((v));                                                   \
+	(p)[1] = (uint8_t)((v) >> 8);                                              \
+	(p)[2] = (uint8_t)((v) >> 16);                                             \
+	(p)[3] = (uint8_t)((v) >> 24);
+
+#define U64TO8_LE(p, v)                                                        \
+	U32TO8_LE((p), (uint32_t)((v)));                                           \
+	U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
+
+#define U8TO64_LE(p)                                                           \
+	(((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |                        \
+	((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |                 \
+	((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |                 \
+	((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
+
+#define SIPROUND                                                               \
+	do {                                                                       \
+		v0 += v1;                                                              \
+		v1 = ROTL(v1, 13);                                                     \
+		v1 ^= v0;                                                              \
+		v0 = ROTL(v0, 32);                                                     \
+		v2 += v3;                                                              \
+		v3 = ROTL(v3, 16);                                                     \
+		v3 ^= v2;                                                              \
+		v0 += v3;                                                              \
+		v3 = ROTL(v3, 21);                                                     \
+		v3 ^= v0;                                                              \
+		v2 += v1;                                                              \
+		v1 = ROTL(v1, 17);                                                     \
+		v1 ^= v2;                                                              \
+		v2 = ROTL(v2, 32);                                                     \
+	} while (0)
+
+#ifdef DEBUG
+#define TRACE                                                                  \
+	do {                                                                       \
+		printf("(%3d) v0 %08x %08x\n", (int)inlen, (uint32_t)(v0 >> 32),       \
+		       (uint32_t)v0);                                                  \
+		printf("(%3d) v1 %08x %08x\n", (int)inlen, (uint32_t)(v1 >> 32),       \
+		       (uint32_t)v1);                                                  \
+		printf("(%3d) v2 %08x %08x\n", (int)inlen, (uint32_t)(v2 >> 32),       \
+		       (uint32_t)v2);                                                  \
+		printf("(%3d) v3 %08x %08x\n", (int)inlen, (uint32_t)(v3 >> 32),       \
+		       (uint32_t)v3);                                                  \
+	} while (0)
+#else
+#define TRACE
+#endif
+
+extern int rs_siphash(const uint8_t *in, const size_t inlen, const uint8_t *k,
+	uint8_t *out, const size_t outlen); /* avoid compiler warning */
+#if defined(__clang__)
+#pragma GCC diagnostic ignored "-Wunknown-attributes"
+#endif
+int
+#if defined(__clang__)
+__attribute__((no_sanitize("unsigned-integer-overflow")))
+#endif
+rs_siphash(const uint8_t *in, const size_t inlen, const uint8_t *k,
+	uint8_t *out, const size_t outlen) {
+	/* SipHash of in[0..inlen) under the 16-byte key k; writes outlen (8 or 16) bytes to out, returns 0 */
+	uint64_t v0 = 0x736f6d6570736575ULL;
+	uint64_t v1 = 0x646f72616e646f6dULL;
+	uint64_t v2 = 0x6c7967656e657261ULL;
+	uint64_t v3 = 0x7465646279746573ULL;
+	uint64_t k0 = U8TO64_LE(k);
+	uint64_t k1 = U8TO64_LE(k + 8);
+	uint64_t m;
+	int i;
+	const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); /* end of the full 8-byte words */
+	const int left = inlen & 7; /* trailing bytes after the last full word */
+	uint64_t b = ((uint64_t)inlen) << 56; /* final word: input length in the top byte */
+	assert((outlen == 8) || (outlen == 16));
+	v3 ^= k1;
+	v2 ^= k0;
+	v1 ^= k1;
+	v0 ^= k0;
+
+	if (outlen == 16)
+	v1 ^= 0xee;
+
+	for (; in != end; in += 8) { /* process the full 8-byte words */
+		m = U8TO64_LE(in);
+		v3 ^= m;
+
+		TRACE;
+		for (i = 0; i < cROUNDS; ++i)
+		    SIPROUND;
+
+		v0 ^= m;
+	}
+
+	switch (left) { /* fold the remaining bytes into b */
+	case 7:
+		b |= ((uint64_t)in[6]) << 48;
+		/*FALLTHROUGH*/
+	case 6:
+		b |= ((uint64_t)in[5]) << 40;
+		/*FALLTHROUGH*/
+	case 5:
+		b |= ((uint64_t)in[4]) << 32;
+		/*FALLTHROUGH*/
+	case 4:
+		b |= ((uint64_t)in[3]) << 24;
+		/*FALLTHROUGH*/
+	case 3:
+		b |= ((uint64_t)in[2]) << 16;
+		/*FALLTHROUGH*/
+	case 2:
+		b |= ((uint64_t)in[1]) << 8;
+		/*FALLTHROUGH*/
+	case 1:
+		b |= ((uint64_t)in[0]);
+		break;
+	case 0:
+	default:
+		break;
+	}
+
+	v3 ^= b;
+
+	TRACE;
+	for (i = 0; i < cROUNDS; ++i)
+		SIPROUND;
+
+	v0 ^= b;
+
+	if (outlen == 16)
+		v2 ^= 0xee;
+	else
+		v2 ^= 0xff;
+
+	TRACE;
+	for (i = 0; i < dROUNDS; ++i)
+		SIPROUND;
+
+	b = v0 ^ v1 ^ v2 ^ v3;
+	U64TO8_LE(out, b); /* first 8 output bytes */
+
+	if (outlen == 8)
+		return 0;
+
+	v1 ^= 0xdd; /* only reached for outlen == 16: derive the second 8 output bytes */
+
+	TRACE;
+	for (i = 0; i < dROUNDS; ++i)
+		SIPROUND;
+
+	b = v0 ^ v1 ^ v2 ^ v3;
+	U64TO8_LE(out + 8, b);
+
+	return 0;
+}
--- a/plugins/imfile/imfile.c
+++ b/plugins/imfile/imfile.c
@@ -182,6 +182,7 @@ struct act_obj_s {
 	time_t timeoutBase; /* what time to calculate the timeout against? */
 	/* file dynamic data */
 	char file_id[FILE_ID_HASH_SIZE]; /* file id for this entry, once we could obtain it */
+	char file_id_prev[FILE_ID_HASH_SIZE]; /* previous file id for this entry, set if changed */
 	int in_move;	/* workaround for inotify move: if set, state file must not be deleted */
 	ino_t ino;	/* current inode nbr */
 	int fd;		/* fd to file in order to obtain file_id (needs to be preserved across move) */
@@ -727,6 +728,7 @@ act_obj_add(fs_edge_t *const edge, const char *const name, const int is_file,
 	act->ino = ino;
 	act->fd = fd;
 	act->file_id[0] = '\0';
+	act->file_id_prev[0] = '\0';
 	act->is_symlink = is_symlink;
        if (source) { /* we are target of symlink */
                CHKmalloc(act->source_name = strdup(source));
@@ -1378,28 +1380,13 @@ openFileWithStateFile(act_obj_t *const act)
 	if(fd < 0) {
 		if(errno == ENOENT) {
 			if(act->file_id[0] != '\0') {
-				const char *pszSFNamHash = strdup((const char*)pszSFNam);
-				CHKmalloc(pszSFNamHash);
 				DBGPRINTF("state file %s for %s does not exist - trying to see if "
 					"inode-only file exists\n", pszSFNam, act->name);
 				getFullStateFileName(statefn, "", pszSFNam, sizeof(pszSFNam));
 				fd = open((char*)pszSFNam, O_CLOEXEC | O_NOCTTY | O_RDONLY, 0600);
 				if(fd >= 0) {
-					dbgprintf("found inode-only state file, renaming it now that we "
-						"know the file_id, new name: %s\n", pszSFNamHash);
-					/* we now can use identify the file, so let's rename it */
-					if(rename((const char*)pszSFNam, pszSFNamHash) != 0) {
-						LogError(errno, RS_RET_IO_ERROR,
-							"imfile error trying to rename state file for '%s' - "
-							"ignoring this error, usually this means a file no "
-							"longer file is left over, but this may also cause "
-							"some real trouble. Still the best we can do ",
-							act->name);
-						free((void*) pszSFNamHash);
-						ABORT_FINALIZE(RS_RET_IO_ERROR);
-					}
+					dbgprintf("found inode-only state file, will be renamed at next persist\n");
 				}
-				free((void*) pszSFNamHash);
 			}
 			if(fd < 0) {
 				DBGPRINTF("state file %s for %s does not exist - trying to see if "
@@ -2609,6 +2596,24 @@ atomicWriteStateFile(const char *fn, const char *content)
 	RETiRet;
 }
 
+static void
+removeOldStatefile(const uchar *statefn, const char *hashToDelete)
+{	/* unlinks the state file whose full name is built from statefn and hashToDelete */
+	int ret;
+	uchar statefname[MAXFNAME];
+
+	getFullStateFileName(statefn, hashToDelete, statefname, sizeof(statefname));
+	DBGPRINTF("removing old state file: '%s'\n", statefname);
+	ret = unlink((const char*)statefname);
+	if(ret != 0 && errno != ENOENT) { /* an already-missing file is not reported */
+		LogError(errno, RS_RET_IO_ERROR,
+			"imfile error trying to delete old state file: '%s' - ignoring this "
+			"error, usually this means a file no longer file is left over, but "
+			"this may also cause some real trouble. Still the best we can do ",
+			statefname);
+	}
+}
+
 
 /* This function persists information for a specific file being monitored.
  * To do so, it simply persists the stream object. We do NOT abort on error
@@ -2660,6 +2664,10 @@ persistStrmState(act_obj_t *const act)
 	CHKiRet(atomicWriteStateFile((const char*)statefname, jstr));
 	json_object_put(json);
 
+	if (strncmp((const char *)act->file_id_prev, (const char *)act->file_id, FILE_ID_HASH_SIZE)) {
+		removeOldStatefile(statefn, act->file_id_prev);
+	}
+
 finalize_it:
 	if(iRet != RS_RET_OK) {
 		errmsg.LogError(0, iRet, "imfile: could not persist state "