Blob Blame History Raw
From 7204e7f9ea3067bda7e5658a06e91b67c736f8ab Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 12 Feb 2018 16:14:58 +0100
Subject: [PATCH] sd-journal: properly handle inotify queue overflow

This adds proper handling of IN_Q_OVERFLOW: when the inotify queue runs
over we'll reiterate all directories we are looking at. At the same time
we'll mark all files and directories we encounter that way with a
generation counter we first increased. All files and directories not
marked like this are then unloaded.

With this logic we do the best when the inotify queue overflows: we
synchronize our in-memory state again with what's on disk.  This
contains some refactoring of the directory logic, to share more code
between uuid directories and "root" directories and generally make
things a bit more readable by splitting things up into smaller bits.

See: #7998 #8032

(cherry-picked from commit 858749f7312bd0adb5433075a92e1c35a2fb56ac)

Resolves: #1540538
---
 src/journal/journal-file.h     |   2 +
 src/journal/journal-internal.h |   2 +
 src/journal/sd-journal.c       | 237 ++++++++++++++++++++++++++-------
 src/shared/path-util.c         |  14 ++
 src/shared/path-util.h         |   2 +
 5 files changed, 206 insertions(+), 51 deletions(-)

diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
index c74ad5fc5..dd8ef52d2 100644
--- a/src/journal/journal-file.h
+++ b/src/journal/journal-file.h
@@ -121,6 +121,8 @@ typedef struct JournalFile {
 
         void *fsprg_seed;
         size_t fsprg_seed_size;
+
+        unsigned last_seen_generation;
 #endif
 } JournalFile;
 
diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h
index eb23ac28a..999e9d8cb 100644
--- a/src/journal/journal-internal.h
+++ b/src/journal/journal-internal.h
@@ -81,6 +81,7 @@ struct Directory {
         char *path;
         int wd;
         bool is_root;
+        unsigned last_seen_generation;
 };
 
 struct sd_journal {
@@ -102,6 +103,7 @@ struct sd_journal {
         int inotify_fd;
         unsigned current_invalidate_counter, last_invalidate_counter;
         usec_t last_process_usec;
+        unsigned generation;
 
         char *unique_field;
         JournalFile *unique_file;
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
index 14b65cfed..9186f5188 100644
--- a/src/journal/sd-journal.c
+++ b/src/journal/sd-journal.c
@@ -1229,8 +1229,16 @@ static int add_any_file(sd_journal *j, const char *path) {
         assert(j);
         assert(path);
 
-        if (ordered_hashmap_get(j->files, path))
-                return 0;
+        if (path) {
+                f = ordered_hashmap_get(j->files, path);
+                if (f) {
+                        /* Mark this file as seen in this generation. This is used to GC old files in
+                         * process_q_overflow() to detect journal files that are still and discern them from those who
+                         * are gone. */
+                        f->last_seen_generation = j->generation;
+                        return 0;
+                }
+        }
 
         if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
                 log_debug("Too many open journal files, not adding %s.", path);
@@ -1252,6 +1260,8 @@ static int add_any_file(sd_journal *j, const char *path) {
                 goto fail;
         }
 
+        f->last_seen_generation = j->generation;
+
         log_debug("File %s added.", f->path);
 
         check_network(j, f->fd);
@@ -1346,10 +1356,96 @@ static int dirname_is_machine_id(const char *fn) {
         return sd_id128_equal(id, machine);
 }
 
+static bool dirent_is_journal_file(const struct dirent *de) {
+        assert(de);
+
+        if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
+                return false;
+
+        return endswith(de->d_name, ".journal") ||
+                endswith(de->d_name, ".journal~");
+}
+
+static bool dirent_is_id128_subdir(const struct dirent *de) {
+        assert(de);
+
+        if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
+                return false;
+
+        return id128_is_valid(de->d_name);
+}
+
+static int directory_open(sd_journal *j, const char *path, DIR **ret) {
+        DIR *d;
+
+        assert(j);
+        assert(path);
+        assert(ret);
+
+        d = opendir(path);
+        if (!d)
+                return -errno;
+
+        *ret = d;
+        return 0;
+}
+
+static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
+
+static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
+        struct dirent *de;
+
+        assert(j);
+        assert(m);
+        assert(d);
+
+        FOREACH_DIRENT_ALL(de, d, goto fail) {
+                if (dirent_is_journal_file(de))
+                        (void) add_file(j, m->path, de->d_name);
+
+                if (m->is_root && dirent_is_id128_subdir(de))
+                        (void) add_directory(j, m->path, de->d_name);
+        }
+
+        return;
+
+fail:
+        log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
+}
+
+static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
+        int r;
+
+        assert(j);
+        assert(m);
+        assert(fd >= 0);
+
+        /* Watch this directory if that's enabled and if it not being watched yet. */
+
+        if (m->wd > 0) /* Already have a watch? */
+                return;
+        if (j->inotify_fd < 0) /* Not watching at all? */
+                return;
+
+        m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
+        if (m->wd < 0) {
+                log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
+                return;
+        }
+
+        r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
+        if (r == -EEXIST)
+                log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
+        if (r < 0) {
+                log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
+                (void) inotify_rm_watch(j->inotify_fd, m->wd);
+                m->wd = -1;
+        }
+}
+
 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
         _cleanup_free_ char *path = NULL;
         _cleanup_closedir_ DIR *d = NULL;
-        struct dirent *de = NULL;
         Directory *m;
         int r, k;
 
@@ -1357,7 +1453,7 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
         assert(prefix);
         assert(dirname);
 
-        log_debug("Considering %s/%s.", prefix, dirname);
+        log_debug("Considering '%s/%s'.", prefix, dirname);
 
         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
             !(dirname_is_machine_id(dirname) > 0 || path_startswith(prefix, "/run")))
@@ -1369,9 +1465,9 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
                 goto fail;
         }
 
-        d = opendir(path);
-        if (!d) {
-                r = log_debug_errno(errno, "Failed to open directory %s: %m", path);
+        r = directory_open(j, path, &d);
+        if (r < 0) {
+                r = log_debug_errno(errno, "Failed to open directory '%s': %m", path);
                 goto fail;
         }
 
@@ -1398,25 +1494,17 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
                 log_debug("Directory %s added.", m->path);
 
         } else if (m->is_root)
-                return 0;
-
-        if (m->wd <= 0 && j->inotify_fd >= 0) {
-
-                m->wd = inotify_add_watch(j->inotify_fd, m->path,
-                                          IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
-                                          IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
-                                          IN_ONLYDIR);
+                return 0; /* Don't 'downgrade' from root directory */
 
-                if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
-                        inotify_rm_watch(j->inotify_fd, m->wd);
-        }
+        m->last_seen_generation = j->generation;
 
-        FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) {
+        directory_watch(j, m, dirfd(d),
+                        IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
+                        IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
+                        IN_ONLYDIR);
 
-                if (dirent_is_file_with_suffix(de, ".journal") ||
-                    dirent_is_file_with_suffix(de, ".journal~"))
-                        (void) add_file(j, m->path, de->d_name);
-        }
+        if (!j->no_new_files)
+                directory_enumerate(j, m, d);
 
         check_network(j, dirfd(d));
 
@@ -1432,13 +1520,14 @@ fail:
 
 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
         _cleanup_closedir_ DIR *d = NULL;
-        struct dirent *de;
         Directory *m;
         int r, k;
 
         assert(j);
         assert(p);
 
+        log_debug("Considering root directory '%s'.", p);
+
         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
             !path_startswith(p, "/run"))
                 return -EINVAL;
@@ -1446,12 +1535,11 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
         if (j->prefix)
                 p = strjoina(j->prefix, p);
 
-        d = opendir(p);
-        if (!d) {
-                if (errno == ENOENT && missing_ok)
-                        return 0;
-
-                r = log_debug_errno(errno, "Failed to open root directory %s: %m", p);
+        r = directory_open(j, p, &d);
+        if (r == -ENOENT && missing_ok)
+                return 0;
+        if (r < 0) {
+                log_debug_errno(r, "Failed to open root directory %s: %m", p);
                 goto fail;
         }
 
@@ -1495,19 +1583,12 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
                         inotify_rm_watch(j->inotify_fd, m->wd);
         }
 
-        if (j->no_new_files)
-                return 0;
-
-        FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) {
-                sd_id128_t id;
+        directory_watch(j, m, dirfd(d),
+                        IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
+                        IN_ONLYDIR);
 
-                if (dirent_is_file_with_suffix(de, ".journal") ||
-                    dirent_is_file_with_suffix(de, ".journal~"))
-                        (void) add_file(j, m->path, de->d_name);
-                else if (IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN) &&
-                         sd_id128_from_string(de->d_name, &id) >= 0)
-                        (void) add_directory(j, m->path, de->d_name);
-        }
+        if (!j->no_new_files)
+                directory_enumerate(j, m, d);
 
         check_network(j, dirfd(d));
 
@@ -2068,6 +2149,18 @@ _public_ void sd_journal_restart_data(sd_journal *j) {
         j->current_field = 0;
 }
 
+static int reiterate_all_paths(sd_journal *j) {
+        assert(j);
+
+        if (j->no_new_files)
+                return add_current_paths(j);
+
+        if (j->path)
+                return add_root_directory(j, j->path, true);
+
+        return add_search_paths(j);
+}
+
 _public_ int sd_journal_get_fd(sd_journal *j) {
         int r;
 
@@ -2081,15 +2174,11 @@ _public_ int sd_journal_get_fd(sd_journal *j) {
         if (r < 0)
                 return r;
 
-        /* Iterate through all dirs again, to add them to the
-         * inotify */
-        if (j->no_new_files)
-                r = add_current_paths(j);
-        else if (j->path)
-                r = add_root_directory(j, j->path, true);
-        else
-                r = add_search_paths(j);
-        if (r < 0)
+         log_debug("Reiterating files to get inotify watches established.");
+
+        /* Iterate through all dirs again, to add them to the inotify */
+        r = reiterate_all_paths(j);
+         if (r < 0)
                 return r;
 
         return j->inotify_fd;
@@ -2131,12 +2220,58 @@ _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
         return 1;
 }
 
+static void process_q_overflow(sd_journal *j) {
+        JournalFile *f;
+        Directory *m;
+        Iterator i;
+
+        assert(j);
+
+        /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
+         * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
+         * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
+         * are subject for unloading. */
+
+        log_debug("Inotify queue overrun, reiterating everything.");
+
+        j->generation++;
+        (void) reiterate_all_paths(j);
+
+        ORDERED_HASHMAP_FOREACH(f, j->files, i) {
+
+                if (f->last_seen_generation == j->generation)
+                        continue;
+
+                log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
+                remove_file_real(j, f);
+        }
+
+        HASHMAP_FOREACH(m, j->directories_by_path, i) {
+
+                if (m->last_seen_generation == j->generation)
+                        continue;
+
+                if (m->is_root) /* Never GC root directories */
+                        continue;
+
+                log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
+                remove_directory(j, m);
+        }
+
+        log_debug("Reiteration complete.");
+}
+
 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
         Directory *d;
 
         assert(j);
         assert(e);
 
+        if (e->mask & IN_Q_OVERFLOW) {
+                process_q_overflow(j);
+                return;
+        }
+
         /* Is this a subdirectory we watch? */
         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
         if (d) {
diff --git a/src/shared/path-util.c b/src/shared/path-util.c
index 5d4de9ec4..fcc591686 100644
--- a/src/shared/path-util.c
+++ b/src/shared/path-util.c
@@ -861,3 +861,17 @@ char *prefix_root(const char *root, const char *path) {
         strcpy(p, path);
         return n;
 }
+
+int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
+        char path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+        int r;
+
+        /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
+        xsprintf(path, "/proc/self/fd/%i", what);
+
+        r = inotify_add_watch(fd, path, mask);
+        if (r < 0)
+                return -errno;
+
+        return r;
+}
diff --git a/src/shared/path-util.h b/src/shared/path-util.h
index 34c016229..96490e12b 100644
--- a/src/shared/path-util.h
+++ b/src/shared/path-util.h
@@ -66,6 +66,8 @@ int fsck_exists(const char *fstype);
 
 char *prefix_root(const char *root, const char *path);
 
+int inotify_add_watch_fd(int fd, int what, uint32_t mask);
+
 /* Similar to prefix_root(), but returns an alloca() buffer, or
  * possibly a const pointer into the path parameter */
 #define prefix_roota(root, path)                                        \