40a46b
From afb8109dd1968e6353dbdda13e6216e12f2dec03 Mon Sep 17 00:00:00 2001
40a46b
From: Lennart Poettering <lennart@poettering.net>
40a46b
Date: Mon, 12 Feb 2018 16:14:58 +0100
40a46b
Subject: [PATCH] sd-journal: properly handle inotify queue overflow
40a46b
40a46b
This adds proper handling of IN_Q_OVERFLOW: when the inotify queue runs
40a46b
over we'll reiterate all directories we are looking at. At the same time
40a46b
we'll mark all files and directories we encounter that way with a
40a46b
generation counter we first increased. All files and directories not
40a46b
marked like this are then unloaded.
40a46b
40a46b
With this logic we do the best we can when the inotify queue overflows: we
40a46b
synchronize our in-memory state again with what's on disk.  This
40a46b
contains some refactoring of the directory logic, to share more code
40a46b
between uuid directories and "root" directories and generally make
40a46b
things a bit more readable by splitting things up into smaller bits.
40a46b
40a46b
See: #7998 #8032
40a46b
40a46b
(cherry-picked from commit 858749f7312bd0adb5433075a92e1c35a2fb56ac)
40a46b
40a46b
Resolves: #1540538
40a46b
---
40a46b
 src/journal/journal-file.h     |   2 +
40a46b
 src/journal/journal-internal.h |   2 +
40a46b
 src/journal/sd-journal.c       | 237 ++++++++++++++++++++++++++++++++---------
40a46b
 src/shared/path-util.c         |  14 +++
40a46b
 src/shared/path-util.h         |   2 +
40a46b
 5 files changed, 206 insertions(+), 51 deletions(-)
40a46b
40a46b
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
40a46b
index c74ad5fc5..dd8ef52d2 100644
40a46b
--- a/src/journal/journal-file.h
40a46b
+++ b/src/journal/journal-file.h
40a46b
@@ -121,6 +121,8 @@ typedef struct JournalFile {
40a46b
 
40a46b
         void *fsprg_seed;
40a46b
         size_t fsprg_seed_size;
40a46b
+
40a46b
+        unsigned last_seen_generation;
40a46b
 #endif
40a46b
 } JournalFile;
40a46b
 
40a46b
diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h
40a46b
index eb23ac28a..999e9d8cb 100644
40a46b
--- a/src/journal/journal-internal.h
40a46b
+++ b/src/journal/journal-internal.h
40a46b
@@ -81,6 +81,7 @@ struct Directory {
40a46b
         char *path;
40a46b
         int wd;
40a46b
         bool is_root;
40a46b
+        unsigned last_seen_generation;
40a46b
 };
40a46b
 
40a46b
 struct sd_journal {
40a46b
@@ -102,6 +103,7 @@ struct sd_journal {
40a46b
         int inotify_fd;
40a46b
         unsigned current_invalidate_counter, last_invalidate_counter;
40a46b
         usec_t last_process_usec;
40a46b
+        unsigned generation;
40a46b
 
40a46b
         char *unique_field;
40a46b
         JournalFile *unique_file;
40a46b
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
40a46b
index 14b65cfed..9186f5188 100644
40a46b
--- a/src/journal/sd-journal.c
40a46b
+++ b/src/journal/sd-journal.c
40a46b
@@ -1229,8 +1229,16 @@ static int add_any_file(sd_journal *j, const char *path) {
40a46b
         assert(j);
40a46b
         assert(path);
40a46b
 
40a46b
-        if (ordered_hashmap_get(j->files, path))
40a46b
-                return 0;
40a46b
+        if (path) {
40a46b
+                f = ordered_hashmap_get(j->files, path);
40a46b
+                if (f) {
40a46b
+                        /* Mark this file as seen in this generation. This is used to GC old files in
40a46b
+                         * process_q_overflow() to detect journal files that are still there and discern them from those which
40a46b
+                         * are gone. */
40a46b
+                        f->last_seen_generation = j->generation;
40a46b
+                        return 0;
40a46b
+                }
40a46b
+        }
40a46b
 
40a46b
         if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
40a46b
                 log_debug("Too many open journal files, not adding %s.", path);
40a46b
@@ -1252,6 +1260,8 @@ static int add_any_file(sd_journal *j, const char *path) {
40a46b
                 goto fail;
40a46b
         }
40a46b
 
40a46b
+        f->last_seen_generation = j->generation;
40a46b
+
40a46b
         log_debug("File %s added.", f->path);
40a46b
 
40a46b
         check_network(j, f->fd);
40a46b
@@ -1346,10 +1356,96 @@ static int dirname_is_machine_id(const char *fn) {
40a46b
         return sd_id128_equal(id, machine);
40a46b
 }
40a46b
 
40a46b
+static bool dirent_is_journal_file(const struct dirent *de) {
40a46b
+        assert(de);
40a46b
+
40a46b
+        if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
40a46b
+                return false;
40a46b
+
40a46b
+        return endswith(de->d_name, ".journal") ||
40a46b
+                endswith(de->d_name, ".journal~");
40a46b
+}
40a46b
+
40a46b
+static bool dirent_is_id128_subdir(const struct dirent *de) {
40a46b
+        assert(de);
40a46b
+
40a46b
+        if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
40a46b
+                return false;
40a46b
+
40a46b
+        return id128_is_valid(de->d_name);
40a46b
+}
40a46b
+
40a46b
+static int directory_open(sd_journal *j, const char *path, DIR **ret) {
40a46b
+        DIR *d;
40a46b
+
40a46b
+        assert(j);
40a46b
+        assert(path);
40a46b
+        assert(ret);
40a46b
+
40a46b
+        d = opendir(path);
40a46b
+        if (!d)
40a46b
+                return -errno;
40a46b
+
40a46b
+        *ret = d;
40a46b
+        return 0;
40a46b
+}
40a46b
+
40a46b
+static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
40a46b
+
40a46b
+static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
40a46b
+        struct dirent *de;
40a46b
+
40a46b
+        assert(j);
40a46b
+        assert(m);
40a46b
+        assert(d);
40a46b
+
40a46b
+        FOREACH_DIRENT_ALL(de, d, goto fail) {
40a46b
+                if (dirent_is_journal_file(de))
40a46b
+                        (void) add_file(j, m->path, de->d_name);
40a46b
+
40a46b
+                if (m->is_root && dirent_is_id128_subdir(de))
40a46b
+                        (void) add_directory(j, m->path, de->d_name);
40a46b
+        }
40a46b
+
40a46b
+        return;
40a46b
+
40a46b
+fail:
40a46b
+        log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
40a46b
+}
40a46b
+
40a46b
+static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
40a46b
+        int r;
40a46b
+
40a46b
+        assert(j);
40a46b
+        assert(m);
40a46b
+        assert(fd >= 0);
40a46b
+
40a46b
+        /* Watch this directory if that's enabled and if it is not being watched yet. */
40a46b
+
40a46b
+        if (m->wd > 0) /* Already have a watch? */
40a46b
+                return;
40a46b
+        if (j->inotify_fd < 0) /* Not watching at all? */
40a46b
+                return;
40a46b
+
40a46b
+        m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
40a46b
+        if (m->wd < 0) {
40a46b
+                log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
40a46b
+                return;
40a46b
+        }
40a46b
+
40a46b
+        r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
40a46b
+        if (r == -EEXIST)
40a46b
+                log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
40a46b
+        if (r < 0) {
40a46b
+                log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
40a46b
+                (void) inotify_rm_watch(j->inotify_fd, m->wd);
40a46b
+                m->wd = -1;
40a46b
+        }
40a46b
+}
40a46b
+
40a46b
 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
40a46b
         _cleanup_free_ char *path = NULL;
40a46b
         _cleanup_closedir_ DIR *d = NULL;
40a46b
-        struct dirent *de = NULL;
40a46b
         Directory *m;
40a46b
         int r, k;
40a46b
 
40a46b
@@ -1357,7 +1453,7 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
40a46b
         assert(prefix);
40a46b
         assert(dirname);
40a46b
 
40a46b
-        log_debug("Considering %s/%s.", prefix, dirname);
40a46b
+        log_debug("Considering '%s/%s'.", prefix, dirname);
40a46b
 
40a46b
         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
40a46b
             !(dirname_is_machine_id(dirname) > 0 || path_startswith(prefix, "/run")))
40a46b
@@ -1369,9 +1465,9 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
40a46b
                 goto fail;
40a46b
         }
40a46b
 
40a46b
-        d = opendir(path);
40a46b
-        if (!d) {
40a46b
-                r = log_debug_errno(errno, "Failed to open directory %s: %m", path);
40a46b
+        r = directory_open(j, path, &d);
40a46b
+        if (r < 0) {
40a46b
+                r = log_debug_errno(errno, "Failed to open directory '%s': %m", path);
40a46b
                 goto fail;
40a46b
         }
40a46b
 
40a46b
@@ -1398,25 +1494,17 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
40a46b
                 log_debug("Directory %s added.", m->path);
40a46b
 
40a46b
         } else if (m->is_root)
40a46b
-                return 0;
40a46b
-
40a46b
-        if (m->wd <= 0 && j->inotify_fd >= 0) {
40a46b
-
40a46b
-                m->wd = inotify_add_watch(j->inotify_fd, m->path,
40a46b
-                                          IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
40a46b
-                                          IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
40a46b
-                                          IN_ONLYDIR);
40a46b
+                return 0; /* Don't 'downgrade' from root directory */
40a46b
 
40a46b
-                if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
40a46b
-                        inotify_rm_watch(j->inotify_fd, m->wd);
40a46b
-        }
40a46b
+        m->last_seen_generation = j->generation;
40a46b
 
40a46b
-        FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) {
40a46b
+        directory_watch(j, m, dirfd(d),
40a46b
+                        IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
40a46b
+                        IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
40a46b
+                        IN_ONLYDIR);
40a46b
 
40a46b
-                if (dirent_is_file_with_suffix(de, ".journal") ||
40a46b
-                    dirent_is_file_with_suffix(de, ".journal~"))
40a46b
-                        (void) add_file(j, m->path, de->d_name);
40a46b
-        }
40a46b
+        if (!j->no_new_files)
40a46b
+                directory_enumerate(j, m, d);
40a46b
 
40a46b
         check_network(j, dirfd(d));
40a46b
 
40a46b
@@ -1432,13 +1520,14 @@ fail:
40a46b
 
40a46b
 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
40a46b
         _cleanup_closedir_ DIR *d = NULL;
40a46b
-        struct dirent *de;
40a46b
         Directory *m;
40a46b
         int r, k;
40a46b
 
40a46b
         assert(j);
40a46b
         assert(p);
40a46b
 
40a46b
+        log_debug("Considering root directory '%s'.", p);
40a46b
+
40a46b
         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
40a46b
             !path_startswith(p, "/run"))
40a46b
                 return -EINVAL;
40a46b
@@ -1446,12 +1535,11 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
40a46b
         if (j->prefix)
40a46b
                 p = strjoina(j->prefix, p);
40a46b
 
40a46b
-        d = opendir(p);
40a46b
-        if (!d) {
40a46b
-                if (errno == ENOENT && missing_ok)
40a46b
-                        return 0;
40a46b
-
40a46b
-                r = log_debug_errno(errno, "Failed to open root directory %s: %m", p);
40a46b
+        r = directory_open(j, p, &d);
40a46b
+        if (r == -ENOENT && missing_ok)
40a46b
+                return 0;
40a46b
+        if (r < 0) {
40a46b
+                log_debug_errno(r, "Failed to open root directory %s: %m", p);
40a46b
                 goto fail;
40a46b
         }
40a46b
 
40a46b
@@ -1495,19 +1583,12 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
40a46b
                         inotify_rm_watch(j->inotify_fd, m->wd);
40a46b
         }
40a46b
 
40a46b
-        if (j->no_new_files)
40a46b
-                return 0;
40a46b
-
40a46b
-        FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) {
40a46b
-                sd_id128_t id;
40a46b
+        directory_watch(j, m, dirfd(d),
40a46b
+                        IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
40a46b
+                        IN_ONLYDIR);
40a46b
 
40a46b
-                if (dirent_is_file_with_suffix(de, ".journal") ||
40a46b
-                    dirent_is_file_with_suffix(de, ".journal~"))
40a46b
-                        (void) add_file(j, m->path, de->d_name);
40a46b
-                else if (IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN) &&
40a46b
-                         sd_id128_from_string(de->d_name, &id) >= 0)
40a46b
-                        (void) add_directory(j, m->path, de->d_name);
40a46b
-        }
40a46b
+        if (!j->no_new_files)
40a46b
+                directory_enumerate(j, m, d);
40a46b
 
40a46b
         check_network(j, dirfd(d));
40a46b
 
40a46b
@@ -2068,6 +2149,18 @@ _public_ void sd_journal_restart_data(sd_journal *j) {
40a46b
         j->current_field = 0;
40a46b
 }
40a46b
 
40a46b
+static int reiterate_all_paths(sd_journal *j) {
40a46b
+        assert(j);
40a46b
+
40a46b
+        if (j->no_new_files)
40a46b
+                return add_current_paths(j);
40a46b
+
40a46b
+        if (j->path)
40a46b
+                return add_root_directory(j, j->path, true);
40a46b
+
40a46b
+        return add_search_paths(j);
40a46b
+}
40a46b
+
40a46b
 _public_ int sd_journal_get_fd(sd_journal *j) {
40a46b
         int r;
40a46b
 
40a46b
@@ -2081,15 +2174,11 @@ _public_ int sd_journal_get_fd(sd_journal *j) {
40a46b
         if (r < 0)
40a46b
                 return r;
40a46b
 
40a46b
-        /* Iterate through all dirs again, to add them to the
40a46b
-         * inotify */
40a46b
-        if (j->no_new_files)
40a46b
-                r = add_current_paths(j);
40a46b
-        else if (j->path)
40a46b
-                r = add_root_directory(j, j->path, true);
40a46b
-        else
40a46b
-                r = add_search_paths(j);
40a46b
-        if (r < 0)
40a46b
+         log_debug("Reiterating files to get inotify watches established.");
40a46b
+
40a46b
+        /* Iterate through all dirs again, to add them to the inotify */
40a46b
+        r = reiterate_all_paths(j);
40a46b
+         if (r < 0)
40a46b
                 return r;
40a46b
 
40a46b
         return j->inotify_fd;
40a46b
@@ -2131,12 +2220,58 @@ _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
40a46b
         return 1;
40a46b
 }
40a46b
 
40a46b
+static void process_q_overflow(sd_journal *j) {
40a46b
+        JournalFile *f;
40a46b
+        Directory *m;
40a46b
+        Iterator i;
40a46b
+
40a46b
+        assert(j);
40a46b
+
40a46b
+        /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
40a46b
+         * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
40a46b
+         * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
40a46b
+         * are subject to unloading. */
40a46b
+
40a46b
+        log_debug("Inotify queue overrun, reiterating everything.");
40a46b
+
40a46b
+        j->generation++;
40a46b
+        (void) reiterate_all_paths(j);
40a46b
+
40a46b
+        ORDERED_HASHMAP_FOREACH(f, j->files, i) {
40a46b
+
40a46b
+                if (f->last_seen_generation == j->generation)
40a46b
+                        continue;
40a46b
+
40a46b
+                log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
40a46b
+                remove_file_real(j, f);
40a46b
+        }
40a46b
+
40a46b
+        HASHMAP_FOREACH(m, j->directories_by_path, i) {
40a46b
+
40a46b
+                if (m->last_seen_generation == j->generation)
40a46b
+                        continue;
40a46b
+
40a46b
+                if (m->is_root) /* Never GC root directories */
40a46b
+                        continue;
40a46b
+
40a46b
+                log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", m->path);
40a46b
+                remove_directory(j, m);
40a46b
+        }
40a46b
+
40a46b
+        log_debug("Reiteration complete.");
40a46b
+}
40a46b
+
40a46b
 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
40a46b
         Directory *d;
40a46b
 
40a46b
         assert(j);
40a46b
         assert(e);
40a46b
 
40a46b
+        if (e->mask & IN_Q_OVERFLOW) {
40a46b
+                process_q_overflow(j);
40a46b
+                return;
40a46b
+        }
40a46b
+
40a46b
         /* Is this a subdirectory we watch? */
40a46b
         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
40a46b
         if (d) {
40a46b
diff --git a/src/shared/path-util.c b/src/shared/path-util.c
40a46b
index 1181ffb9d..4d6e5e772 100644
40a46b
--- a/src/shared/path-util.c
40a46b
+++ b/src/shared/path-util.c
40a46b
@@ -738,3 +738,17 @@ char *prefix_root(const char *root, const char *path) {
40a46b
         strcpy(p, path);
40a46b
         return n;
40a46b
 }
40a46b
+
40a46b
+int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
40a46b
+        char path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
40a46b
+        int r;
40a46b
+
40a46b
+        /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
40a46b
+        xsprintf(path, "/proc/self/fd/%i", what);
40a46b
+
40a46b
+        r = inotify_add_watch(fd, path, mask);
40a46b
+        if (r < 0)
40a46b
+                return -errno;
40a46b
+
40a46b
+        return r;
40a46b
+}
40a46b
diff --git a/src/shared/path-util.h b/src/shared/path-util.h
40a46b
index 71bb740e9..e14702da8 100644
40a46b
--- a/src/shared/path-util.h
40a46b
+++ b/src/shared/path-util.h
40a46b
@@ -65,6 +65,8 @@ int fsck_exists(const char *fstype);
40a46b
 
40a46b
 char *prefix_root(const char *root, const char *path);
40a46b
 
40a46b
+int inotify_add_watch_fd(int fd, int what, uint32_t mask);
40a46b
+
40a46b
 /* Similar to prefix_root(), but returns an alloca() buffer, or
40a46b
  * possibly a const pointer into the path parameter */
40a46b
 #define prefix_roota(root, path)                                        \