Blob Blame History Raw
From cc5710c3ad0ff51fa84b736d66d5f70aa0ade2b3 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 25 Apr 2016 18:08:42 +0200
Subject: [PATCH] journalctl: don't trust the per-field entry tables when
 looking for boot IDs

When appending to a journal file, journald will:

a) first, append the actual entry to the end of the journal file
b) second, add an offset reference to it to the global entry array stored at
   the beginning of the file
c) third, add offset references to it to the per-field entry array stored at
   various places of the file

The global entry array, maintained by b) is used when iterating through the
journal without matches applied.

The per-field entry array maintained by c) is used when iterating through the
journal with a match for that specific field applied.

In the wild, there are journal files where a) and b) were completed, but c)
was not before the files were abandoned. This means that in some cases there
are log entries at the end of these files that appear in the global entry
array, but not in the per-field entry array of the _BOOT_ID= field. Now, the
"journalctl --list-boots" command alternately uses the global entry array
and the per-field entry array of the _BOOT_ID= field. It seeks to the last
entry of a specific _BOOT_ID= field by having the right match installed, and
then jumps to the next following entry with no match installed anymore, under
the assumption this would bring it to the next boot ID. However, if the
per-field entry array wasn't written fully, it might actually turn out that
the global entry array knows one more entry with the same _BOOT_ID, thus
resulting in an indefinite loop around the same _BOOT_ID.

This patch fixes that by updating the boot search logic to always continue
reading entries until the boot ID actually changes from the previous one. Thus,
the per-field entry array is used as a quick jump index (i.e. as an
optimization), but not trusted otherwise. Only the global entry array is trusted.

This replaces PR #1904, which is very similar to this one. However, this one
reads the boot ID directly from the entry header, and doesn't try to read it
at all until the read pointer is actually located on the first item to
read.

Fixes: #617

Replaces: #1904

Cherry-picked from: dc00966228ff90c554fd034e588ea55eb605ec52
Related: #1318994
---
 src/journal/journalctl.c | 71 ++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 29 deletions(-)

diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c
index 5864ff50a..723854a2e 100644
--- a/src/journal/journalctl.c
+++ b/src/journal/journalctl.c
@@ -941,18 +941,18 @@ static void boot_id_free_all(BootId *l) {
         }
 }
 
-static int discover_next_boot(
-                sd_journal *j,
-                BootId **boot,
+static int discover_next_boot(sd_journal *j,
+                sd_id128_t previous_boot_id,
                 bool advance_older,
-                bool read_realtime) {
+                BootId **ret) {
 
-        int r;
-        char match[9+32+1] = "_BOOT_ID=";
         _cleanup_free_ BootId *next_boot = NULL;
+        char match[9+32+1] = "_BOOT_ID=";
+        sd_id128_t boot_id;
+        int r;
 
         assert(j);
-        assert(boot);
+        assert(ret);
 
         /* We expect the journal to be on the last position of a boot
          * (in relation to the direction we are going), so that the next
@@ -965,29 +965,40 @@ static int discover_next_boot(
          * we can actually advance to a *different* boot. */
         sd_journal_flush_matches(j);
 
-        if (advance_older)
-                r = sd_journal_previous(j);
-        else
-                r = sd_journal_next(j);
-        if (r < 0)
-                return r;
-        else if (r == 0)
-                return 0; /* End of journal, yay. */
+        do {
+                if (advance_older)
+                        r = sd_journal_previous(j);
+                else
+                        r = sd_journal_next(j);
+                if (r < 0)
+                        return r;
+                else if (r == 0)
+                        return 0; /* End of journal, yay. */
+
+                r = sd_journal_get_monotonic_usec(j, NULL, &boot_id);
+                if (r < 0)
+                        return r;
+
+                /* We iterate through this in a loop, until the boot ID differs from the previous one. Note that
+                 * normally, this will only require a single iteration, as we seeked to the last entry of the previous
+                 * boot entry already. However, it might happen that the per-journal-field entry arrays are less
+                 * complete than the main entry array, and hence might reference an entry that's not actually the last
+                 * one of the boot ID as last one. Let's hence use the per-field array is initial seek position to
+                 * speed things up, but let's not trust that it is complete, and hence, manually advance as
+                 * necessary. */
+
+        } while (sd_id128_equal(boot_id, previous_boot_id));
 
         next_boot = new0(BootId, 1);
         if (!next_boot)
                 return log_oom();
 
-        r = sd_journal_get_monotonic_usec(j, NULL, &next_boot->id);
+        next_boot->id = boot_id;
+
+        r = sd_journal_get_realtime_usec(j, &next_boot->first);
         if (r < 0)
                 return r;
 
-        if (read_realtime) {
-                r = sd_journal_get_realtime_usec(j, &next_boot->first);
-                if (r < 0)
-                        return r;
-        }
-
         /* Now seek to the last occurrence of this boot ID. */
         sd_id128_to_string(next_boot->id, match + 9);
         r = sd_journal_add_match(j, match, sizeof(match) - 1);
@@ -1010,13 +1021,11 @@ static int discover_next_boot(
         else if (r == 0)
                 return -ENODATA; /* This shouldn't happen. We just came from this very boot ID. */
 
-        if (read_realtime) {
-                r = sd_journal_get_realtime_usec(j, &next_boot->last);
-                if (r < 0)
-                        return r;
-        }
+        r = sd_journal_get_realtime_usec(j, &next_boot->last);
+        if (r < 0)
+                return r;
 
-        *boot = next_boot;
+        *ret = next_boot;
         next_boot = NULL;
 
         return 0;
@@ -1032,6 +1041,7 @@ static int get_boots(
         int r, count = 0;
         BootId *head = NULL, *tail = NULL;
         const bool advance_older = query_ref_boot && ref_boot_offset <= 0;
+        sd_id128_t previous_boot_id;
 
         assert(j);
 
@@ -1085,10 +1095,11 @@ static int get_boots(
                 /* No sd_journal_next/previous here. */
         }
 
+        previous_boot_id = SD_ID128_NULL;
         for (;;) {
                 _cleanup_free_ BootId *current = NULL;
 
-                r = discover_next_boot(j, &current, advance_older, !query_ref_boot);
+                r = discover_next_boot(j, previous_boot_id, advance_older, &current);
                 if (r < 0) {
                         boot_id_free_all(head);
                         return r;
@@ -1097,6 +1108,8 @@ static int get_boots(
                 if (!current)
                         break;
 
+                previous_boot_id = current->id;
+
                 if (query_ref_boot) {
                         if (!skip_once)
                                 ref_boot_offset += advance_older ? 1 : -1;