1abbee
From cc5710c3ad0ff51fa84b736d66d5f70aa0ade2b3 Mon Sep 17 00:00:00 2001
1abbee
From: Lennart Poettering <lennart@poettering.net>
1abbee
Date: Mon, 25 Apr 2016 18:08:42 +0200
1abbee
Subject: [PATCH] journalctl: don't trust the per-field entry tables when
1abbee
 looking for boot IDs
1abbee
1abbee
When appending to a journal file, journald will:
1abbee
1abbee
a) first, append the actual entry to the end of the journal file
1abbee
b) second, add an offset reference to it to the global entry array stored at
1abbee
   the beginning of the file
1abbee
c) third, add offset references to it to the per-field entry array stored at
1abbee
   various places of the file
1abbee
1abbee
The global entry array, maintained by b) is used when iterating through the
1abbee
journal without matches applied.
1abbee
1abbee
The per-field entry array maintained by c) is used when iterating through the
1abbee
journal with a match for that specific field applied.
1abbee
1abbee
In the wild, there are journal files where a) and b) were completed, but c)
1abbee
was not before the files were abandoned. This means, that in some cases log
1abbee
entries are at the end of these files that appear in the global entry array,
1abbee
but not in the per-field entry array of the _BOOT_ID= field. Now, the
1abbee
"journalctl --list-boots" command alternatingly uses the global entry array
1abbee
and the per-field entry array of the _BOOT_ID= field. It seeks to the last
1abbee
entry of a specific _BOOT_ID=field by having the right match installed, and
1abbee
then jumps to the next following entry with no match installed anymore, under
1abbee
the assumption this would bring it to the next boot ID. However, if the
1abbee
per-field entry wasn't written fully, it might actually turn out that the
1abbee
global entry array might know one more entry with the same _BOOT_ID, thus
1abbee
resulting in a indefinite loop around the same _BOOT_ID.
1abbee
1abbee
This patch fixes that, by updating the boot search logic to always continue
1abbee
reading entries until the boot ID actually changed from the previous. Thus, the
1abbee
per-field entry array is used as quick jump index (i.e. as an optimization),
1abbee
but not trusted otherwise.  Only the global entry array is trusted.
1abbee
1abbee
This replaces PR #1904, which is actually very similar to this one. However,
1abbee
this one actually reads the boot ID directly from the entry header, and doesn't
1abbee
try to read it at all until the read pointer is actually really located on the
1abbee
first item to read.
1abbee
1abbee
Fixes: #617
1abbee
1abbee
Replaces: #1904
1abbee
1abbee
Cherry-picked from: dc00966228ff90c554fd034e588ea55eb605ec52
1abbee
Related: #1318994
1abbee
---
23b3cf
 src/journal/journalctl.c | 71 ++++++++++++++++++++++++----------------
1abbee
 1 file changed, 42 insertions(+), 29 deletions(-)
1abbee
1abbee
diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c
c62b8e
index 5864ff50a6..723854a2e9 100644
1abbee
--- a/src/journal/journalctl.c
1abbee
+++ b/src/journal/journalctl.c
1abbee
@@ -941,18 +941,18 @@ static void boot_id_free_all(BootId *l) {
1abbee
         }
1abbee
 }
1abbee
 
1abbee
-static int discover_next_boot(
1abbee
-                sd_journal *j,
1abbee
-                BootId **boot,
1abbee
+static int discover_next_boot(sd_journal *j,
1abbee
+                sd_id128_t previous_boot_id,
1abbee
                 bool advance_older,
1abbee
-                bool read_realtime) {
1abbee
+                BootId **ret) {
1abbee
 
1abbee
-        int r;
1abbee
-        char match[9+32+1] = "_BOOT_ID=";
1abbee
         _cleanup_free_ BootId *next_boot = NULL;
1abbee
+        char match[9+32+1] = "_BOOT_ID=";
1abbee
+        sd_id128_t boot_id;
1abbee
+        int r;
1abbee
 
1abbee
         assert(j);
1abbee
-        assert(boot);
1abbee
+        assert(ret);
1abbee
 
1abbee
         /* We expect the journal to be on the last position of a boot
1abbee
          * (in relation to the direction we are going), so that the next
1abbee
@@ -965,29 +965,40 @@ static int discover_next_boot(
1abbee
          * we can actually advance to a *different* boot. */
1abbee
         sd_journal_flush_matches(j);
1abbee
 
1abbee
-        if (advance_older)
1abbee
-                r = sd_journal_previous(j);
1abbee
-        else
1abbee
-                r = sd_journal_next(j);
1abbee
-        if (r < 0)
1abbee
-                return r;
1abbee
-        else if (r == 0)
1abbee
-                return 0; /* End of journal, yay. */
1abbee
+        do {
1abbee
+                if (advance_older)
1abbee
+                        r = sd_journal_previous(j);
1abbee
+                else
1abbee
+                        r = sd_journal_next(j);
1abbee
+                if (r < 0)
1abbee
+                        return r;
1abbee
+                else if (r == 0)
1abbee
+                        return 0; /* End of journal, yay. */
1abbee
+
1abbee
+                r = sd_journal_get_monotonic_usec(j, NULL, &boot_id);
1abbee
+                if (r < 0)
1abbee
+                        return r;
1abbee
+
1abbee
+                /* We iterate through this in a loop, until the boot ID differs from the previous one. Note that
1abbee
+                 * normally, this will only require a single iteration, as we seeked to the last entry of the previous
1abbee
+                 * boot entry already. However, it might happen that the per-journal-field entry arrays are less
1abbee
+                 * complete than the main entry array, and hence might reference an entry that's not actually the last
1abbee
+                 * one of the boot ID as last one. Let's hence use the per-field array is initial seek position to
1abbee
+                 * speed things up, but let's not trust that it is complete, and hence, manually advance as
1abbee
+                 * necessary. */
1abbee
+
1abbee
+        } while (sd_id128_equal(boot_id, previous_boot_id));
1abbee
 
1abbee
         next_boot = new0(BootId, 1);
1abbee
         if (!next_boot)
1abbee
                 return log_oom();
1abbee
 
1abbee
-        r = sd_journal_get_monotonic_usec(j, NULL, &next_boot->id);
1abbee
+        next_boot->id = boot_id;
1abbee
+
1abbee
+        r = sd_journal_get_realtime_usec(j, &next_boot->first);
1abbee
         if (r < 0)
1abbee
                 return r;
1abbee
 
1abbee
-        if (read_realtime) {
1abbee
-                r = sd_journal_get_realtime_usec(j, &next_boot->first);
1abbee
-                if (r < 0)
1abbee
-                        return r;
1abbee
-        }
1abbee
-
1abbee
         /* Now seek to the last occurrence of this boot ID. */
1abbee
         sd_id128_to_string(next_boot->id, match + 9);
1abbee
         r = sd_journal_add_match(j, match, sizeof(match) - 1);
1abbee
@@ -1010,13 +1021,11 @@ static int discover_next_boot(
1abbee
         else if (r == 0)
1abbee
                 return -ENODATA; /* This shouldn't happen. We just came from this very boot ID. */
1abbee
 
1abbee
-        if (read_realtime) {
1abbee
-                r = sd_journal_get_realtime_usec(j, &next_boot->last);
1abbee
-                if (r < 0)
1abbee
-                        return r;
1abbee
-        }
1abbee
+        r = sd_journal_get_realtime_usec(j, &next_boot->last);
1abbee
+        if (r < 0)
1abbee
+                return r;
1abbee
 
1abbee
-        *boot = next_boot;
1abbee
+        *ret = next_boot;
1abbee
         next_boot = NULL;
1abbee
 
1abbee
         return 0;
1abbee
@@ -1032,6 +1041,7 @@ static int get_boots(
1abbee
         int r, count = 0;
1abbee
         BootId *head = NULL, *tail = NULL;
1abbee
         const bool advance_older = query_ref_boot && ref_boot_offset <= 0;
1abbee
+        sd_id128_t previous_boot_id;
1abbee
 
1abbee
         assert(j);
1abbee
 
1abbee
@@ -1085,10 +1095,11 @@ static int get_boots(
1abbee
                 /* No sd_journal_next/previous here. */
1abbee
         }
1abbee
 
1abbee
+        previous_boot_id = SD_ID128_NULL;
1abbee
         for (;;) {
1abbee
                 _cleanup_free_ BootId *current = NULL;
1abbee
 
1abbee
-                r = discover_next_boot(j, &current, advance_older, !query_ref_boot);
1abbee
+                r = discover_next_boot(j, previous_boot_id, advance_older, ¤t;;
1abbee
                 if (r < 0) {
1abbee
                         boot_id_free_all(head);
1abbee
                         return r;
1abbee
@@ -1097,6 +1108,8 @@ static int get_boots(
1abbee
                 if (!current)
1abbee
                         break;
1abbee
 
1abbee
+                previous_boot_id = current->id;
1abbee
+
1abbee
                 if (query_ref_boot) {
1abbee
                         if (!skip_once)
1abbee
                                 ref_boot_offset += advance_older ? 1 : -1;