Blame SOURCES/0001-Add-an-ostree-boot-complete.service-to-propagate-sta.patch

a98e04
From a6d45dc165e48e2a463880ebb90f34c2b9d3c4ce Mon Sep 17 00:00:00 2001
a98e04
From: Colin Walters <walters@verbum.org>
a98e04
Date: Fri, 22 Apr 2022 18:46:28 -0400
a98e04
Subject: [PATCH 1/6] Add an `ostree-boot-complete.service` to propagate
a98e04
 staging failures
a98e04
a98e04
Quite a while ago we added staged deployments, which solved
a98e04
a bunch of issues around the `/etc` merge.  However, a persistent
a98e04
problem since then is that any failures in that process that
a98e04
happened in the *previous* boot are not very visible.
a98e04
a98e04
We ship custom code in `rpm-ostree status` to query the previous
a98e04
journal.  But that has a few problems.  First, on systems
a98e04
that have been up a while, that failure message may even get
a98e04
rotated out.  Second, some systems may not even have a persistent
a98e04
journal at all.
a98e04
a98e04
A general thing we do in e.g. Fedora CoreOS testing is to check
a98e04
for systemd unit failures.  We do that both in our automated tests,
a98e04
and we even ship code that displays them on ssh logins.  And beyond
a98e04
that obviously a lot of other projects do the same; it's easy via
a98e04
`systemctl --failed`.
a98e04
a98e04
So to make failures more visible, change our `ostree-finalize-staged.service`
a98e04
to have an internal wrapper around the process that "catches" any
a98e04
errors, and copies the error message into a file in `/boot/ostree`.
a98e04
a98e04
Then, a new `ostree-boot-complete.service` looks for this file on
a98e04
startup and re-emits the error message, and fails.
a98e04
a98e04
It also deletes the file.  The rationale is to avoid *continually*
a98e04
warning.  For example we need to handle the case when an upgrade
a98e04
process creates a new staged deployment.  Now, we could change the
a98e04
ostree core code to delete the warning file when that happens instead,
a98e04
but this is trying to be a conservative change.
a98e04
a98e04
This should make failures here much more visible as is.
a98e04
---
a98e04
 Makefile-boot.am                             |  2 +
a98e04
 Makefile-ostree.am                           |  1 +
a98e04
 src/boot/ostree-boot-complete.service        | 33 +++++++++++
a98e04
 src/libostree/ostree-cmdprivate.c            |  1 +
a98e04
 src/libostree/ostree-cmdprivate.h            |  1 +
a98e04
 src/libostree/ostree-impl-system-generator.c |  2 +
a98e04
 src/libostree/ostree-sysroot-deploy.c        | 62 ++++++++++++++++++--
a98e04
 src/libostree/ostree-sysroot-private.h       |  7 +++
a98e04
 src/libostree/ostree-sysroot.c               |  2 +
a98e04
 src/ostree/ot-admin-builtin-boot-complete.c  | 58 ++++++++++++++++++
a98e04
 src/ostree/ot-admin-builtins.h               |  1 +
a98e04
 src/ostree/ot-builtin-admin.c                |  3 +
a98e04
 tests/kolainst/destructive/staged-deploy.sh  | 12 ++++
a98e04
 13 files changed, 181 insertions(+), 4 deletions(-)
a98e04
 create mode 100644 src/boot/ostree-boot-complete.service
a98e04
 create mode 100644 src/ostree/ot-admin-builtin-boot-complete.c
a98e04
a98e04
diff --git a/Makefile-boot.am b/Makefile-boot.am
a98e04
index ec10a0d6..e42e5180 100644
a98e04
--- a/Makefile-boot.am
a98e04
+++ b/Makefile-boot.am
a98e04
@@ -38,6 +38,7 @@ endif
a98e04
 if BUILDOPT_SYSTEMD
a98e04
 systemdsystemunit_DATA = src/boot/ostree-prepare-root.service \
a98e04
 	src/boot/ostree-remount.service \
a98e04
+	src/boot/ostree-boot-complete.service \
a98e04
 	src/boot/ostree-finalize-staged.service \
a98e04
 	src/boot/ostree-finalize-staged.path \
a98e04
 	$(NULL)
a98e04
@@ -64,6 +65,7 @@ endif
a98e04
 EXTRA_DIST += src/boot/dracut/module-setup.sh \
a98e04
 	src/boot/dracut/ostree.conf \
a98e04
 	src/boot/mkinitcpio \
a98e04
+	src/boot/ostree-boot-complete.service \
a98e04
 	src/boot/ostree-prepare-root.service \
a98e04
 	src/boot/ostree-finalize-staged.path \
a98e04
 	src/boot/ostree-remount.service \
a98e04
diff --git a/Makefile-ostree.am b/Makefile-ostree.am
a98e04
index 82af1681..0fe2c5f8 100644
a98e04
--- a/Makefile-ostree.am
a98e04
+++ b/Makefile-ostree.am
a98e04
@@ -70,6 +70,7 @@ ostree_SOURCES += \
a98e04
 	src/ostree/ot-admin-builtin-diff.c \
a98e04
 	src/ostree/ot-admin-builtin-deploy.c \
a98e04
 	src/ostree/ot-admin-builtin-finalize-staged.c \
a98e04
+	src/ostree/ot-admin-builtin-boot-complete.c \
a98e04
 	src/ostree/ot-admin-builtin-undeploy.c \
a98e04
 	src/ostree/ot-admin-builtin-instutil.c \
a98e04
 	src/ostree/ot-admin-builtin-cleanup.c \
a98e04
diff --git a/src/boot/ostree-boot-complete.service b/src/boot/ostree-boot-complete.service
a98e04
new file mode 100644
a98e04
index 00000000..5c09fdc9
a98e04
--- /dev/null
a98e04
+++ b/src/boot/ostree-boot-complete.service
a98e04
@@ -0,0 +1,33 @@
a98e04
+# Copyright (C) 2022 Red Hat, Inc.
a98e04
+#
a98e04
+# This library is free software; you can redistribute it and/or
a98e04
+# modify it under the terms of the GNU Lesser General Public
a98e04
+# License as published by the Free Software Foundation; either
a98e04
+# version 2 of the License, or (at your option) any later version.
a98e04
+#
a98e04
+# This library is distributed in the hope that it will be useful,
a98e04
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
a98e04
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
a98e04
+# Lesser General Public License for more details.
a98e04
+#
a98e04
+# You should have received a copy of the GNU Lesser General Public
a98e04
+# License along with this library. If not, see <https://www.gnu.org/licenses/>.
a98e04
+
a98e04
+[Unit]
a98e04
+Description=OSTree Complete Boot
a98e04
+Documentation=man:ostree(1)
a98e04
+# For now, this is the only condition on which we start, but it's
a98e04
+# marked as a triggering condition in case in the future we want
a98e04
+# to do something else.
a98e04
+ConditionPathExists=|/boot/ostree/finalize-failure.stamp
a98e04
+RequiresMountsFor=/boot
a98e04
+# Ensure that we propagate the failure into the current boot before
a98e04
+# any further finalization attempts.
a98e04
+Before=ostree-finalize-staged.service
a98e04
+
a98e04
+[Service]
a98e04
+Type=oneshot
a98e04
+# To write to /boot while keeping it read-only
a98e04
+MountFlags=slave
a98e04
+RemainAfterExit=yes
a98e04
+ExecStart=/usr/bin/ostree admin boot-complete
a98e04
diff --git a/src/libostree/ostree-cmdprivate.c b/src/libostree/ostree-cmdprivate.c
a98e04
index c9a6e2e1..f6c114f4 100644
a98e04
--- a/src/libostree/ostree-cmdprivate.c
a98e04
+++ b/src/libostree/ostree-cmdprivate.c
a98e04
@@ -51,6 +51,7 @@ ostree_cmd__private__ (void)
a98e04
     _ostree_repo_static_delta_delete,
a98e04
     _ostree_repo_verify_bindings,
a98e04
     _ostree_sysroot_finalize_staged,
a98e04
+    _ostree_sysroot_boot_complete,
a98e04
   };
a98e04
 
a98e04
   return &table;
a98e04
diff --git a/src/libostree/ostree-cmdprivate.h b/src/libostree/ostree-cmdprivate.h
a98e04
index 46452ebd..17f943c8 100644
a98e04
--- a/src/libostree/ostree-cmdprivate.h
a98e04
+++ b/src/libostree/ostree-cmdprivate.h
a98e04
@@ -33,6 +33,7 @@ typedef struct {
a98e04
   gboolean (* ostree_static_delta_delete) (OstreeRepo *repo, const char *delta_id, GCancellable *cancellable, GError **error);
a98e04
   gboolean (* ostree_repo_verify_bindings) (const char *collection_id, const char *ref_name, GVariant *commit, GError **error);
a98e04
   gboolean (* ostree_finalize_staged) (OstreeSysroot *sysroot, GCancellable *cancellable, GError **error);
a98e04
+  gboolean (* ostree_boot_complete) (OstreeSysroot *sysroot, GCancellable *cancellable, GError **error);
a98e04
 } OstreeCmdPrivateVTable;
a98e04
 
a98e04
 /* Note this not really "public", we just export the symbol, but not the header */
a98e04
diff --git a/src/libostree/ostree-impl-system-generator.c b/src/libostree/ostree-impl-system-generator.c
a98e04
index 769f0cbd..92d71605 100644
a98e04
--- a/src/libostree/ostree-impl-system-generator.c
a98e04
+++ b/src/libostree/ostree-impl-system-generator.c
a98e04
@@ -134,6 +134,8 @@ require_internal_units (const char *normal_dir,
a98e04
     return FALSE;
a98e04
   if (symlinkat (SYSTEM_DATA_UNIT_PATH "/ostree-finalize-staged.path", normal_dir_dfd, "multi-user.target.wants/ostree-finalize-staged.path") < 0)
a98e04
     return glnx_throw_errno_prefix (error, "symlinkat");
a98e04
+  if (symlinkat (SYSTEM_DATA_UNIT_PATH "/ostree-boot-complete.service", normal_dir_dfd, "multi-user.target.wants/ostree-boot-complete.service") < 0)
a98e04
+    return glnx_throw_errno_prefix (error, "symlinkat");
a98e04
 
a98e04
   return TRUE;
a98e04
 #else
a98e04
diff --git a/src/libostree/ostree-sysroot-deploy.c b/src/libostree/ostree-sysroot-deploy.c
a98e04
index b7cc232f..fc5916d8 100644
a98e04
--- a/src/libostree/ostree-sysroot-deploy.c
a98e04
+++ b/src/libostree/ostree-sysroot-deploy.c
a98e04
@@ -3255,10 +3255,10 @@ ostree_sysroot_stage_tree_with_options (OstreeSysroot     *self,
a98e04
 }
a98e04
 
a98e04
 /* Invoked at shutdown time by ostree-finalize-staged.service */
a98e04
-gboolean
a98e04
-_ostree_sysroot_finalize_staged (OstreeSysroot *self,
a98e04
-                                 GCancellable  *cancellable,
a98e04
-                                 GError       **error)
a98e04
+static gboolean
a98e04
+_ostree_sysroot_finalize_staged_inner (OstreeSysroot *self,
a98e04
+                                       GCancellable  *cancellable,
a98e04
+                                       GError       **error)
a98e04
 {
a98e04
   /* It's totally fine if there's no staged deployment; perhaps down the line
a98e04
    * though we could teach the ostree cmdline to tell systemd to activate the
a98e04
@@ -3355,9 +3355,63 @@ _ostree_sysroot_finalize_staged (OstreeSysroot *self,
a98e04
   if (!ostree_sysroot_prepare_cleanup (self, cancellable, error))
a98e04
     return FALSE;
a98e04
 
a98e04
+  // Cleanup will have closed some FDs, re-ensure writability
a98e04
+  if (!_ostree_sysroot_ensure_writable (self, error))
a98e04
+    return FALSE;
a98e04
+
a98e04
   return TRUE;
a98e04
 }
a98e04
 
a98e04
+/* Invoked at shutdown time by ostree-finalize-staged.service */
a98e04
+gboolean
a98e04
+_ostree_sysroot_finalize_staged (OstreeSysroot *self,
a98e04
+                                 GCancellable  *cancellable,
a98e04
+                                 GError       **error)
a98e04
+{
a98e04
+  g_autoptr(GError) finalization_error = NULL;
a98e04
+  if (!_ostree_sysroot_ensure_boot_fd (self, error))
a98e04
+    return FALSE;
a98e04
+  if (!_ostree_sysroot_finalize_staged_inner (self, cancellable, &finalization_error))
a98e04
+    {
a98e04
+      g_autoptr(GError) writing_error = NULL;
a98e04
+      g_assert_cmpint (self->boot_fd, !=, -1);
a98e04
+      if (!glnx_file_replace_contents_at (self->boot_fd, _OSTREE_FINALIZE_STAGED_FAILURE_PATH, 
a98e04
+                                           (guint8*)finalization_error->message, -1,
a98e04
+                                           0, cancellable, &writing_error))
a98e04
+        {
a98e04
+          // We somehow failed to write the failure message...that's not great.  Maybe ENOSPC on /boot.
a98e04
+          g_printerr ("Failed to write %s: %s\n", _OSTREE_FINALIZE_STAGED_FAILURE_PATH, writing_error->message);
a98e04
+        }
a98e04
+      g_propagate_error (error, g_steal_pointer (&finalization_error));
a98e04
+      return FALSE;
a98e04
+    }
a98e04
+  return TRUE;
a98e04
+}
a98e04
+
a98e04
+/* Invoked at bootup time by ostree-boot-complete.service */
a98e04
+gboolean
a98e04
+_ostree_sysroot_boot_complete (OstreeSysroot *self,
a98e04
+                               GCancellable  *cancellable,
a98e04
+                               GError       **error)
a98e04
+{
a98e04
+  if (!_ostree_sysroot_ensure_boot_fd (self, error))
a98e04
+    return FALSE;
a98e04
+
a98e04
+  glnx_autofd int failure_fd = -1;
a98e04
+  if (!ot_openat_ignore_enoent (self->boot_fd, _OSTREE_FINALIZE_STAGED_FAILURE_PATH, &failure_fd, error))
a98e04
+    return FALSE;
a98e04
+  // If we didn't find a failure log, then there's nothing to do right now.
a98e04
+  // (Actually this unit shouldn't even be invoked, but we may do more in the future)
a98e04
+  if (failure_fd == -1)
a98e04
+    return TRUE;
a98e04
+  g_autofree char *failure_data = glnx_fd_readall_utf8 (failure_fd, NULL, cancellable, error);
a98e04
+  if (failure_data == NULL)
a98e04
+    return glnx_prefix_error (error, "Reading from %s", _OSTREE_FINALIZE_STAGED_FAILURE_PATH);
a98e04
+  // Remove the file; we don't want to continually error out.
a98e04
+  (void) unlinkat (self->boot_fd, _OSTREE_FINALIZE_STAGED_FAILURE_PATH, 0);
a98e04
+  return glnx_throw (error, "ostree-finalize-staged.service failed on previous boot: %s", failure_data);
a98e04
+}
a98e04
+
a98e04
 /**
a98e04
  * ostree_sysroot_deployment_set_kargs:
a98e04
  * @self: Sysroot
a98e04
diff --git a/src/libostree/ostree-sysroot-private.h b/src/libostree/ostree-sysroot-private.h
a98e04
index cb34eeb3..a49a406c 100644
a98e04
--- a/src/libostree/ostree-sysroot-private.h
a98e04
+++ b/src/libostree/ostree-sysroot-private.h
a98e04
@@ -96,6 +96,9 @@ struct OstreeSysroot {
a98e04
 #define _OSTREE_SYSROOT_BOOT_INITRAMFS_OVERLAYS "ostree/initramfs-overlays"
a98e04
 #define _OSTREE_SYSROOT_INITRAMFS_OVERLAYS "boot/" _OSTREE_SYSROOT_BOOT_INITRAMFS_OVERLAYS
a98e04
 
a98e04
+// Relative to /boot, consumed by ostree-boot-complete.service
a98e04
+#define _OSTREE_FINALIZE_STAGED_FAILURE_PATH "ostree/finalize-failure.stamp"
a98e04
+
a98e04
 gboolean
a98e04
 _ostree_sysroot_ensure_writable (OstreeSysroot      *self,
a98e04
                                  GError            **error);
a98e04
@@ -142,6 +145,10 @@ gboolean
a98e04
 _ostree_sysroot_finalize_staged (OstreeSysroot *self,
a98e04
                                  GCancellable  *cancellable,
a98e04
                                  GError       **error);
a98e04
+gboolean
a98e04
+_ostree_sysroot_boot_complete (OstreeSysroot *self,
a98e04
+                               GCancellable  *cancellable,
a98e04
+                               GError       **error);
a98e04
 
a98e04
 OstreeDeployment *
a98e04
 _ostree_sysroot_deserialize_deployment_from_variant (GVariant *v,
a98e04
diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c
a98e04
index 266a2975..f083f950 100644
a98e04
--- a/src/libostree/ostree-sysroot.c
a98e04
+++ b/src/libostree/ostree-sysroot.c
a98e04
@@ -356,6 +356,8 @@ _ostree_sysroot_ensure_writable (OstreeSysroot      *self,
a98e04
   ostree_sysroot_unload (self);
a98e04
   if (!ensure_sysroot_fd (self, error))
a98e04
     return FALSE;
a98e04
+  if (!_ostree_sysroot_ensure_boot_fd (self, error))
a98e04
+    return FALSE;
a98e04
 
a98e04
   return TRUE;
a98e04
 }
a98e04
diff --git a/src/ostree/ot-admin-builtin-boot-complete.c b/src/ostree/ot-admin-builtin-boot-complete.c
a98e04
new file mode 100644
a98e04
index 00000000..6e1052f5
a98e04
--- /dev/null
a98e04
+++ b/src/ostree/ot-admin-builtin-boot-complete.c
a98e04
@@ -0,0 +1,58 @@
a98e04
+/*
a98e04
+ * Copyright (C) 2022 Red Hat, Inc.
a98e04
+ *
a98e04
+ * SPDX-License-Identifier: LGPL-2.0+
a98e04
+ *
a98e04
+ * This library is free software; you can redistribute it and/or
a98e04
+ * modify it under the terms of the GNU Lesser General Public
a98e04
+ * License as published by the Free Software Foundation; either
a98e04
+ * version 2 of the License, or (at your option) any later version.
a98e04
+ *
a98e04
+ * This library is distributed in the hope that it will be useful,
a98e04
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
a98e04
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
a98e04
+ * Lesser General Public License for more details.
a98e04
+ *
a98e04
+ * You should have received a copy of the GNU Lesser General Public
a98e04
+ * License along with this library. If not, see <https://www.gnu.org/licenses/>.
a98e04
+ */
a98e04
+
a98e04
+#include "config.h"
a98e04
+
a98e04
+#include <stdlib.h>
a98e04
+
a98e04
+#include "ot-main.h"
a98e04
+#include "ot-admin-builtins.h"
a98e04
+#include "ot-admin-functions.h"
a98e04
+#include "ostree.h"
a98e04
+#include "otutil.h"
a98e04
+
a98e04
+#include "ostree-cmdprivate.h"
a98e04
+
a98e04
+static GOptionEntry options[] = {
a98e04
+  { NULL }
a98e04
+};
a98e04
+
a98e04
+gboolean
a98e04
+ot_admin_builtin_boot_complete (int argc, char **argv, OstreeCommandInvocation *invocation, GCancellable *cancellable, GError **error)
a98e04
+{
a98e04
+  /* Just a sanity check; we shouldn't be called outside of the service though.
a98e04
+   */
a98e04
+  struct stat stbuf;
a98e04
+  if (fstatat (AT_FDCWD, OSTREE_PATH_BOOTED, &stbuf, 0) < 0)
a98e04
+    return TRUE;
a98e04
+  // We must have been invoked via systemd which should have set up a mount namespace.
a98e04
+  g_assert (getenv ("INVOCATION_ID"));
a98e04
+
a98e04
+  g_autoptr(GOptionContext) context = g_option_context_new ("");
a98e04
+  g_autoptr(OstreeSysroot) sysroot = NULL;
a98e04
+  if (!ostree_admin_option_context_parse (context, options, &argc, &argv,
a98e04
+                                          OSTREE_ADMIN_BUILTIN_FLAG_SUPERUSER,
a98e04
+                                          invocation, &sysroot, cancellable, error))
a98e04
+    return FALSE;
a98e04
+
a98e04
+  if (!ostree_cmd__private__()->ostree_boot_complete (sysroot, cancellable, error))
a98e04
+    return FALSE;
a98e04
+
a98e04
+  return TRUE;
a98e04
+}
a98e04
diff --git a/src/ostree/ot-admin-builtins.h b/src/ostree/ot-admin-builtins.h
a98e04
index d32b617e..8d9451be 100644
a98e04
--- a/src/ostree/ot-admin-builtins.h
a98e04
+++ b/src/ostree/ot-admin-builtins.h
a98e04
@@ -39,6 +39,7 @@ BUILTINPROTO(deploy);
a98e04
 BUILTINPROTO(cleanup);
a98e04
 BUILTINPROTO(pin);
a98e04
 BUILTINPROTO(finalize_staged);
a98e04
+BUILTINPROTO(boot_complete);
a98e04
 BUILTINPROTO(unlock);
a98e04
 BUILTINPROTO(status);
a98e04
 BUILTINPROTO(set_origin);
a98e04
diff --git a/src/ostree/ot-builtin-admin.c b/src/ostree/ot-builtin-admin.c
a98e04
index e0d2a60c..af09a614 100644
a98e04
--- a/src/ostree/ot-builtin-admin.c
a98e04
+++ b/src/ostree/ot-builtin-admin.c
a98e04
@@ -43,6 +43,9 @@ static OstreeCommand admin_subcommands[] = {
a98e04
   { "finalize-staged", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN,
a98e04
     ot_admin_builtin_finalize_staged,
a98e04
     "Internal command to run at shutdown time" },
a98e04
+  { "boot-complete", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN,
a98e04
+    ot_admin_builtin_boot_complete,
a98e04
+    "Internal command to run at boot after an update was applied" },
a98e04
   { "init-fs", OSTREE_BUILTIN_FLAG_NO_REPO,
a98e04
      ot_admin_builtin_init_fs,
a98e04
     "Initialize a root filesystem" },