5808e7
From 2f75df5cd6dcd56775fec9e89fc79672e702d826 Mon Sep 17 00:00:00 2001
5808e7
From: Eric DeVolder <eric.devolder@oracle.com>
5808e7
Date: Thu, 16 May 2019 08:59:01 -0500
5808e7
Subject: [PATCH] pstore: Tool to archive contents of pstore
5808e7
MIME-Version: 1.0
5808e7
Content-Type: text/plain; charset=UTF-8
5808e7
Content-Transfer-Encoding: 8bit
5808e7
5808e7
This patch introduces the systemd pstore service which will archive the
5808e7
contents of the Linux persistent storage filesystem, pstore, to other storage,
5808e7
thus preserving the existing information contained in the pstore, and clearing
5808e7
pstore storage for future error events.
5808e7
5808e7
Linux provides a persistent storage file system, pstore[1], that can store
5808e7
error records when the kernel dies (or reboots or powers-off). These records in
5808e7
turn can be referenced to debug kernel problems (currently the kernel stuffs
5808e7
the tail of the dmesg, which also contains a stack backtrace, into pstore).
5808e7
5808e7
The pstore file system supports a variety of backends that map onto persistent
5808e7
storage, such as the ACPI ERST[2, Section 18.5 Error Serialization] and UEFI
5808e7
variables[3 Appendix N Common Platform Error Record]. The pstore backends
5808e7
typically offer a relatively small amount of persistent storage, e.g. 64KiB,
5808e7
which can quickly fill up and thus prevent subsequent kernel crashes from
5808e7
recording errors. Thus there is a need to monitor and extract the pstore
5808e7
contents so that future kernel problems can also record information in the
5808e7
pstore.
5808e7
5808e7
The pstore service is independent of the kdump service. In cloud environments
5808e7
specifically, host and guest filesystems are on remote filesystems (eg. iSCSI
5808e7
or NFS), thus kdump relies [implicitly and/or explicitly] upon proper operation
5808e7
of networking software *and* hardware *and* infrastructure.  Thus it may not be
5808e7
possible to capture a kernel coredump to a file since writes over the network
5808e7
may not be possible.
5808e7
5808e7
The pstore backend, on the other hand, is completely local and provides a path
5808e7
to store error records which will survive a reboot and aid in post-mortem
5808e7
debugging.
5808e7
5808e7
Usage Notes:
5808e7
This tool moves files from /sys/fs/pstore into /var/lib/systemd/pstore.
5808e7
5808e7
To enable kernel recording of error records into pstore, one must either pass
5808e7
crash_kexec_post_notifiers[4] to the kernel command line or enable via 'echo Y
5808e7
 > /sys/module/kernel/parameters/crash_kexec_post_notifiers'. This option
5808e7
invokes the recording of errors into pstore *before* an attempt to kexec/kdump
5808e7
on a kernel crash.
5808e7
5808e7
Optionally, to record reboots and shutdowns in the pstore, one can either pass
5808e7
the printk.always_kmsg_dump[4] to the kernel command line or enable via 'echo Y >
5808e7
/sys/module/printk/parameters/always_kmsg_dump'. This option enables code on the
5808e7
shutdown path to record information via pstore.
5808e7
5808e7
This pstore service is a oneshot service. When run, the service invokes
5808e7
systemd-pstore which is a tool that performs the following:
5808e7
 - reads the pstore.conf configuration file
5808e7
 - collects the lists of files in the pstore (eg. /sys/fs/pstore)
5808e7
 - for certain file types (eg. dmesg) a handler is invoked
5808e7
 - for all other files, the file is moved from pstore
5808e7
5808e7
 - In the case of dmesg handler, final processing occurs as such:
5808e7
   - files processed in reverse lexicographical order to facilitate
5808e7
     reconstruction of original dmesg
5808e7
   - the filename is examined to determine which dmesg it is a part
5808e7
   - the file is appended to the reconstructed dmesg
5808e7
5808e7
For example, the following pstore contents:
5808e7
5808e7
 root@vm356:~# ls -al /sys/fs/pstore
5808e7
 total 0
5808e7
 drwxr-x--- 2 root root    0 May  9 09:50 .
5808e7
 drwxr-xr-x 7 root root    0 May  9 09:50 ..
5808e7
 -r--r--r-- 1 root root 1610 May  9 09:49 dmesg-efi-155741337601001
5808e7
 -r--r--r-- 1 root root 1778 May  9 09:49 dmesg-efi-155741337602001
5808e7
 -r--r--r-- 1 root root 1726 May  9 09:49 dmesg-efi-155741337603001
5808e7
 -r--r--r-- 1 root root 1746 May  9 09:49 dmesg-efi-155741337604001
5808e7
 -r--r--r-- 1 root root 1686 May  9 09:49 dmesg-efi-155741337605001
5808e7
 -r--r--r-- 1 root root 1690 May  9 09:49 dmesg-efi-155741337606001
5808e7
 -r--r--r-- 1 root root 1775 May  9 09:49 dmesg-efi-155741337607001
5808e7
 -r--r--r-- 1 root root 1811 May  9 09:49 dmesg-efi-155741337608001
5808e7
 -r--r--r-- 1 root root 1817 May  9 09:49 dmesg-efi-155741337609001
5808e7
 -r--r--r-- 1 root root 1795 May  9 09:49 dmesg-efi-155741337710001
5808e7
 -r--r--r-- 1 root root 1770 May  9 09:49 dmesg-efi-155741337711001
5808e7
 -r--r--r-- 1 root root 1796 May  9 09:49 dmesg-efi-155741337712001
5808e7
 -r--r--r-- 1 root root 1787 May  9 09:49 dmesg-efi-155741337713001
5808e7
 -r--r--r-- 1 root root 1808 May  9 09:49 dmesg-efi-155741337714001
5808e7
 -r--r--r-- 1 root root 1754 May  9 09:49 dmesg-efi-155741337715001
5808e7
5808e7
results in the following:
5808e7
5808e7
 root@vm356:~# ls -al /var/lib/systemd/pstore/155741337/
5808e7
 total 92
5808e7
 drwxr-xr-x 2 root root  4096 May  9 09:50 .
5808e7
 drwxr-xr-x 4 root root    40 May  9 09:50 ..
5808e7
 -rw-r--r-- 1 root root  1610 May  9 09:50 dmesg-efi-155741337601001
5808e7
 -rw-r--r-- 1 root root  1778 May  9 09:50 dmesg-efi-155741337602001
5808e7
 -rw-r--r-- 1 root root  1726 May  9 09:50 dmesg-efi-155741337603001
5808e7
 -rw-r--r-- 1 root root  1746 May  9 09:50 dmesg-efi-155741337604001
5808e7
 -rw-r--r-- 1 root root  1686 May  9 09:50 dmesg-efi-155741337605001
5808e7
 -rw-r--r-- 1 root root  1690 May  9 09:50 dmesg-efi-155741337606001
5808e7
 -rw-r--r-- 1 root root  1775 May  9 09:50 dmesg-efi-155741337607001
5808e7
 -rw-r--r-- 1 root root  1811 May  9 09:50 dmesg-efi-155741337608001
5808e7
 -rw-r--r-- 1 root root  1817 May  9 09:50 dmesg-efi-155741337609001
5808e7
 -rw-r--r-- 1 root root  1795 May  9 09:50 dmesg-efi-155741337710001
5808e7
 -rw-r--r-- 1 root root  1770 May  9 09:50 dmesg-efi-155741337711001
5808e7
 -rw-r--r-- 1 root root  1796 May  9 09:50 dmesg-efi-155741337712001
5808e7
 -rw-r--r-- 1 root root  1787 May  9 09:50 dmesg-efi-155741337713001
5808e7
 -rw-r--r-- 1 root root  1808 May  9 09:50 dmesg-efi-155741337714001
5808e7
 -rw-r--r-- 1 root root  1754 May  9 09:50 dmesg-efi-155741337715001
5808e7
 -rw-r--r-- 1 root root 26754 May  9 09:50 dmesg.txt
5808e7
5808e7
where dmesg.txt is reconstructed from the group of related
5808e7
dmesg-efi-155741337* files.
5808e7
5808e7
Configuration file:
5808e7
The pstore.conf configuration file has two settings, described below.
5808e7
 - Storage : one of "none", "external", or "journal". With "none", this
5808e7
   tool leaves the contents of pstore untouched. With "external", the
5808e7
   contents of the pstore are moved into the /var/lib/systemd/pstore,
5808e7
   as well as logged into the journal.  With "journal", the contents of
5808e7
   the pstore are recorded only in the systemd journal. The default is
5808e7
   "external".
5808e7
 - Unlink : is a boolean. When "true", the default, then files in the
5808e7
   pstore are removed once processed. When "false", processing of the
5808e7
   pstore occurs normally, but the pstore files remain.
5808e7
5808e7
References:
5808e7
[1] "Persistent storage for a kernel's dying breath",
5808e7
    March 23, 2011.
5808e7
    https://lwn.net/Articles/434821/
5808e7
5808e7
[2] "Advanced Configuration and Power Interface Specification",
5808e7
    version 6.2, May 2017.
5808e7
    https://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf
5808e7
5808e7
[3] "Unified Extensible Firmware Interface Specification",
5808e7
    version 2.8, March 2019.
5808e7
    https://uefi.org/sites/default/files/resources/UEFI_Spec_2_8_final.pdf
5808e7
5808e7
[4] "The kernel’s command-line parameters",
5808e7
    https://static.lwn.net/kerneldoc/admin-guide/kernel-parameters.html
5808e7
5808e7
(cherry picked from commit 9b4abc69b201e5d7295e1b0762883659f053e747)
5808e7
5808e7
Resolves: #2158832
5808e7
---
5808e7
 man/pstore.conf.xml             |  89 +++++++
5808e7
 man/rules/meson.build           |   2 +
5808e7
 man/systemd-pstore.xml          |  99 ++++++++
5808e7
 meson.build                     |  20 ++
5808e7
 meson_options.txt               |   2 +
5808e7
 src/pstore/meson.build          |  10 +
5808e7
 src/pstore/pstore.c             | 395 ++++++++++++++++++++++++++++++++
5808e7
 src/pstore/pstore.conf          |  16 ++
5808e7
 units/meson.build               |   1 +
5808e7
 units/systemd-pstore.service.in |  24 ++
5808e7
 10 files changed, 658 insertions(+)
5808e7
 create mode 100644 man/pstore.conf.xml
5808e7
 create mode 100644 man/systemd-pstore.xml
5808e7
 create mode 100644 src/pstore/meson.build
5808e7
 create mode 100644 src/pstore/pstore.c
5808e7
 create mode 100644 src/pstore/pstore.conf
5808e7
 create mode 100644 units/systemd-pstore.service.in
5808e7
5808e7
diff --git a/man/pstore.conf.xml b/man/pstore.conf.xml
5808e7
new file mode 100644
5808e7
index 0000000000..b5cda47d02
5808e7
--- /dev/null
5808e7
+++ b/man/pstore.conf.xml
5808e7
@@ -0,0 +1,89 @@
5808e7
+
5808e7
+
5808e7
+  "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
5808e7
+
5808e7
+
5808e7
+
5808e7
+          xmlns:xi="http://www.w3.org/2001/XInclude">
5808e7
+  <refentryinfo>
5808e7
+    <title>pstore.conf</title>
5808e7
+    <productname>systemd</productname>
5808e7
+  </refentryinfo>
5808e7
+
5808e7
+  <refmeta>
5808e7
+    <refentrytitle>pstore.conf</refentrytitle>
5808e7
+    <manvolnum>5</manvolnum>
5808e7
+  </refmeta>
5808e7
+
5808e7
+  <refnamediv>
5808e7
+    <refname>pstore.conf</refname>
5808e7
+    <refname>pstore.conf.d</refname>
5808e7
+    <refpurpose>PStore configuration file</refpurpose>
5808e7
+  </refnamediv>
5808e7
+
5808e7
+  <refsynopsisdiv>
5808e7
+    <para>
5808e7
+    <filename>/etc/systemd/pstore.conf</filename>
5808e7
+    <filename>/etc/systemd/pstore.conf.d/*</filename>
5808e7
+    </para>
5808e7
+  </refsynopsisdiv>
5808e7
+
5808e7
+  <refsect1>
5808e7
+    <title>Description</title>
5808e7
+
5808e7
+    <para>This file configures the behavior of
5808e7
+    <citerefentry><refentrytitle>systemd-pstore</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
5808e7
+    a tool for archiving the contents of the persistent storage filesystem,
5808e7
+    <ulink url="https://www.kernel.org/doc/Documentation/ABI/testing/pstore">pstore</ulink>.
5808e7
+    </para>
5808e7
+  </refsect1>
5808e7
+
5808e7
+  <xi:include href="standard-conf.xml" xpointer="main-conf" />
5808e7
+
5808e7
+  <refsect1>
5808e7
+    <title>Options</title>
5808e7
+
5808e7
+    <para>All options are configured in the
5808e7
+    <literal>[PStore]</literal> section:</para>
5808e7
+
5808e7
+    <variablelist>
5808e7
+
5808e7
+      <varlistentry>
5808e7
+        <term><varname>Storage=</varname></term>
5808e7
+
5808e7
+        <listitem><para>Controls where to archive (i.e. copy) files from the pstore filesystem. One of <literal>none</literal>,
5808e7
+        <literal>external</literal>, and <literal>journal</literal>. When
5808e7
+        <literal>none</literal>, the tool exits without processing files in the pstore filesystem.
5808e7
+        When <literal>external</literal> (the default), files are archived into <filename>/var/lib/systemd/pstore/</filename>,
5808e7
+        and logged into the journal.
5808e7
+        When <literal>journal</literal>, pstore file contents are logged only in the journal.</para>
5808e7
+        </listitem>
5808e7
+
5808e7
+      </varlistentry>
5808e7
+
5808e7
+      <varlistentry>
5808e7
+        <term><varname>Unlink=</varname></term>
5808e7
+
5808e7
+        <listitem><para>Controls whether or not files are removed from pstore after processing.
5808e7
+        Takes a boolean value. When true, a pstore file is removed from the pstore once it has been
5808e7
+        archived (either to disk or into the journal). When false, processing of pstore files occurs
5808e7
+        normally, but the files remain in the pstore.
5808e7
+        The default is true in order to maintain the pstore in a nearly empty state, so that the pstore
5808e7
+        has storage available for the next kernel error event.
5808e7
+        </para></listitem>
5808e7
+      </varlistentry>
5808e7
+    </variablelist>
5808e7
+
5808e7
+    <para>The defaults for all values are listed as comments in the
5808e7
+    template <filename>/etc/systemd/pstore.conf</filename> file that
5808e7
+    is installed by default.</para>
5808e7
+  </refsect1>
5808e7
+
5808e7
+  <refsect1>
5808e7
+    <title>See Also</title>
5808e7
+    <para>
5808e7
+      <citerefentry><refentrytitle>systemd-journald.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>
5808e7
+    </para>
5808e7
+  </refsect1>
5808e7
+
5808e7
+</refentry>
5808e7
diff --git a/man/rules/meson.build b/man/rules/meson.build
5808e7
index e6c0a99bbd..6295330c5e 100644
5808e7
--- a/man/rules/meson.build
5808e7
+++ b/man/rules/meson.build
5808e7
@@ -44,6 +44,7 @@ manpages = [
5808e7
  ['os-release', '5', [], ''],
5808e7
  ['pam_systemd', '8', [], 'HAVE_PAM'],
5808e7
  ['portablectl', '1', [], 'ENABLE_PORTABLED'],
5808e7
+ ['pstore.conf', '5', ['pstore.conf.d'], 'ENABLE_PSTORE'],
5808e7
  ['resolvectl', '1', ['resolvconf'], 'ENABLE_RESOLVE'],
5808e7
  ['resolved.conf', '5', ['resolved.conf.d'], 'ENABLE_RESOLVE'],
5808e7
  ['runlevel', '8', [], 'ENABLE_UTMP'],
5808e7
@@ -633,6 +634,7 @@ manpages = [
5808e7
  ['systemd-nspawn', '1', [], ''],
5808e7
  ['systemd-path', '1', [], ''],
5808e7
  ['systemd-portabled.service', '8', ['systemd-portabled'], 'ENABLE_PORTABLED'],
5808e7
+ ['systemd-pstore', '8', ['systemd-pstore.service'], 'ENABLE_PSTORE'],
5808e7
  ['systemd-quotacheck.service',
5808e7
   '8',
5808e7
   ['systemd-quotacheck'],
5808e7
diff --git a/man/systemd-pstore.xml b/man/systemd-pstore.xml
5808e7
new file mode 100644
5808e7
index 0000000000..dd1aa5e83b
5808e7
--- /dev/null
5808e7
+++ b/man/systemd-pstore.xml
5808e7
@@ -0,0 +1,99 @@
5808e7
+
5808e7
+
5808e7
+  "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
5808e7
+
5808e7
+
5808e7
+
5808e7
+          xmlns:xi="http://www.w3.org/2001/XInclude">
5808e7
+
5808e7
+  <refentryinfo>
5808e7
+    <title>systemd-pstore</title>
5808e7
+    <productname>systemd</productname>
5808e7
+  </refentryinfo>
5808e7
+
5808e7
+  <refmeta>
5808e7
+    <refentrytitle>systemd-pstore</refentrytitle>
5808e7
+    <manvolnum>8</manvolnum>
5808e7
+  </refmeta>
5808e7
+
5808e7
+  <refnamediv>
5808e7
+    <refname>systemd-pstore</refname>
5808e7
+    <refname>systemd-pstore.service</refname>
5808e7
+    <refpurpose>Tool to archive contents of the persistent storage filesystem</refpurpose>
5808e7
+  </refnamediv>
5808e7
+
5808e7
+  <refsynopsisdiv>
5808e7
+    <para><filename>/usr/lib/systemd/systemd-pstore</filename></para>
5808e7
+    <para><filename>systemd-pstore.service</filename></para>
5808e7
+  </refsynopsisdiv>
5808e7
+
5808e7
+  <refsect1>
5808e7
+    <title>Description</title>
5808e7
+    <para><filename>systemd-pstore.service</filename> is a system service that archives the
5808e7
+    contents of the Linux persistent storage filesystem, pstore, to other storage,
5808e7
+    thus preserving the existing information contained in the pstore, and clearing
5808e7
+    pstore storage for future error events.</para>
5808e7
+
5808e7
+    <para>Linux provides a persistent storage file system, pstore, that can store
5808e7
+    error records when the kernel dies (or reboots or powers-off). These records in
5808e7
+    turn can be referenced to debug kernel problems (currently the kernel stuffs
5808e7
+    the tail of the dmesg, which also contains a stack backtrace, into pstore).</para>
5808e7
+
5808e7
+    <para>The pstore file system supports a variety of backends that map onto persistent
5808e7
+    storage, such as the ACPI ERST and UEFI variables. The pstore backends
5808e7
+    typically offer a relatively small amount of persistent storage, e.g. 64KiB,
5808e7
+    which can quickly fill up and thus prevent subsequent kernel crashes from
5808e7
+    recording errors. Thus there is a need to monitor and extract the pstore
5808e7
+    contents so that future kernel problems can also record information in the
5808e7
+    pstore.</para>
5808e7
+
5808e7
+    <para>The pstore service is independent of the kdump service. In cloud environments
5808e7
+    specifically, host and guest filesystems are on remote filesystems (eg. iSCSI
5808e7
+    or NFS), thus kdump relies [implicitly and/or explicitly] upon proper operation
5808e7
+    of networking software *and* hardware *and* infrastructure.  Thus it may not be
5808e7
+    possible to capture a kernel coredump to a file since writes over the network
5808e7
+    may not be possible.</para>
5808e7
+
5808e7
+    <para>The pstore backend, on the other hand, is completely local and provides a path
5808e7
+    to store error records which will survive a reboot and aid in post-mortem
5808e7
+    debugging.</para>
5808e7
+
5808e7
+    <para>The <command>systemd-pstore</command> executable does the actual work. Upon starting,
5808e7
+    the <filename>pstore.conf</filename> is read to obtain options, then the /sys/fs/pstore
5808e7
+    directory contents are processed according to the options. Pstore files are written to the
5808e7
+    journal, and optionally saved into /var/lib/systemd/pstore.</para>
5808e7
+  </refsect1>
5808e7
+
5808e7
+  <refsect1>
5808e7
+    <title>Configuration</title>
5808e7
+
5808e7
+    <para>The behavior of <command>systemd-pstore</command> is configured through the configuration file
5808e7
+    <filename>/etc/systemd/pstore.conf</filename> and corresponding snippets
5808e7
+    <filename>/etc/systemd/pstore.conf.d/*.conf</filename>, see
5808e7
+    <citerefentry><refentrytitle>pstore.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
5808e7
+    </para>
5808e7
+
5808e7
+    <refsect2>
5808e7
+      <title>Disabling pstore processing</title>
5808e7
+
5808e7
+      <para>To disable pstore processing by <command>systemd-pstore</command>,
5808e7
+      set <programlisting>Storage=none</programlisting> in
5808e7
+      <citerefentry><refentrytitle>pstore.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
5808e7
+      </para>
5808e7
+    </refsect2>
5808e7
+  </refsect1>
5808e7
+
5808e7
+  <refsect1>
5808e7
+    <title>Usage</title>
5808e7
+    <para>Data stored in the journal can be viewed with
5808e7
+    <citerefentry><refentrytitle>journalctl</refentrytitle><manvolnum>1</manvolnum></citerefentry>
5808e7
+    as usual.</para>
5808e7
+  </refsect1>
5808e7
+
5808e7
+  <refsect1>
5808e7
+    <title>See Also</title>
5808e7
+    <para>
5808e7
+      <citerefentry><refentrytitle>pstore.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>
5808e7
+    </para>
5808e7
+  </refsect1>
5808e7
+</refentry>
5808e7
diff --git a/meson.build b/meson.build
5808e7
index af4cf331da..972a8fb6f7 100644
5808e7
--- a/meson.build
5808e7
+++ b/meson.build
5808e7
@@ -1224,6 +1224,7 @@ foreach term : ['utmp',
5808e7
                 'environment-d',
5808e7
                 'binfmt',
5808e7
                 'coredump',
5808e7
+                'pstore',
5808e7
                 'resolve',
5808e7
                 'logind',
5808e7
                 'hostnamed',
5808e7
@@ -1439,6 +1440,7 @@ subdir('src/network')
5808e7
 subdir('src/analyze')
5808e7
 subdir('src/journal-remote')
5808e7
 subdir('src/coredump')
5808e7
+subdir('src/pstore')
5808e7
 subdir('src/hostname')
5808e7
 subdir('src/import')
5808e7
 subdir('src/kernel-install')
5808e7
@@ -2151,6 +2153,23 @@ if conf.get('ENABLE_COREDUMP') == 1
5808e7
         public_programs += [exe]
5808e7
 endif
5808e7
 
5808e7
+if conf.get('ENABLE_PSTORE') == 1
5808e7
+        executable('systemd-pstore',
5808e7
+                   systemd_pstore_sources,
5808e7
+                   include_directories : includes,
5808e7
+                   link_with : [libshared],
5808e7
+                   dependencies : [threads,
5808e7
+                                   libacl,
5808e7
+                                   libdw,
5808e7
+                                   libxz,
5808e7
+                                   liblz4],
5808e7
+                   install_rpath : rootlibexecdir,
5808e7
+                   install : true,
5808e7
+                   install_dir : rootlibexecdir)
5808e7
+
5808e7
+        public_programs += exe
5808e7
+endif
5808e7
+
5808e7
 if conf.get('ENABLE_BINFMT') == 1
5808e7
         exe = executable('systemd-binfmt',
5808e7
                          'src/binfmt/binfmt.c',
5808e7
@@ -3014,6 +3033,7 @@ foreach tuple : [
5808e7
         ['resolve'],
5808e7
         ['DNS-over-TLS'],
5808e7
         ['coredump'],
5808e7
+        ['pstore'],
5808e7
         ['polkit'],
5808e7
         ['legacy pkla',      install_polkit_pkla],
5808e7
         ['efi'],
5808e7
diff --git a/meson_options.txt b/meson_options.txt
5808e7
index 213079ac15..5624304bf4 100644
5808e7
--- a/meson_options.txt
5808e7
+++ b/meson_options.txt
5808e7
@@ -76,6 +76,8 @@ option('binfmt', type : 'boolean',
5808e7
        description : 'support for custom binary formats')
5808e7
 option('coredump', type : 'boolean',
5808e7
        description : 'install the coredump handler')
5808e7
+option('pstore', type : 'boolean',
5808e7
+       description : 'install the pstore archival tool')
5808e7
 option('logind', type : 'boolean',
5808e7
        description : 'install the systemd-logind stack')
5808e7
 option('hostnamed', type : 'boolean',
5808e7
diff --git a/src/pstore/meson.build b/src/pstore/meson.build
5808e7
new file mode 100644
5808e7
index 0000000000..adbac24b54
5808e7
--- /dev/null
5808e7
+++ b/src/pstore/meson.build
5808e7
@@ -0,0 +1,10 @@
5808e7
+# SPDX-License-Identifier: LGPL-2.1+
5808e7
+
5808e7
+systemd_pstore_sources = files('''
5808e7
+        pstore.c
5808e7
+'''.split())
5808e7
+
5808e7
+if conf.get('ENABLE_PSTORE') == 1
5808e7
+        install_data('pstore.conf',
5808e7
+                     install_dir : pkgsysconfdir)
5808e7
+endif
5808e7
diff --git a/src/pstore/pstore.c b/src/pstore/pstore.c
5808e7
new file mode 100644
5808e7
index 0000000000..f95e016eb6
5808e7
--- /dev/null
5808e7
+++ b/src/pstore/pstore.c
5808e7
@@ -0,0 +1,395 @@
5808e7
+/* SPDX-License-Identifier: LGPL-2.1+ */
5808e7
+
5808e7
+/* Copyright © 2019 Oracle and/or its affiliates. */
5808e7
+
5808e7
+/* Generally speaking, the pstore contains a small number of files
5808e7
+ * that in turn contain a small amount of data.  */
5808e7
+#include <errno.h>
5808e7
+#include <stdio.h>
5808e7
+#include <stdio_ext.h>
5808e7
+#include <sys/prctl.h>
5808e7
+#include <sys/xattr.h>
5808e7
+#include <unistd.h>
5808e7
+
5808e7
+#include "sd-daemon.h"
5808e7
+#include "sd-journal.h"
5808e7
+#include "sd-login.h"
5808e7
+#include "sd-messages.h"
5808e7
+
5808e7
+#include "acl-util.h"
5808e7
+#include "alloc-util.h"
5808e7
+#include "capability-util.h"
5808e7
+#include "cgroup-util.h"
5808e7
+#include "compress.h"
5808e7
+#include "conf-parser.h"
5808e7
+#include "copy.h"
5808e7
+#include "dirent-util.h"
5808e7
+#include "escape.h"
5808e7
+#include "fd-util.h"
5808e7
+#include "fileio.h"
5808e7
+#include "fs-util.h"
5808e7
+#include "io-util.h"
5808e7
+#include "journal-importer.h"
5808e7
+#include "log.h"
5808e7
+#include "macro.h"
5808e7
+#include "missing.h"
5808e7
+#include "mkdir.h"
5808e7
+#include "parse-util.h"
5808e7
+#include "process-util.h"
5808e7
+#include "signal-util.h"
5808e7
+#include "socket-util.h"
5808e7
+#include "special.h"
5808e7
+#include "string-table.h"
5808e7
+#include "string-util.h"
5808e7
+#include "strv.h"
5808e7
+#include "user-util.h"
5808e7
+#include "util.h"
5808e7
+
5808e7
+/* Command line argument handling */
5808e7
+typedef enum PStoreStorage {
5808e7
+        PSTORE_STORAGE_NONE,
5808e7
+        PSTORE_STORAGE_EXTERNAL,
5808e7
+        PSTORE_STORAGE_JOURNAL,
5808e7
+        _PSTORE_STORAGE_MAX,
5808e7
+        _PSTORE_STORAGE_INVALID = -1
5808e7
+} PStoreStorage;
5808e7
+
5808e7
+static const char* const pstore_storage_table[_PSTORE_STORAGE_MAX] = {
5808e7
+        [PSTORE_STORAGE_NONE] = "none",
5808e7
+        [PSTORE_STORAGE_EXTERNAL] = "external",
5808e7
+        [PSTORE_STORAGE_JOURNAL] = "journal",
5808e7
+};
5808e7
+
5808e7
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(pstore_storage, PStoreStorage);
5808e7
+static DEFINE_CONFIG_PARSE_ENUM(config_parse_pstore_storage, pstore_storage, PStoreStorage, "Failed to parse storage setting");
5808e7
+
5808e7
+static PStoreStorage arg_storage = PSTORE_STORAGE_EXTERNAL;
5808e7
+
5808e7
+static bool arg_unlink = true;
5808e7
+static const char *arg_sourcedir = "/sys/fs/pstore";
5808e7
+static const char *arg_archivedir = "/var/lib/systemd/pstore";
5808e7
+
5808e7
+static int parse_config(void) {
5808e7
+        static const ConfigTableItem items[] = {
5808e7
+                { "PStore", "Unlink",  config_parse_bool,           0, &arg_unlink },
5808e7
+                { "PStore", "Storage", config_parse_pstore_storage, 0, &arg_storage },
5808e7
+                {}
5808e7
+        };
5808e7
+
5808e7
+        return config_parse_many_nulstr(PKGSYSCONFDIR "/pstore.conf",
5808e7
+                                        CONF_PATHS_NULSTR("systemd/pstore.conf.d"),
5808e7
+                                        "PStore\0",
5808e7
+                                        config_item_table_lookup, items,
5808e7
+                                        CONFIG_PARSE_WARN, NULL);
5808e7
+}
5808e7
+
5808e7
+/* File list handling - PStoreEntry is the struct and
5808e7
+ * PStoreEntry is the type that contains all info
5808e7
+ * about a pstore entry.  */
5808e7
+typedef struct PStoreEntry {
5808e7
+        struct dirent dirent;
5808e7
+        bool is_binary;
5808e7
+        bool handled;
5808e7
+        char *content;
5808e7
+        size_t content_size;
5808e7
+} PStoreEntry;
5808e7
+
5808e7
+typedef struct PStoreList {
5808e7
+        PStoreEntry *entries;
5808e7
+        size_t n_entries;
5808e7
+        size_t n_entries_allocated;
5808e7
+} PStoreList;
5808e7
+
5808e7
+static void pstore_entries_reset(PStoreList *list) {
5808e7
+        for (size_t i = 0; i < list->n_entries; i++)
5808e7
+                free(list->entries[i].content);
5808e7
+        free(list->entries);
5808e7
+        list->n_entries = 0;
5808e7
+}
5808e7
+
5808e7
+static int compare_pstore_entries(const void *_a, const void *_b) {
5808e7
+        PStoreEntry *a = (PStoreEntry *)_a, *b = (PStoreEntry *)_b;
5808e7
+        return strcmp(a->dirent.d_name, b->dirent.d_name);
5808e7
+}
5808e7
+
5808e7
+static int move_file(PStoreEntry *pe, const char *subdir) {
5808e7
+        _cleanup_free_ char *ifd_path = NULL;
5808e7
+        _cleanup_free_ char *ofd_path = NULL;
5808e7
+        int r = 0;
5808e7
+        struct iovec iovec[2] = {};
5808e7
+        int n_iovec = 0;
5808e7
+        _cleanup_free_ void *field = NULL;
5808e7
+        const char *suffix = NULL;
5808e7
+        size_t field_size;
5808e7
+
5808e7
+        if (pe->handled)
5808e7
+                return 0;
5808e7
+
5808e7
+        ifd_path = path_join(NULL, arg_sourcedir, pe->dirent.d_name);
5808e7
+        if (!ifd_path)
5808e7
+                return log_oom();
5808e7
+
5808e7
+        ofd_path = path_join(arg_archivedir, subdir, pe->dirent.d_name);
5808e7
+        if (!ofd_path)
5808e7
+                return log_oom();
5808e7
+
5808e7
+        /* Always log to the journal */
5808e7
+        suffix = arg_storage == PSTORE_STORAGE_EXTERNAL ? strjoina(" moved to ", ofd_path) : (char *)".";
5808e7
+        field = strjoina("MESSAGE=PStore ", pe->dirent.d_name, suffix);
5808e7
+        iovec[n_iovec++] = IOVEC_MAKE_STRING(field);
5808e7
+
5808e7
+        field_size = strlen("FILE=") + pe->content_size;
5808e7
+        field = malloc(field_size);
5808e7
+        if (!field)
5808e7
+                return log_oom();
5808e7
+        memcpy(stpcpy(field, "FILE="), pe->content, pe->content_size);
5808e7
+        iovec[n_iovec++] = IOVEC_MAKE(field, field_size);
5808e7
+
5808e7
+        r = sd_journal_sendv(iovec, n_iovec);
5808e7
+        if (r < 0)
5808e7
+                return log_error_errno(r, "Failed to log pstore entry: %m");
5808e7
+
5808e7
+        if (arg_storage == PSTORE_STORAGE_EXTERNAL) {
5808e7
+                /* Move file from pstore to external storage */
5808e7
+                r = mkdir_parents(ofd_path, 0755);
5808e7
+                if (r < 0)
5808e7
+                        return log_error_errno(r, "Failed to create directoy %s: %m", ofd_path);
5808e7
+                r = copy_file_atomic(ifd_path, ofd_path, 0600, 0, COPY_REPLACE);
5808e7
+                if (r < 0)
5808e7
+                        return log_error_errno(r, "Failed to copy_file_atomic: %s to %s", ifd_path, ofd_path);
5808e7
+        }
5808e7
+
5808e7
+        /* If file copied properly, remove it from pstore */
5808e7
+        if (arg_unlink)
5808e7
+                (void) unlink(ifd_path);
5808e7
+
5808e7
+        pe->handled = true;
5808e7
+
5808e7
+        return 0;
5808e7
+}
5808e7
+
5808e7
+static int write_dmesg(const char *dmesg, size_t size, const char *id) {
5808e7
+        _cleanup_(unlink_and_freep) char *ofd_path = NULL;
5808e7
+        _cleanup_free_ char *tmp_path = NULL;
5808e7
+        _cleanup_close_ int ofd = -1;
5808e7
+        ssize_t wr;
5808e7
+        int r;
5808e7
+
5808e7
+        if (isempty(dmesg) || size == 0)
5808e7
+                return 0;
5808e7
+
5808e7
+        /* log_info("Record ID %s", id); */
5808e7
+
5808e7
+        ofd_path = path_join(arg_archivedir, id, "dmesg.txt");
5808e7
+        if (!ofd_path)
5808e7
+                return log_oom();
5808e7
+
5808e7
+        ofd = open_tmpfile_linkable(ofd_path, O_CLOEXEC|O_CREAT|O_TRUNC|O_WRONLY, &tmp_path);
5808e7
+        if (ofd < 0)
5808e7
+                return log_error_errno(ofd, "Failed to open temporary file %s: %m", ofd_path);
5808e7
+        wr = write(ofd, dmesg, size);
5808e7
+        if (wr < 0)
5808e7
+                return log_error_errno(errno, "Failed to store dmesg to %s: %m", ofd_path);
5808e7
+        if (wr != (ssize_t)size)
5808e7
+                return log_error_errno(-EIO, "Failed to store dmesg to %s. %zu bytes are lost.", ofd_path, size - wr);
5808e7
+        r = link_tmpfile(ofd, tmp_path, ofd_path);
5808e7
+        if (r < 0)
5808e7
+                return log_error_errno(r, "Failed to write temporary file %s: %m", ofd_path);
5808e7
+        ofd_path = mfree(ofd_path);
5808e7
+
5808e7
+        return 0;
5808e7
+}
5808e7
+
5808e7
+static void process_dmesg_files(PStoreList *list) {
5808e7
+        /* Move each unhandled "dmesg-*" entry via move_file() and rebuild one dmesg text per record-id group */
5808e7
+        PStoreEntry *pe;
5808e7
+        _cleanup_free_ char *dmesg = NULL;
5808e7
+        size_t dmesg_size = 0;
5808e7
+        _cleanup_free_ char *dmesg_id = NULL;
5808e7
+
5808e7
+        /* Handle each dmesg file: files processed in reverse
5808e7
+         * order so as to properly reconstruct original dmesg */
5808e7
+        for (size_t n = list->n_entries; n > 0; n--) {
5808e7
+                bool move_file_and_continue = false;
5808e7
+                _cleanup_free_ char *pe_id = NULL;
5808e7
+                char *p, *grown;
5808e7
+                size_t plen;
5808e7
+
5808e7
+                pe = &list->entries[n-1];
5808e7
+
5808e7
+                if (pe->handled)
5808e7
+                        continue;
5808e7
+                if (!startswith(pe->dirent.d_name, "dmesg-"))
5808e7
+                        continue;
5808e7
+
5808e7
+                if (endswith(pe->dirent.d_name, ".enc.z")) /* indicates a problem */
5808e7
+                        move_file_and_continue = true;
5808e7
+                p = strrchr(pe->dirent.d_name, '-');
5808e7
+                if (!p)
5808e7
+                        move_file_and_continue = true;
5808e7
+
5808e7
+                if (move_file_and_continue) {
5808e7
+                        /* A dmesg file on which we do NO additional processing */
5808e7
+                        (void) move_file(pe, NULL);
5808e7
+                        continue;
5808e7
+                }
5808e7
+
5808e7
+                /* See if this file is one of a related group of files
5808e7
+                 * in order to reconstruct dmesg */
5808e7
+
5808e7
+                /* When dmesg is written into pstore, it is done so in
5808e7
+                 * small chunks, whatever the exchange buffer size is
5808e7
+                 * with the underlying pstore backend (e.g. EFI may be
5808e7
+                 * ~2KiB), which means an example pstore with approximately
5808e7
+                 * 64KB of storage may have up to roughly 32 dmesg files
5808e7
+                 * that could be related, depending upon the size of the
5808e7
+                 * original dmesg.
5808e7
+                 *
5808e7
+                 * Here we look at the dmesg filename and try to discern
5808e7
+                 * if files are part of a related group, meaning the same
5808e7
+                 * original dmesg.
5808e7
+                 *
5808e7
+                 * The two known pstore backends are EFI and ERST. These
5808e7
+                 * backends store data in the Common Platform Error
5808e7
+                 * Record, CPER, format. The dmesg- filename contains the
5808e7
+                 * CPER record id, a 64bit number (in decimal notation).
5808e7
+                 * In Linux, the record id is encoded with two digits for
5808e7
+                 * the dmesg part (chunk) number and 3 digits for the
5808e7
+                 * count number. So allowing an additional digit to
5808e7
+                 * compensate for advancing time, this code ignores the
5808e7
+                 * last six digits of the filename in determining the
5808e7
+                 * record id.
5808e7
+                 *
5808e7
+                 * For the EFI backend, the record id encodes an id in the
5808e7
+                 * upper 32 bits, and a timestamp in the lower 32-bits.
5808e7
+                 * So ignoring the least significant 6 digits has proven
5808e7
+                 * to generally identify related dmesg entries.  */
5808e7
+#define PSTORE_FILENAME_IGNORE 6
5808e7
+
5808e7
+                /* determine common portion of record id */
5808e7
+                ++p; /* move beyond dmesg- */
5808e7
+                plen = strlen(p);
5808e7
+                if (plen > PSTORE_FILENAME_IGNORE) {
5808e7
+                        pe_id = memdup_suffix0(p, plen - PSTORE_FILENAME_IGNORE);
5808e7
+                        if (!pe_id) {
5808e7
+                                log_oom();
5808e7
+                                return;
5808e7
+                        }
5808e7
+                } else
5808e7
+                        pe_id = mfree(pe_id);
5808e7
+
5808e7
+                /* Now move file from pstore to archive storage */
5808e7
+                (void) move_file(pe, pe_id);
5808e7
+
5808e7
+                /* If the current record id is NOT the same as the
5808e7
+                 * previous record id, then start a new dmesg.txt file */
5808e7
+                if (!pe_id || !dmesg_id || !streq(pe_id, dmesg_id)) {
5808e7
+                        /* Encountered a new dmesg group, close out old one, open new one */
5808e7
+                        if (dmesg) {
5808e7
+                                (void) write_dmesg(dmesg, dmesg_size, dmesg_id);
5808e7
+                                dmesg = mfree(dmesg);
5808e7
+                                dmesg_size = 0;
5808e7
+                        }
5808e7
+
5808e7
+                        /* now point dmesg_id to storage of pe_id */
5808e7
+                        free_and_replace(dmesg_id, pe_id);
5808e7
+                }
5808e7
+
5808e7
+                /* Reconstruction is a best-effort courtesy, so errors are not logged; realloc() goes through a temporary so a failure keeps the old buffer and its size in sync */
5808e7
+                grown = realloc(dmesg, dmesg_size + strlen(pe->dirent.d_name) + strlen(":\n") + pe->content_size + 1);
5808e7
+                if (grown) {
5808e7
+                        dmesg = grown;
5808e7
+                        dmesg_size += sprintf(&dmesg[dmesg_size], "%s:\n", pe->dirent.d_name);
5808e7
+                        if (pe->content) {
5808e7
+                                memcpy(&dmesg[dmesg_size], pe->content, pe->content_size);
5808e7
+                                dmesg_size += pe->content_size;
5808e7
+                        }
5808e7
+                }
5808e7
+                pe_id = mfree(pe_id);
5808e7
+        }
5808e7
+        if (dmesg)
5808e7
+                (void) write_dmesg(dmesg, dmesg_size, dmesg_id);
5808e7
+}
5808e7
+
5808e7
+static int list_files(PStoreList *list, const char *sourcepath) {
5808e7
+        _cleanup_(closedirp) DIR *dirp = NULL;
5808e7
+        struct dirent *de;
5808e7
+        int r;
5808e7
+
5808e7
+        dirp = opendir(sourcepath);
5808e7
+        if (!dirp)
5808e7
+                return log_error_errno(errno, "Failed to opendir %s: %m", sourcepath);
5808e7
+
5808e7
+        FOREACH_DIRENT(de, dirp, return log_error_errno(errno, "Failed to iterate through %s: %m", sourcepath)) {
5808e7
+                _cleanup_free_ char *ifd_path = NULL;
5808e7
+
5808e7
+                ifd_path = path_join(NULL, sourcepath, de->d_name);
5808e7
+                if (!ifd_path)
5808e7
+                        return log_oom();
5808e7
+
5808e7
+                _cleanup_free_ char *buf = NULL;
5808e7
+                size_t buf_size;
5808e7
+
5808e7
+                /* Now read contents of pstore file; an unreadable file is only warned about and skipped */
5808e7
+                r = read_full_file(ifd_path, &buf, &buf_size);
5808e7
+                if (r < 0) {
5808e7
+                        log_warning_errno(r, "Failed to read file %s: %m", ifd_path);
5808e7
+                        continue;
5808e7
+                }
5808e7
+
5808e7
+                if (!GREEDY_REALLOC(list->entries, list->n_entries_allocated, list->n_entries + 1))
5808e7
+                        return log_oom();
5808e7
+
5808e7
+                list->entries[list->n_entries++] = (PStoreEntry) {
5808e7
+                        .dirent = *de,
5808e7
+                        .content = TAKE_PTR(buf),
5808e7
+                        .content_size = buf_size,
5808e7
+                        .is_binary = true,
5808e7
+                        .handled = false,
5808e7
+                };
5808e7
+        }
5808e7
+
5808e7
+        return 0; /* not r: a skipped last entry must not turn the whole listing into a failure */
5808e7
+}
5808e7
+
5808e7
+static int run(int argc, char *argv[]) { /* returns 0, or the negative value reported by list_files() */
5808e7
+        _cleanup_(pstore_entries_reset) PStoreList list = {};
5808e7
+        int r;
5808e7
+
5808e7
+        log_open();
5808e7
+
5808e7
+        /* Ignore all parse errors: the result of parse_config() is deliberately discarded */
5808e7
+        (void) parse_config();
5808e7
+
5808e7
+        log_debug("Selected storage '%s'.", pstore_storage_to_string(arg_storage));
5808e7
+        log_debug("Selected Unlink '%d'.", arg_unlink);
5808e7
+
5808e7
+        if (arg_storage == PSTORE_STORAGE_NONE)
5808e7
+                /* Do nothing, intentionally, leaving pstore untouched */
5808e7
+                return 0;
5808e7
+
5808e7
+        /* Obtain list of files in pstore (read from arg_sourcedir) */
5808e7
+        r = list_files(&list, arg_sourcedir);
5808e7
+        if (r < 0)
5808e7
+                return r;
5808e7
+
5808e7
+        /* Handle each pstore file */
5808e7
+        /* Sort files lexicographically ascending, generally needed by all */
5808e7
+        qsort_safe(list.entries, list.n_entries, sizeof(PStoreEntry), compare_pstore_entries);
5808e7
+
5808e7
+        /* Process known file types (currently only the "dmesg-" entries) */
5808e7
+        process_dmesg_files(&list);
5808e7
+
5808e7
+        /* Move left over files out of pstore. NOTE(review): every entry is passed again; presumably move_file() skips handled ones - confirm */
5808e7
+        for (size_t n = 0; n < list.n_entries; n++)
5808e7
+                move_file(&list.entries[n], NULL);
5808e7
+
5808e7
+        return 0;
5808e7
+}
5808e7
+
5808e7
+int main(int argc, char *argv[]) {
5808e7
+        /* run() signals failure with a negative value; translate that into the process exit status */
5808e7
+        if (run(argc, argv) < 0)
5808e7
+                return EXIT_FAILURE;
5808e7
+        return EXIT_SUCCESS;
5808e7
+}
5808e7
diff --git a/src/pstore/pstore.conf b/src/pstore/pstore.conf
5808e7
new file mode 100644
5808e7
index 0000000000..93a8b6707c
5808e7
--- /dev/null
5808e7
+++ b/src/pstore/pstore.conf
5808e7
@@ -0,0 +1,16 @@
5808e7
+#  This file is part of systemd.
5808e7
+#
5808e7
+#  systemd is free software; you can redistribute it and/or modify it
5808e7
+#  under the terms of the GNU Lesser General Public License as published by
5808e7
+#  the Free Software Foundation; either version 2.1 of the License, or
5808e7
+#  (at your option) any later version.
5808e7
+#
5808e7
+# Entries in this file show the compile time defaults.
5808e7
+# You can change settings by editing this file.
5808e7
+# Defaults can be restored by simply deleting this file.
5808e7
+#
5808e7
+# See pstore.conf(5) for details.
5808e7
+
5808e7
+[PStore]
5808e7
+#Storage=external
5808e7
+#Unlink=yes
5808e7
diff --git a/units/meson.build b/units/meson.build
5808e7
index a74fa95195..e8e64eb30a 100644
5808e7
--- a/units/meson.build
5808e7
+++ b/units/meson.build
5808e7
@@ -136,6 +136,7 @@ in_units = [
5808e7
         ['systemd-binfmt.service',               'ENABLE_BINFMT',
5808e7
          'sysinit.target.wants/'],
5808e7
         ['systemd-coredump@.service',            'ENABLE_COREDUMP'],
5808e7
+        ['systemd-pstore.service',               'ENABLE_PSTORE'],
5808e7
         ['systemd-firstboot.service',            'ENABLE_FIRSTBOOT',
5808e7
          'sysinit.target.wants/'],
5808e7
         ['systemd-fsck-root.service',            ''],
5808e7
diff --git a/units/systemd-pstore.service.in b/units/systemd-pstore.service.in
5808e7
new file mode 100644
5808e7
index 0000000000..fec2b1aebf
5808e7
--- /dev/null
5808e7
+++ b/units/systemd-pstore.service.in
5808e7
@@ -0,0 +1,24 @@
5808e7
+#  SPDX-License-Identifier: LGPL-2.1+
5808e7
+#
5808e7
+#  This file is part of systemd.
5808e7
+#
5808e7
+#  systemd is free software; you can redistribute it and/or modify it
5808e7
+#  under the terms of the GNU Lesser General Public License as published by
5808e7
+#  the Free Software Foundation; either version 2.1 of the License, or
5808e7
+#  (at your option) any later version.
5808e7
+
5808e7
+[Unit]
5808e7
+Description=Platform Persistent Storage Archival
5808e7
+Documentation=man:systemd-pstore(8)
5808e7
+DefaultDependencies=no
5808e7
+Wants=systemd-remount-fs.service
5808e7
+After=systemd-remount-fs.service
5808e7
+
5808e7
+[Service]
5808e7
+Type=oneshot
5808e7
+ExecStart=@rootlibexecdir@/systemd-pstore
5808e7
+RemainAfterExit=yes
5808e7
+StateDirectory=systemd/pstore
5808e7
+
5808e7
+[Install]
5808e7
+WantedBy=systemd-remount-fs.service