bf143f
From 86aeb4fd7ff9395afba574e422d83f990ce1f047 Mon Sep 17 00:00:00 2001
bf143f
From: Janosch Frank <frankja@linux.ibm.com>
bf143f
Date: Mon, 17 Oct 2022 08:38:22 +0000
bf143f
Subject: [PATCH 41/42] s390x: pv: Add dump support
bf143f
MIME-Version: 1.0
bf143f
Content-Type: text/plain; charset=UTF-8
bf143f
Content-Transfer-Encoding: 8bit
bf143f
bf143f
RH-Author: Cédric Le Goater <clg@redhat.com>
bf143f
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
bf143f
RH-Bugzilla: 1664378 2043909
bf143f
RH-Acked-by: Thomas Huth <thuth@redhat.com>
bf143f
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
bf143f
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
bf143f
RH-Commit: [41/41] 2731c2329276e76013e3b3df21e9743bc74edd2b
bf143f
bf143f
Sometimes dumping a guest from the outside is the only way to get the
bf143f
data that is needed. This can be the case if a dumping mechanism like
bf143f
KDUMP hasn't been configured or data needs to be fetched at a specific
bf143f
point. Dumping a protected guest from the outside without help from
bf143f
fw/hw doesn't yield sufficient data to be useful. Hence we now
bf143f
introduce PV dump support.
bf143f
bf143f
The PV dump support works by integrating the firmware into the dump
bf143f
process. New Ultravisor calls are used to initiate the dump process,
bf143f
dump cpu data, dump memory state and lastly complete the dump process.
bf143f
The UV calls are exposed by KVM via the new KVM_PV_DUMP command and
bf143f
its subcommands. The guest's data is fully encrypted and can only be
bf143f
decrypted by the entity that owns the customer communication key for
bf143f
the dumped guest. Also dumping needs to be allowed via a flag in the
bf143f
SE header.
bf143f
bf143f
On the QEMU side of things we store the PV dump data in the newly
bf143f
introduced architecture ELF sections (storage state and completion
bf143f
data) and the cpu notes (for cpu dump data).
bf143f
bf143f
Users can use the zgetdump tool to convert the encrypted QEMU dump to an
bf143f
unencrypted one.
bf143f
bf143f
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
bf143f
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
bf143f
Message-Id: <20221017083822.43118-11-frankja@linux.ibm.com>
bf143f
(cherry picked from commit 113d8f4e95cf0450bea421263de6ec016c779ad0)
bf143f
Signed-off-by: Cédric Le Goater <clg@redhat.com>
bf143f
---
bf143f
 dump/dump.c              |  12 +-
bf143f
 include/sysemu/dump.h    |   5 +
bf143f
 target/s390x/arch_dump.c | 262 +++++++++++++++++++++++++++++++++++----
bf143f
 3 files changed, 246 insertions(+), 33 deletions(-)
bf143f
bf143f
diff --git a/dump/dump.c b/dump/dump.c
bf143f
index 4aa8fb64d2..5dee060b73 100644
bf143f
--- a/dump/dump.c
bf143f
+++ b/dump/dump.c
bf143f
@@ -709,9 +709,9 @@ static void dump_begin(DumpState *s, Error **errp)
bf143f
     write_elf_notes(s, errp);
bf143f
 }
bf143f
 
bf143f
-static int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
bf143f
-                                           int64_t filter_area_start,
bf143f
-                                           int64_t filter_area_length)
bf143f
+int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
bf143f
+                                    int64_t filter_area_start,
bf143f
+                                    int64_t filter_area_length)
bf143f
 {
bf143f
     int64_t size, left, right;
bf143f
 
bf143f
@@ -729,9 +729,9 @@ static int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
bf143f
     return size;
bf143f
 }
bf143f
 
bf143f
-static int64_t dump_filtered_memblock_start(GuestPhysBlock *block,
bf143f
-                                            int64_t filter_area_start,
bf143f
-                                            int64_t filter_area_length)
bf143f
+int64_t dump_filtered_memblock_start(GuestPhysBlock *block,
bf143f
+                                     int64_t filter_area_start,
bf143f
+                                     int64_t filter_area_length)
bf143f
 {
bf143f
     if (filter_area_length) {
bf143f
         /* return -1 if the block is not within filter area */
bf143f
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
bf143f
index 38ccac7190..4ffed0b659 100644
bf143f
--- a/include/sysemu/dump.h
bf143f
+++ b/include/sysemu/dump.h
bf143f
@@ -215,4 +215,9 @@ typedef struct DumpState {
bf143f
 uint16_t cpu_to_dump16(DumpState *s, uint16_t val);
bf143f
 uint32_t cpu_to_dump32(DumpState *s, uint32_t val);
bf143f
 uint64_t cpu_to_dump64(DumpState *s, uint64_t val);
bf143f
+
bf143f
+int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start,
bf143f
+                                    int64_t filter_area_length);
bf143f
+int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start,
bf143f
+                                     int64_t filter_area_length);
bf143f
 #endif
bf143f
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
bf143f
index f60a14920d..a2329141e8 100644
bf143f
--- a/target/s390x/arch_dump.c
bf143f
+++ b/target/s390x/arch_dump.c
bf143f
@@ -12,11 +12,13 @@
bf143f
  */
bf143f
 
bf143f
 #include "qemu/osdep.h"
bf143f
+#include "qemu/units.h"
bf143f
 #include "cpu.h"
bf143f
 #include "s390x-internal.h"
bf143f
 #include "elf.h"
bf143f
 #include "sysemu/dump.h"
bf143f
-
bf143f
+#include "hw/s390x/pv.h"
bf143f
+#include "kvm/kvm_s390x.h"
bf143f
 
bf143f
 struct S390xUserRegsStruct {
bf143f
     uint64_t psw[2];
bf143f
@@ -76,9 +78,16 @@ typedef struct noteStruct {
bf143f
         uint64_t todcmp;
bf143f
         uint32_t todpreg;
bf143f
         uint64_t ctrs[16];
bf143f
+        uint8_t dynamic[1];  /*
bf143f
+                              * Would be a flexible array member, if
bf143f
+                              * that was legal inside a union. Real
bf143f
+                              * size comes from PV info interface.
bf143f
+                              */
bf143f
     } contents;
bf143f
 } QEMU_PACKED Note;
bf143f
 
bf143f
+static bool pv_dump_initialized;
bf143f
+
bf143f
 static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id)
bf143f
 {
bf143f
     int i;
bf143f
@@ -177,28 +186,39 @@ static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu, int id)
bf143f
     note->contents.prefix = cpu_to_be32((uint32_t)(cpu->env.psa));
bf143f
 }
bf143f
 
bf143f
+static void s390x_write_elf64_pv(Note *note, S390CPU *cpu, int id)
bf143f
+{
bf143f
+    note->hdr.n_type = cpu_to_be32(NT_S390_PV_CPU_DATA);
bf143f
+    if (!pv_dump_initialized) {
bf143f
+        return;
bf143f
+    }
bf143f
+    kvm_s390_dump_cpu(cpu, &note->contents.dynamic);
bf143f
+}
bf143f
 
bf143f
 typedef struct NoteFuncDescStruct {
bf143f
     int contents_size;
bf143f
+    uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents */
bf143f
     void (*note_contents_func)(Note *note, S390CPU *cpu, int id);
bf143f
+    bool pvonly;
bf143f
 } NoteFuncDesc;
bf143f
 
bf143f
 static const NoteFuncDesc note_core[] = {
bf143f
-    {sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus},
bf143f
-    {sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset},
bf143f
-    { 0, NULL}
bf143f
+    {sizeof_field(Note, contents.prstatus), NULL, s390x_write_elf64_prstatus, false},
bf143f
+    {sizeof_field(Note, contents.fpregset), NULL, s390x_write_elf64_fpregset, false},
bf143f
+    { 0, NULL, NULL, false}
bf143f
 };
bf143f
 
bf143f
 static const NoteFuncDesc note_linux[] = {
bf143f
-    {sizeof_field(Note, contents.prefix),   s390x_write_elf64_prefix},
bf143f
-    {sizeof_field(Note, contents.ctrs),     s390x_write_elf64_ctrs},
bf143f
-    {sizeof_field(Note, contents.timer),    s390x_write_elf64_timer},
bf143f
-    {sizeof_field(Note, contents.todcmp),   s390x_write_elf64_todcmp},
bf143f
-    {sizeof_field(Note, contents.todpreg),  s390x_write_elf64_todpreg},
bf143f
-    {sizeof_field(Note, contents.vregslo),  s390x_write_elf64_vregslo},
bf143f
-    {sizeof_field(Note, contents.vregshi),  s390x_write_elf64_vregshi},
bf143f
-    {sizeof_field(Note, contents.gscb),     s390x_write_elf64_gscb},
bf143f
-    { 0, NULL}
bf143f
+    {sizeof_field(Note, contents.prefix),   NULL, s390x_write_elf64_prefix,  false},
bf143f
+    {sizeof_field(Note, contents.ctrs),     NULL, s390x_write_elf64_ctrs,    false},
bf143f
+    {sizeof_field(Note, contents.timer),    NULL, s390x_write_elf64_timer,   false},
bf143f
+    {sizeof_field(Note, contents.todcmp),   NULL, s390x_write_elf64_todcmp,  false},
bf143f
+    {sizeof_field(Note, contents.todpreg),  NULL, s390x_write_elf64_todpreg, false},
bf143f
+    {sizeof_field(Note, contents.vregslo),  NULL, s390x_write_elf64_vregslo, false},
bf143f
+    {sizeof_field(Note, contents.vregshi),  NULL, s390x_write_elf64_vregshi, false},
bf143f
+    {sizeof_field(Note, contents.gscb),     NULL, s390x_write_elf64_gscb,    false},
bf143f
+    {0, kvm_s390_pv_dmp_get_size_cpu,       s390x_write_elf64_pv, true},
bf143f
+    { 0, NULL, NULL, false}
bf143f
 };
bf143f
 
bf143f
 static int s390x_write_elf64_notes(const char *note_name,
bf143f
@@ -207,22 +227,41 @@ static int s390x_write_elf64_notes(const char *note_name,
bf143f
                                        DumpState *s,
bf143f
                                        const NoteFuncDesc *funcs)
bf143f
 {
bf143f
-    Note note;
bf143f
+    Note note, *notep;
bf143f
     const NoteFuncDesc *nf;
bf143f
-    int note_size;
bf143f
+    int note_size, content_size;
bf143f
     int ret = -1;
bf143f
 
bf143f
     assert(strlen(note_name) < sizeof(note.name));
bf143f
 
bf143f
     for (nf = funcs; nf->note_contents_func; nf++) {
bf143f
-        memset(&note, 0, sizeof(note));
bf143f
-        note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
bf143f
-        note.hdr.n_descsz = cpu_to_be32(nf->contents_size);
bf143f
-        g_strlcpy(note.name, note_name, sizeof(note.name));
bf143f
-        (*nf->note_contents_func)(&note, cpu, id);
bf143f
+        notep = &note;
bf143f
+        if (nf->pvonly && !s390_is_pv()) {
bf143f
+            continue;
bf143f
+        }
bf143f
+
bf143f
+        content_size = nf->note_size_func ? nf->note_size_func() : nf->contents_size;
bf143f
+        note_size = sizeof(note) - sizeof(notep->contents) + content_size;
bf143f
+
bf143f
+        /* Notes with dynamic sizes need to allocate a note */
bf143f
+        if (nf->note_size_func) {
bf143f
+            notep = g_malloc(note_size);
bf143f
+        }
bf143f
+
bf143f
+        memset(notep, 0, sizeof(note));
bf143f
 
bf143f
-        note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size;
bf143f
-        ret = f(&note, note_size, s);
bf143f
+        /* Setup note header data */
bf143f
+        notep->hdr.n_descsz = cpu_to_be32(content_size);
bf143f
+        notep->hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
bf143f
+        g_strlcpy(notep->name, note_name, sizeof(notep->name));
bf143f
+
bf143f
+        /* Get contents and write them out */
bf143f
+        (*nf->note_contents_func)(notep, cpu, id);
bf143f
+        ret = f(notep, note_size, s);
bf143f
+
bf143f
+        if (nf->note_size_func) {
bf143f
+            g_free(notep);
bf143f
+        }
bf143f
 
bf143f
         if (ret < 0) {
bf143f
             return -1;
bf143f
@@ -247,13 +286,179 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
bf143f
     return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux);
bf143f
 }
bf143f
 
bf143f
+/* PV dump section size functions */
bf143f
+static uint64_t get_mem_state_size_from_len(uint64_t len)
bf143f
+{
bf143f
+    return (len / (MiB)) * kvm_s390_pv_dmp_get_size_mem_state();
bf143f
+}
bf143f
+
bf143f
+static uint64_t get_size_mem_state(DumpState *s)
bf143f
+{
bf143f
+    return get_mem_state_size_from_len(s->total_size);
bf143f
+}
bf143f
+
bf143f
+static uint64_t get_size_completion_data(DumpState *s)
bf143f
+{
bf143f
+    return kvm_s390_pv_dmp_get_size_completion_data();
bf143f
+}
bf143f
+
bf143f
+/* PV dump section data functions*/
bf143f
+static int get_data_completion(DumpState *s, uint8_t *buff)
bf143f
+{
bf143f
+    int rc;
bf143f
+
bf143f
+    if (!pv_dump_initialized) {
bf143f
+        return 0;
bf143f
+    }
bf143f
+    rc = kvm_s390_dump_completion_data(buff);
bf143f
+    if (!rc) {
bf143f
+            pv_dump_initialized = false;
bf143f
+    }
bf143f
+    return rc;
bf143f
+}
bf143f
+
bf143f
+static int get_mem_state(DumpState *s, uint8_t *buff)
bf143f
+{
bf143f
+    int64_t memblock_size, memblock_start;
bf143f
+    GuestPhysBlock *block;
bf143f
+    uint64_t off;
bf143f
+    int rc;
bf143f
+
bf143f
+    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
bf143f
+        memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin,
bf143f
+                                                      s->filter_area_length);
bf143f
+        if (memblock_start == -1) {
bf143f
+            continue;
bf143f
+        }
bf143f
+
bf143f
+        memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin,
bf143f
+                                                    s->filter_area_length);
bf143f
+
bf143f
+        off = get_mem_state_size_from_len(block->target_start);
bf143f
+
bf143f
+        rc = kvm_s390_dump_mem_state(block->target_start,
bf143f
+                                     get_mem_state_size_from_len(memblock_size),
bf143f
+                                     buff + off);
bf143f
+        if (rc) {
bf143f
+            return rc;
bf143f
+        }
bf143f
+    }
bf143f
+
bf143f
+    return 0;
bf143f
+}
bf143f
+
bf143f
+static struct sections {
bf143f
+    uint64_t (*sections_size_func)(DumpState *s);
bf143f
+    int (*sections_contents_func)(DumpState *s, uint8_t *buff);
bf143f
+    char sctn_str[12];
bf143f
+} sections[] = {
bf143f
+    { get_size_mem_state, get_mem_state, "pv_mem_meta"},
bf143f
+    { get_size_completion_data, get_data_completion, "pv_compl"},
bf143f
+    {NULL , NULL, ""}
bf143f
+};
bf143f
+
bf143f
+static uint64_t arch_sections_write_hdr(DumpState *s, uint8_t *buff)
bf143f
+{
bf143f
+    Elf64_Shdr *shdr = (void *)buff;
bf143f
+    struct sections *sctn = sections;
bf143f
+    uint64_t off = s->section_offset;
bf143f
+
bf143f
+    if (!pv_dump_initialized) {
bf143f
+        return 0;
bf143f
+    }
bf143f
+
bf143f
+    for (; sctn->sections_size_func; off += shdr->sh_size, sctn++, shdr++) {
bf143f
+        memset(shdr, 0, sizeof(*shdr));
bf143f
+        shdr->sh_type = SHT_PROGBITS;
bf143f
+        shdr->sh_offset = off;
bf143f
+        shdr->sh_size = sctn->sections_size_func(s);
bf143f
+        shdr->sh_name = s->string_table_buf->len;
bf143f
+        g_array_append_vals(s->string_table_buf, sctn->sctn_str, sizeof(sctn->sctn_str));
bf143f
+    }
bf143f
+
bf143f
+    return (uintptr_t)shdr - (uintptr_t)buff;
bf143f
+}
bf143f
+
bf143f
+
bf143f
+/* Add arch specific number of sections and their respective sizes */
bf143f
+static void arch_sections_add(DumpState *s)
bf143f
+{
bf143f
+    struct sections *sctn = sections;
bf143f
+
bf143f
+    /*
bf143f
+     * We only do a PV dump if we are running a PV guest, KVM supports
bf143f
+     * the dump API and we got valid dump length information.
bf143f
+     */
bf143f
+    if (!s390_is_pv() || !kvm_s390_get_protected_dump() ||
bf143f
+        !kvm_s390_pv_info_basic_valid()) {
bf143f
+        return;
bf143f
+    }
bf143f
+
bf143f
+    /*
bf143f
+     * Start the UV dump process by doing the initialize dump call via
bf143f
+     * KVM as the proxy.
bf143f
+     */
bf143f
+    if (!kvm_s390_dump_init()) {
bf143f
+        pv_dump_initialized = true;
bf143f
+    } else {
bf143f
+        /*
bf143f
+         * Dump init failed, maybe the guest owner disabled dumping.
bf143f
+         * We'll continue the non-PV dump process since this is no
bf143f
+         * reason to crash qemu.
bf143f
+         */
bf143f
+        return;
bf143f
+    }
bf143f
+
bf143f
+    for (; sctn->sections_size_func; sctn++) {
bf143f
+        s->shdr_num += 1;
bf143f
+        s->elf_section_data_size += sctn->sections_size_func(s);
bf143f
+    }
bf143f
+}
bf143f
+
bf143f
+/*
bf143f
+ * After the PV dump has been initialized, the CPU data has been
bf143f
+ * fetched and memory has been dumped, we need to grab the tweak data
bf143f
+ * and the completion data.
bf143f
+ */
bf143f
+static int arch_sections_write(DumpState *s, uint8_t *buff)
bf143f
+{
bf143f
+    struct sections *sctn = sections;
bf143f
+    int rc;
bf143f
+
bf143f
+    if (!pv_dump_initialized) {
bf143f
+        return -EINVAL;
bf143f
+    }
bf143f
+
bf143f
+    for (; sctn->sections_size_func; sctn++) {
bf143f
+        rc = sctn->sections_contents_func(s, buff);
bf143f
+        buff += sctn->sections_size_func(s);
bf143f
+        if (rc) {
bf143f
+            return rc;
bf143f
+        }
bf143f
+    }
bf143f
+    return 0;
bf143f
+}
bf143f
+
bf143f
 int cpu_get_dump_info(ArchDumpInfo *info,
bf143f
                       const struct GuestPhysBlockList *guest_phys_blocks)
bf143f
 {
bf143f
     info->d_machine = EM_S390;
bf143f
     info->d_endian = ELFDATA2MSB;
bf143f
     info->d_class = ELFCLASS64;
bf143f
-
bf143f
+    /*
bf143f
+     * This is evaluated for each dump so we can freely switch
bf143f
+     * between PV and non-PV.
bf143f
+     */
bf143f
+    if (s390_is_pv() && kvm_s390_get_protected_dump() &&
bf143f
+        kvm_s390_pv_info_basic_valid()) {
bf143f
+        info->arch_sections_add_fn = *arch_sections_add;
bf143f
+        info->arch_sections_write_hdr_fn = *arch_sections_write_hdr;
bf143f
+        info->arch_sections_write_fn = *arch_sections_write;
bf143f
+    } else {
bf143f
+        info->arch_sections_add_fn = NULL;
bf143f
+        info->arch_sections_write_hdr_fn = NULL;
bf143f
+        info->arch_sections_write_fn = NULL;
bf143f
+    }
bf143f
     return 0;
bf143f
 }
bf143f
 
bf143f
@@ -261,7 +466,7 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
bf143f
 {
bf143f
     int name_size = 8; /* "LINUX" or "CORE" + pad */
bf143f
     size_t elf_note_size = 0;
bf143f
-    int note_head_size;
bf143f
+    int note_head_size, content_size;
bf143f
     const NoteFuncDesc *nf;
bf143f
 
bf143f
     assert(class == ELFCLASS64);
bf143f
@@ -270,12 +475,15 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
bf143f
     note_head_size = sizeof(Elf64_Nhdr);
bf143f
 
bf143f
     for (nf = note_core; nf->note_contents_func; nf++) {
bf143f
-        elf_note_size = elf_note_size + note_head_size + name_size +
bf143f
-                        nf->contents_size;
bf143f
+        elf_note_size = elf_note_size + note_head_size + name_size + nf->contents_size;
bf143f
     }
bf143f
     for (nf = note_linux; nf->note_contents_func; nf++) {
bf143f
+        if (nf->pvonly && !s390_is_pv()) {
bf143f
+            continue;
bf143f
+        }
bf143f
+        content_size = nf->contents_size ? nf->contents_size : nf->note_size_func();
bf143f
         elf_note_size = elf_note_size + note_head_size + name_size +
bf143f
-                        nf->contents_size;
bf143f
+                        content_size;
bf143f
     }
bf143f
 
bf143f
     return (elf_note_size) * nr_cpus;
bf143f
-- 
bf143f
2.37.3
bf143f