05bba0
From 6a5797ecb4e557c746356a2f8636cceafa787851 Mon Sep 17 00:00:00 2001
05bba0
From: John Snow <jsnow@redhat.com>
05bba0
Date: Fri, 26 Jun 2015 21:52:47 +0200
05bba0
Subject: [PATCH 2/2] ide: Correct handling of malformed/short PRDTs
05bba0
05bba0
Message-id: <1435355567-29641-3-git-send-email-jsnow@redhat.com>
05bba0
Patchwork-id: 66536
05bba0
O-Subject: [RHEL-7.2 qemu-kvm PATCH 2/2] ide: Correct handling of malformed/short PRDTs
05bba0
Bugzilla: 1205100
05bba0
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
05bba0
RH-Acked-by: Juan Quintela <quintela@redhat.com>
05bba0
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
05bba0
05bba0
This impacts both BMDMA and AHCI HBA interfaces for IDE.
05bba0
Currently, we confuse the difference between a PRDT having
05bba0
"0 bytes" and a PRDT having "0 complete sectors."
05bba0
05bba0
When we receive an incomplete sector, inconsistent error checking
05bba0
leads to an infinite loop wherein the call succeeds, but it
05bba0
didn't give us enough bytes -- leading us to re-call the
05bba0
DMA chain over and over again. This leads to, in the BMDMA case,
05bba0
leaked memory for short PRDTs, and infinite loops and resource
05bba0
usage in the AHCI case.
05bba0
05bba0
The .prepare_buf() callback is reworked to return the number of
05bba0
bytes that it successfully prepared. 0 is a valid, non-error
05bba0
answer that means the table was empty and described no bytes.
05bba0
-1 indicates an error.
05bba0
05bba0
Our current implementation uses the io_buffer in IDEState to
05bba0
ultimately describe the size of a prepared scatter-gather list.
05bba0
Even though the AHCI PRDT/SGList can be as large as 256GiB, the
05bba0
AHCI command header limits transactions to just 4GiB. ATA8-ACS3,
05bba0
however, defines the largest transaction to be an LBA48 command
05bba0
that transfers 65,536 sectors. With a 512 byte sector size, this
05bba0
is just 32MiB.
05bba0
05bba0
Since our current state structures use the int type to describe
05bba0
the size of the buffer, and this state is migrated as int32, we
05bba0
are limited to describing 2GiB buffer sizes unless we change the
05bba0
migration protocol.
05bba0
05bba0
For this reason, this patch begins to unify the assertions in the
05bba0
IDE pathways that the scatter-gather list provided by either the
05bba0
AHCI PRDT or the PCI BMDMA PRDs can only describe, at a maximum,
05bba0
2GiB. This should be resilient enough unless we need a sector
05bba0
size that exceeds 32KiB.
05bba0
05bba0
Further, the likelihood of any guest operating system actually
05bba0
attempting to transfer this much data in a single operation is
05bba0
very slim.
05bba0
05bba0
To this end, the IDEState variables have been updated to more
05bba0
explicitly clarify our maximum supported size. Callers to the
05bba0
prepare_buf callback have been reworked to understand the new
05bba0
return code, and all versions of the prepare_buf callback have
05bba0
been adjusted accordingly.
05bba0
05bba0
Lastly, the ahci_populate_sglist helper, relied upon by the
05bba0
AHCI implementation of .prepare_buf() as well as the PCI
05bba0
implementation of the callback have had overflow assertions
05bba0
added to help make clear the reasonings behind the various
05bba0
type changes.
05bba0
05bba0
[Added %d -> %"PRId64" fix John sent because off_pos changed from int to
05bba0
int64_t.
05bba0
--Stefan]
05bba0
05bba0
Signed-off-by: John Snow <jsnow@redhat.com>
05bba0
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
05bba0
Message-id: 1414785819-26209-4-git-send-email-jsnow@redhat.com
05bba0
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
05bba0
(cherry picked from commit 3251bdcf1c67427d964517053c3d185b46e618e8)
05bba0
Signed-off-by: John Snow <jsnow@redhat.com>
05bba0
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
05bba0
05bba0
Conflicts:
05bba0
        hw/ide/ahci.c:     Conflicts arising from not backporting bef1301ac
05bba0
                           ahci: unify sglist preparation
05bba0
        hw/ide/core.c:     Conflicts from not backporting IDEDMAOps changes,
05bba0
                           including 9898586d8 and dependencies.
05bba0
        hw/ide/internal.h: Same as hw/ide/core.c.
05bba0
        hw/ide/macio.c:    Conflicts from not backporting (many) IDEDMAOps
05bba0
                           refactorings, 4aa3510f6 and others.
05bba0
        hw/ide/pci.c:      Conflicts arising form not backporting f6c11d564
05bba0
                           ide: Introduce abstract QOM type for PCIIDEState
05bba0
05bba0
Signed-off-by: John Snow <jsnow@redhat.com>
05bba0
---
05bba0
 hw/ide/ahci.c     | 33 ++++++++++++++++++++++++++-------
05bba0
 hw/ide/core.c     | 10 ++++++++--
05bba0
 hw/ide/internal.h | 13 +++++++------
05bba0
 hw/ide/pci.c      | 26 +++++++++++++++++++++-----
05bba0
 4 files changed, 62 insertions(+), 20 deletions(-)
05bba0
05bba0
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
05bba0
index 7f3927a..e951ba0 100644
05bba0
--- a/hw/ide/ahci.c
05bba0
+++ b/hw/ide/ahci.c
05bba0
@@ -640,7 +640,8 @@ static int prdt_tbl_entry_size(const AHCI_SG *tbl)
05bba0
     return (le32_to_cpu(tbl->flags_size) & AHCI_PRDT_SIZE_MASK) + 1;
05bba0
 }
05bba0
 
05bba0
-static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
05bba0
+static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist,
05bba0
+                                int32_t offset)
05bba0
 {
05bba0
     AHCICmdHdr *cmd = ad->cur_cmd;
05bba0
     uint32_t opts = le32_to_cpu(cmd->opts);
05bba0
@@ -651,11 +652,19 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
05bba0
     uint8_t *prdt;
05bba0
     int i;
05bba0
     int r = 0;
05bba0
-    int sum = 0;
05bba0
+    uint64_t sum = 0;
05bba0
     int off_idx = -1;
05bba0
-    int off_pos = -1;
05bba0
+    int64_t off_pos = -1;
05bba0
     int tbl_entry_size;
05bba0
 
05bba0
+    /*
05bba0
+     * Note: AHCI PRDT can describe up to 256GiB. SATA/ATA only support
05bba0
+     * transactions of up to 32MiB as of ATA8-ACS3 rev 1b, assuming a
05bba0
+     * 512 byte sector size. We limit the PRDT in this implementation to
05bba0
+     * a reasonably large 2GiB, which can accommodate the maximum transfer
05bba0
+     * request for sector sizes up to 32K.
05bba0
+     */
05bba0
+
05bba0
     if (!sglist_alloc_hint) {
05bba0
         DPRINTF(ad->port_no, "no sg list given by guest: 0x%08x\n", opts);
05bba0
         return -1;
05bba0
@@ -690,7 +699,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
05bba0
         }
05bba0
         if ((off_idx == -1) || (off_pos < 0) || (off_pos > tbl_entry_size)) {
05bba0
             DPRINTF(ad->port_no, "%s: Incorrect offset! "
05bba0
-                            "off_idx: %d, off_pos: %d\n",
05bba0
+                            "off_idx: %d, off_pos: %"PRId64"\n",
05bba0
                             __func__, off_idx, off_pos);
05bba0
             r = -1;
05bba0
             goto out;
05bba0
@@ -704,6 +713,13 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
05bba0
             /* flags_size is zero-based */
05bba0
             qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
05bba0
                             prdt_tbl_entry_size(&tbl[i]));
05bba0
+            if (sglist->size > INT32_MAX) {
05bba0
+                error_report("AHCI Physical Region Descriptor Table describes "
05bba0
+                             "more than 2 GiB.\n");
05bba0
+                qemu_sglist_destroy(sglist);
05bba0
+                r = -1;
05bba0
+                goto out;
05bba0
+            }
05bba0
         }
05bba0
     }
05bba0
 
05bba0
@@ -1047,16 +1063,19 @@ static void ahci_start_dma(IDEDMA *dma, IDEState *s,
05bba0
     dma_cb(s, 0);
05bba0
 }
05bba0
 
05bba0
-static int ahci_dma_prepare_buf(IDEDMA *dma, int is_write)
05bba0
+static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int is_write)
05bba0
 {
05bba0
     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
05bba0
     IDEState *s = &ad->port.ifs[0];
05bba0
 
05bba0
-    ahci_populate_sglist(ad, &s->sg, 0);
05bba0
+    if (ahci_populate_sglist(ad, &s->sg, 0) == -1) {
05bba0
+        DPRINTF(ad->port_no, "ahci_dma_prepare_buf failed.\n");
05bba0
+        return -1;
05bba0
+    }
05bba0
     s->io_buffer_size = s->sg.size;
05bba0
 
05bba0
     DPRINTF(ad->port_no, "len=%#x\n", s->io_buffer_size);
05bba0
-    return s->io_buffer_size != 0;
05bba0
+    return s->io_buffer_size;
05bba0
 }
05bba0
 
05bba0
 static int ahci_dma_rw_buf(IDEDMA *dma, int is_write)
05bba0
diff --git a/hw/ide/core.c b/hw/ide/core.c
05bba0
index 9a22425..d9fdb03 100644
05bba0
--- a/hw/ide/core.c
05bba0
+++ b/hw/ide/core.c
05bba0
@@ -690,10 +690,11 @@ void ide_dma_cb(void *opaque, int ret)
05bba0
     n = s->nsector;
05bba0
     s->io_buffer_index = 0;
05bba0
     s->io_buffer_size = n * 512;
05bba0
-    if (s->bus->dma->ops->prepare_buf(s->bus->dma, ide_cmd_is_read(s)) == 0) {
05bba0
+    if (s->bus->dma->ops->prepare_buf(s->bus->dma, ide_cmd_is_read(s)) < 512) {
05bba0
         /* The PRDs were too short. Reset the Active bit, but don't raise an
05bba0
          * interrupt. */
05bba0
         s->status = READY_STAT | SEEK_STAT;
05bba0
+        dma_buf_commit(s);
05bba0
         goto eot;
05bba0
     }
05bba0
 
05bba0
@@ -2131,6 +2132,11 @@ static int ide_nop_int(IDEDMA *dma, int x)
05bba0
     return 0;
05bba0
 }
05bba0
 
05bba0
+static int32_t ide_nop_int32(IDEDMA *dma, int x)
05bba0
+{
05bba0
+    return 0;
05bba0
+}
05bba0
+
05bba0
 static void ide_nop_restart(void *opaque, int x, RunState y)
05bba0
 {
05bba0
 }
05bba0
@@ -2138,7 +2144,7 @@ static void ide_nop_restart(void *opaque, int x, RunState y)
05bba0
 static const IDEDMAOps ide_dma_nop_ops = {
05bba0
     .start_dma      = ide_nop_start,
05bba0
     .start_transfer = ide_nop,
05bba0
-    .prepare_buf    = ide_nop_int,
05bba0
+    .prepare_buf    = ide_nop_int32,
05bba0
     .rw_buf         = ide_nop_int,
05bba0
     .set_unit       = ide_nop_int,
05bba0
     .add_status     = ide_nop_int,
05bba0
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
05bba0
index 0a2d6bc..f8fb564 100644
05bba0
--- a/hw/ide/internal.h
05bba0
+++ b/hw/ide/internal.h
05bba0
@@ -323,6 +323,7 @@ typedef void EndTransferFunc(IDEState *);
05bba0
 typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockDriverCompletionFunc *);
05bba0
 typedef int DMAFunc(IDEDMA *);
05bba0
 typedef int DMAIntFunc(IDEDMA *, int);
05bba0
+typedef int32_t DMAInt32Func(IDEDMA *, int);
05bba0
 typedef void DMARestartFunc(void *, int, RunState);
05bba0
 
05bba0
 struct unreported_events {
05bba0
@@ -384,7 +385,7 @@ struct IDEState {
05bba0
     uint8_t cdrom_changed;
05bba0
     int packet_transfer_size;
05bba0
     int elementary_transfer_size;
05bba0
-    int io_buffer_index;
05bba0
+    int32_t io_buffer_index;
05bba0
     int lba;
05bba0
     int cd_sector_size;
05bba0
     int atapi_dma; /* true if dma is requested for the packet cmd */
05bba0
@@ -393,8 +394,8 @@ struct IDEState {
05bba0
     struct iovec iov;
05bba0
     QEMUIOVector qiov;
05bba0
     /* ATA DMA state */
05bba0
-    int io_buffer_offset;
05bba0
-    int io_buffer_size;
05bba0
+    int32_t io_buffer_offset;
05bba0
+    int32_t io_buffer_size;
05bba0
     QEMUSGList sg;
05bba0
     /* PIO transfer handling */
05bba0
     int req_nb_sectors; /* number of sectors per interrupt */
05bba0
@@ -404,8 +405,8 @@ struct IDEState {
05bba0
     uint8_t *io_buffer;
05bba0
     /* PIO save/restore */
05bba0
     int32_t io_buffer_total_len;
05bba0
-    int cur_io_buffer_offset;
05bba0
-    int cur_io_buffer_len;
05bba0
+    int32_t cur_io_buffer_offset;
05bba0
+    int32_t cur_io_buffer_len;
05bba0
     uint8_t end_transfer_fn_idx;
05bba0
     QEMUTimer *sector_write_timer; /* only used for win2k install hack */
05bba0
     uint32_t irq_count; /* counts IRQs when using win2k install hack */
05bba0
@@ -429,7 +430,7 @@ struct IDEState {
05bba0
 struct IDEDMAOps {
05bba0
     DMAStartFunc *start_dma;
05bba0
     DMAFunc *start_transfer;
05bba0
-    DMAIntFunc *prepare_buf;
05bba0
+    DMAInt32Func *prepare_buf;
05bba0
     DMAIntFunc *rw_buf;
05bba0
     DMAIntFunc *set_unit;
05bba0
     DMAIntFunc *add_status;
05bba0
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
05bba0
index cf7acb0..03b2081 100644
05bba0
--- a/hw/ide/pci.c
05bba0
+++ b/hw/ide/pci.c
05bba0
@@ -28,6 +28,7 @@
05bba0
 #include <hw/isa/isa.h>
05bba0
 #include "block/block.h"
05bba0
 #include "sysemu/dma.h"
05bba0
+#include "qemu/error-report.h"
05bba0
 
05bba0
 #include <hw/ide/pci.h>
05bba0
 
05bba0
@@ -51,8 +52,11 @@ static void bmdma_start_dma(IDEDMA *dma, IDEState *s,
05bba0
     }
05bba0
 }
05bba0
 
05bba0
-/* return 0 if buffer completed */
05bba0
-static int bmdma_prepare_buf(IDEDMA *dma, int is_write)
05bba0
+/**
05bba0
+ * Return the number of bytes successfully prepared.
05bba0
+ * -1 on error.
05bba0
+ */
05bba0
+static int32_t bmdma_prepare_buf(IDEDMA *dma, int is_write)
05bba0
 {
05bba0
     BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
05bba0
     IDEState *s = bmdma_active_if(bm);
05bba0
@@ -69,8 +73,9 @@ static int bmdma_prepare_buf(IDEDMA *dma, int is_write)
05bba0
         if (bm->cur_prd_len == 0) {
05bba0
             /* end of table (with a fail safe of one page) */
05bba0
             if (bm->cur_prd_last ||
05bba0
-                (bm->cur_addr - bm->addr) >= BMDMA_PAGE_SIZE)
05bba0
-                return s->io_buffer_size != 0;
05bba0
+                (bm->cur_addr - bm->addr) >= BMDMA_PAGE_SIZE) {
05bba0
+                return s->io_buffer_size;
05bba0
+            }
05bba0
             pci_dma_read(&bm->pci_dev->dev, bm->cur_addr, &prd, 8);
05bba0
             bm->cur_addr += 8;
05bba0
             prd.addr = le32_to_cpu(prd.addr);
05bba0
@@ -85,12 +90,23 @@ static int bmdma_prepare_buf(IDEDMA *dma, int is_write)
05bba0
         l = bm->cur_prd_len;
05bba0
         if (l > 0) {
05bba0
             qemu_sglist_add(&s->sg, bm->cur_prd_addr, l);
05bba0
+
05bba0
+            /* Note: We limit the max transfer to be 2GiB.
05bba0
+             * This should accommodate the largest ATA transaction
05bba0
+             * for LBA48 (65,536 sectors) and 32K sector sizes. */
05bba0
+            if (s->sg.size > INT32_MAX) {
05bba0
+                error_report("IDE: sglist describes more than 2GiB.\n");
05bba0
+                break;
05bba0
+            }
05bba0
             bm->cur_prd_addr += l;
05bba0
             bm->cur_prd_len -= l;
05bba0
             s->io_buffer_size += l;
05bba0
         }
05bba0
     }
05bba0
-    return 1;
05bba0
+
05bba0
+    qemu_sglist_destroy(&s->sg);
05bba0
+    s->io_buffer_size = 0;
05bba0
+    return -1;
05bba0
 }
05bba0
 
05bba0
 /* return 0 if buffer completed */
05bba0
-- 
05bba0
1.8.3.1
05bba0