From 742bae4cea60c8601bbb6e5ec643167d8ca664d7 Mon Sep 17 00:00:00 2001
From: Nigel Croxon <ncroxon@redhat.com>
Date: Tue, 6 Aug 2013 19:52:04 +0200
Subject: vfio: QEMU-AER: Qemu changes to support AER for VFIO-PCI devices
RH-Author: Nigel Croxon <ncroxon@redhat.com>
Message-id: <1375818724-41239-3-git-send-email-ncroxon@redhat.com>
Patchwork-id: 53017
O-Subject: [RHEL7.0 qemu-kvm PATCH v2 2/2] vfio: QEMU-AER: Qemu changes to support AER for VFIO-PCI devices
Bugzilla: 984604
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
RH-Acked-by: Alex Williamson <alex.williamson@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
From: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>
Add support for error containment when a VFIO device assigned to a KVM
guest encounters an error. This is for PCIe devices/drivers that support AER
functionality. When the host OS is notified of an error in a device either
through the firmware first approach or through an interrupt handled by the AER
root port driver, the error handler registered by the vfio-pci driver gets
invoked. The qemu process is signaled through an eventfd registered per
VFIO device by the qemu process. In the eventfd handler, qemu decides on
what action to take. In this implementation, guest is brought down to
contain the error.
The kernel patches for the above functionality has been already accepted.
This is a refresh of the QEMU patch which was reviewed earlier.
http://marc.info/?l=linux-kernel&m=136281557608087&w=2
This patch has the same contents and has been built after refreshing
to latest upstream and after the linux headers have been updated in qemu.
- Create eventfd per vfio device assigned to a guest and register an
event handler
- This fd is passed to the vfio_pci driver through the SET_IRQ ioctl
- When the device encounters an error, the eventfd is signalled
and the qemu eventfd handler gets invoked.
- In the handler decide what action to take. Current action taken
is to stop the guest.
Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
(cherry picked from commit 7b4b0e9eda51902b53bc1a2318df53cdb8b72eed)
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 693a9ff..f8fef8c 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -158,6 +158,7 @@ typedef struct VFIODevice {
PCIHostDeviceAddress host;
QLIST_ENTRY(VFIODevice) next;
struct VFIOGroup *group;
+ EventNotifier err_notifier;
uint32_t features;
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
@@ -165,6 +166,7 @@ typedef struct VFIODevice {
uint8_t pm_cap;
bool reset_works;
bool has_vga;
+ bool pci_aer;
} VFIODevice;
typedef struct VFIOGroup {
@@ -2776,6 +2778,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
{
struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+ struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
int ret, i;
ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
@@ -2919,6 +2922,19 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
vdev->has_vga = true;
}
+ irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
+
+ ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
+ if (ret) {
+ /* This can fail for an old kernel or legacy PCI dev */
+ DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure ret=%d\n", ret);
+ ret = 0;
+ } else if (irq_info.count == 1) {
+ vdev->pci_aer = true;
+ } else {
+ error_report("vfio: Warning: "
+ "Could not enable error recovery for the device\n");
+ }
error:
if (ret) {
@@ -2941,6 +2957,113 @@ static void vfio_put_device(VFIODevice *vdev)
}
}
+static void vfio_err_notifier_handler(void *opaque)
+{
+ VFIODevice *vdev = opaque;
+
+ if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
+ return;
+ }
+
+ /*
+ * TBD. Retrieve the error details and decide what action
+ * needs to be taken. One of the actions could be to pass
+ * the error to the guest and have the guest driver recover
+ * from the error. This requires that PCIe capabilities be
+ * exposed to the guest. For now, we just terminate the
+ * guest to contain the error.
+ */
+
+ error_report("%s (%04x:%02x:%02x.%x)"
+ "Unrecoverable error detected...\n"
+ "Please collect any data possible and then kill the guest",
+ __func__, vdev->host.domain, vdev->host.bus,
+ vdev->host.slot, vdev->host.function);
+
+ vm_stop(RUN_STATE_IO_ERROR);
+}
+
+/*
+ * Registers error notifier for devices supporting error recovery.
+ * If we encounter a failure in this function, we report an error
+ * and continue after disabling error recovery support for the
+ * device.
+ */
+static void vfio_register_err_notifier(VFIODevice *vdev)
+{
+ int ret;
+ int argsz;
+ struct vfio_irq_set *irq_set;
+ int32_t *pfd;
+
+ if (!vdev->pci_aer) {
+ return;
+ }
+
+ if (event_notifier_init(&vdev->err_notifier, 0)) {
+ error_report("vfio: Warning: "
+ "Unable to init event notifier for error detection\n");
+ vdev->pci_aer = false;
+ return;
+ }
+
+ argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+ irq_set = g_malloc0(argsz);
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
+ irq_set->start = 0;
+ irq_set->count = 1;
+ pfd = (int32_t *)&irq_set->data;
+
+ *pfd = event_notifier_get_fd(&vdev->err_notifier);
+ qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
+
+ ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ if (ret) {
+ error_report("vfio: Failed to set up error notification\n");
+ qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+ event_notifier_cleanup(&vdev->err_notifier);
+ vdev->pci_aer = false;
+ }
+ g_free(irq_set);
+}
+
+static void vfio_unregister_err_notifier(VFIODevice *vdev)
+{
+ int argsz;
+ struct vfio_irq_set *irq_set;
+ int32_t *pfd;
+ int ret;
+
+ if (!vdev->pci_aer) {
+ return;
+ }
+
+ argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+ irq_set = g_malloc0(argsz);
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
+ irq_set->start = 0;
+ irq_set->count = 1;
+ pfd = (int32_t *)&irq_set->data;
+ *pfd = -1;
+
+ ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ if (ret) {
+ error_report("vfio: Failed to de-assign error fd: %d\n", ret);
+ }
+ g_free(irq_set);
+ qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier),
+ NULL, NULL, vdev);
+ event_notifier_cleanup(&vdev->err_notifier);
+}
+
static int vfio_initfn(PCIDevice *pdev)
{
VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
@@ -3073,6 +3196,7 @@ static int vfio_initfn(PCIDevice *pdev)
}
add_boot_device_path(vdev->bootindex, &pdev->qdev, NULL);
+ vfio_register_err_notifier(vdev);
return 0;
@@ -3092,6 +3216,7 @@ static void vfio_exitfn(PCIDevice *pdev)
VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
VFIOGroup *group = vdev->group;
+ vfio_unregister_err_notifier(vdev);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
vfio_disable_interrupts(vdev);
if (vdev->intx.mmap_timer) {