|
|
9ae3a8 |
From 876b9284b61269d977d0b6b8585ba29758957622 Mon Sep 17 00:00:00 2001
|
|
|
9ae3a8 |
Message-Id: <876b9284b61269d977d0b6b8585ba29758957622.1387382496.git.minovotn@redhat.com>
|
|
|
9ae3a8 |
In-Reply-To: <c5386144fbf09f628148101bc674e2421cdd16e3.1387382496.git.minovotn@redhat.com>
|
|
|
9ae3a8 |
References: <c5386144fbf09f628148101bc674e2421cdd16e3.1387382496.git.minovotn@redhat.com>
|
|
|
9ae3a8 |
From: Nigel Croxon <ncroxon@redhat.com>
|
|
|
9ae3a8 |
Date: Thu, 14 Nov 2013 22:53:07 +0100
|
|
|
9ae3a8 |
Subject: [PATCH 31/46] rdma: IPv6 over Ethernet (RoCE) is broken in linux -
|
|
|
9ae3a8 |
workaround
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
RH-Author: Nigel Croxon <ncroxon@redhat.com>
|
|
|
9ae3a8 |
Message-id: <1384469598-13137-32-git-send-email-ncroxon@redhat.com>
|
|
|
9ae3a8 |
Patchwork-id: 55713
|
|
|
9ae3a8 |
O-Subject: [RHEL7.0 PATCH 31/42] rdma: IPv6 over Ethernet (RoCE) is broken in linux - workaround
|
|
|
9ae3a8 |
Bugzilla: 1011720
|
|
|
9ae3a8 |
RH-Acked-by: Orit Wasserman <owasserm@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Amit Shah <amit.shah@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Bugzilla: 1011720
|
|
|
9ae3a8 |
https://bugzilla.redhat.com/show_bug.cgi?id=1011720
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
>From commit ID:
|
|
|
9ae3a8 |
commit 7fc5b13fd7b05babc7bcad9dcb8281ae202a9494
|
|
|
9ae3a8 |
Author: Michael R. Hines <mrhines@us.ibm.com>
|
|
|
9ae3a8 |
Date: Fri Aug 9 16:05:44 2013 -0400
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
rdma: IPv6 over Ethernet (RoCE) is broken in linux - workaround
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
We've gotten reports from multiple testers (including Frank Yangjie
|
|
|
9ae3a8 |
and myself) that RDMA IPv6 support over RocE (Ethernet) is broken
|
|
|
9ae3a8 |
in linux.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
A patch to Linux is still in review:
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
http://comments.gmane.org/gmane.linux.drivers.rdma/16448
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
If the user is listening on '[::]', then we will not have opened a device
|
|
|
9ae3a8 |
yet and have no way of verifying if the device is RoCE or not.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
In this case, the source VM will throw an error for ALL types of
|
|
|
9ae3a8 |
connections (both IPv4 and IPv6) if the destination machine does not have
|
|
|
9ae3a8 |
a regular infiniband network available for use.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
The only way to guarantee that an error is thrown for broken kernels is
|
|
|
9ae3a8 |
for the management software to choose a *specific* interface at bind time
|
|
|
9ae3a8 |
and validate what type of hardware it is.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Unfortunately, this puts the user in a fix:
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
If the source VM connects with an IPv4 address without knowing that the
|
|
|
9ae3a8 |
destination has bound to '[::]' the migration will unconditionally fail
|
|
|
9ae3a8 |
unless the management software is explicitly listening on the IPv4
|
|
|
9ae3a8 |
address while using a RoCE-based device.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
If the source VM connects with an IPv6 address, then we're OK because we can
|
|
|
9ae3a8 |
throw an error on the source (and similarly on the destination).
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
But in mixed environments, this will be broken for a while until it is fixed
|
|
|
9ae3a8 |
inside linux.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
We do provide a *tiny* bit of help in mixed environments, though in this patch:
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
We can list all of the devices in the system and check to see if all the
|
|
|
9ae3a8 |
devices are RoCE or Infiniband.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
If we detect that we have a *pure* RoCE environment, then we can safely
|
|
|
9ae3a8 |
throw an error even if the management software has specified '[::]' as the
|
|
|
9ae3a8 |
bind address.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
However, if there are multiple heterogeneous devices, then we cannot make
|
|
|
9ae3a8 |
this assumption and the user just has to be sure they know what they are doing.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
|
|
|
9ae3a8 |
Message-id: 1376078746-24948-6-git-send-email-mrhines@linux.vnet.ibm.com
|
|
|
9ae3a8 |
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
migration-rdma.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++++------
|
|
|
9ae3a8 |
1 files changed, 169 insertions(+), 20 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Michal Novotny <minovotn@redhat.com>
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
migration-rdma.c | 189 +++++++++++++++++++++++++++++++++++++++++++++++++------
|
|
|
9ae3a8 |
1 file changed, 169 insertions(+), 20 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
diff --git a/migration-rdma.c b/migration-rdma.c
|
|
|
9ae3a8 |
index e6fd77a..ada488e 100644
|
|
|
9ae3a8 |
--- a/migration-rdma.c
|
|
|
9ae3a8 |
+++ b/migration-rdma.c
|
|
|
9ae3a8 |
@@ -707,15 +707,27 @@ static int __qemu_rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
|
|
|
9ae3a8 |
*/
|
|
|
9ae3a8 |
static void qemu_rdma_dump_id(const char *who, struct ibv_context *verbs)
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
+ struct ibv_port_attr port;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (ibv_query_port(verbs, 1, &port)) {
|
|
|
9ae3a8 |
+ fprintf(stderr, "FAILED TO QUERY PORT INFORMATION!\n");
|
|
|
9ae3a8 |
+ return;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
printf("%s RDMA Device opened: kernel name %s "
|
|
|
9ae3a8 |
"uverbs device name %s, "
|
|
|
9ae3a8 |
- "infiniband_verbs class device path %s,"
|
|
|
9ae3a8 |
- " infiniband class device path %s\n",
|
|
|
9ae3a8 |
+ "infiniband_verbs class device path %s, "
|
|
|
9ae3a8 |
+ "infiniband class device path %s, "
|
|
|
9ae3a8 |
+ "transport: (%d) %s\n",
|
|
|
9ae3a8 |
who,
|
|
|
9ae3a8 |
verbs->device->name,
|
|
|
9ae3a8 |
verbs->device->dev_name,
|
|
|
9ae3a8 |
verbs->device->dev_path,
|
|
|
9ae3a8 |
- verbs->device->ibdev_path);
|
|
|
9ae3a8 |
+ verbs->device->ibdev_path,
|
|
|
9ae3a8 |
+ port.link_layer,
|
|
|
9ae3a8 |
+ (port.link_layer == IBV_LINK_LAYER_INFINIBAND) ? "Infiniband" :
|
|
|
9ae3a8 |
+ ((port.link_layer == IBV_LINK_LAYER_ETHERNET)
|
|
|
9ae3a8 |
+ ? "Ethernet" : "Unknown"));
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/*
|
|
|
9ae3a8 |
@@ -733,6 +745,132 @@ static void qemu_rdma_dump_gid(const char *who, struct rdma_cm_id *id)
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/*
|
|
|
9ae3a8 |
+ * As of now, IPv6 over RoCE / iWARP is not supported by linux.
|
|
|
9ae3a8 |
+ * We will try the next addrinfo struct, and fail if there are
|
|
|
9ae3a8 |
+ * no other valid addresses to bind against.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * If user is listening on '[::]', then we will not have a opened a device
|
|
|
9ae3a8 |
+ * yet and have no way of verifying if the device is RoCE or not.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * In this case, the source VM will throw an error for ALL types of
|
|
|
9ae3a8 |
+ * connections (both IPv4 and IPv6) if the destination machine does not have
|
|
|
9ae3a8 |
+ * a regular infiniband network available for use.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * The only way to gaurantee that an error is thrown for broken kernels is
|
|
|
9ae3a8 |
+ * for the management software to choose a *specific* interface at bind time
|
|
|
9ae3a8 |
+ * and validate what time of hardware it is.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * Unfortunately, this puts the user in a fix:
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * If the source VM connects with an IPv4 address without knowing that the
|
|
|
9ae3a8 |
+ * destination has bound to '[::]' the migration will unconditionally fail
|
|
|
9ae3a8 |
+ * unless the management software is explicitly listening on the the IPv4
|
|
|
9ae3a8 |
+ * address while using a RoCE-based device.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * If the source VM connects with an IPv6 address, then we're OK because we can
|
|
|
9ae3a8 |
+ * throw an error on the source (and similarly on the destination).
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * But in mixed environments, this will be broken for a while until it is fixed
|
|
|
9ae3a8 |
+ * inside linux.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * We do provide a *tiny* bit of help in this function: We can list all of the
|
|
|
9ae3a8 |
+ * devices in the system and check to see if all the devices are RoCE or
|
|
|
9ae3a8 |
+ * Infiniband.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * If we detect that we have a *pure* RoCE environment, then we can safely
|
|
|
9ae3a8 |
+ * thrown an error even if the management sofware has specified '[::]' as the
|
|
|
9ae3a8 |
+ * bind address.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * However, if there is are multiple hetergeneous devices, then we cannot make
|
|
|
9ae3a8 |
+ * this assumption and the user just has to be sure they know what they are
|
|
|
9ae3a8 |
+ * doing.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * Patches are being reviewed on linux-rdma.
|
|
|
9ae3a8 |
+ */
|
|
|
9ae3a8 |
+static int qemu_rdma_broken_ipv6_kernel(Error **errp, struct ibv_context *verbs)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+ struct ibv_port_attr port_attr;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ /* This bug only exists in linux, to our knowledge. */
|
|
|
9ae3a8 |
+#ifdef CONFIG_LINUX
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ /*
|
|
|
9ae3a8 |
+ * Verbs are only NULL if management has bound to '[::]'.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * Let's iterate through all the devices and see if there any pure IB
|
|
|
9ae3a8 |
+ * devices (non-ethernet).
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * If not, then we can safely proceed with the migration.
|
|
|
9ae3a8 |
+ * Otherwise, there are no gaurantees until the bug is fixed in linux.
|
|
|
9ae3a8 |
+ */
|
|
|
9ae3a8 |
+ if (!verbs) {
|
|
|
9ae3a8 |
+ int num_devices, x;
|
|
|
9ae3a8 |
+ struct ibv_device ** dev_list = ibv_get_device_list(&num_devices);
|
|
|
9ae3a8 |
+ bool roce_found = false;
|
|
|
9ae3a8 |
+ bool ib_found = false;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ for (x = 0; x < num_devices; x++) {
|
|
|
9ae3a8 |
+ verbs = ibv_open_device(dev_list[x]);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (ibv_query_port(verbs, 1, &port_attr)) {
|
|
|
9ae3a8 |
+ ibv_close_device(verbs);
|
|
|
9ae3a8 |
+ ERROR(errp, "Could not query initial IB port");
|
|
|
9ae3a8 |
+ return -EINVAL;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) {
|
|
|
9ae3a8 |
+ ib_found = true;
|
|
|
9ae3a8 |
+ } else if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
|
|
|
9ae3a8 |
+ roce_found = true;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ ibv_close_device(verbs);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (roce_found) {
|
|
|
9ae3a8 |
+ if (ib_found) {
|
|
|
9ae3a8 |
+ fprintf(stderr, "WARN: migrations may fail:"
|
|
|
9ae3a8 |
+ " IPv6 over RoCE / iWARP in linux"
|
|
|
9ae3a8 |
+ " is broken. But since you appear to have a"
|
|
|
9ae3a8 |
+ " mixed RoCE / IB environment, be sure to only"
|
|
|
9ae3a8 |
+ " migrate over the IB fabric until the kernel "
|
|
|
9ae3a8 |
+ " fixes the bug.\n");
|
|
|
9ae3a8 |
+ } else {
|
|
|
9ae3a8 |
+ ERROR(errp, "You only have RoCE / iWARP devices in your systems"
|
|
|
9ae3a8 |
+ " and your management software has specified '[::]'"
|
|
|
9ae3a8 |
+ ", but IPv6 over RoCE / iWARP is not supported in Linux.");
|
|
|
9ae3a8 |
+ return -ENONET;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ return 0;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ /*
|
|
|
9ae3a8 |
+ * If we have a verbs context, that means that some other than '[::]' was
|
|
|
9ae3a8 |
+ * used by the management software for binding. In which case we can actually
|
|
|
9ae3a8 |
+ * warn the user about a potential broken kernel;
|
|
|
9ae3a8 |
+ */
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ /* IB ports start with 1, not 0 */
|
|
|
9ae3a8 |
+ if (ibv_query_port(verbs, 1, &port_attr)) {
|
|
|
9ae3a8 |
+ ERROR(errp, "Could not query initial IB port");
|
|
|
9ae3a8 |
+ return -EINVAL;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
|
|
|
9ae3a8 |
+ ERROR(errp, "Linux kernel's RoCE / iWARP does not support IPv6 "
|
|
|
9ae3a8 |
+ "(but patches on linux-rdma in progress)");
|
|
|
9ae3a8 |
+ return -ENONET;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+#endif
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ return 0;
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+/*
|
|
|
9ae3a8 |
* Figure out which RDMA device corresponds to the requested IP hostname
|
|
|
9ae3a8 |
* Also create the initial connection manager identifiers for opening
|
|
|
9ae3a8 |
* the connection.
|
|
|
9ae3a8 |
@@ -740,22 +878,22 @@ static void qemu_rdma_dump_gid(const char *who, struct rdma_cm_id *id)
|
|
|
9ae3a8 |
static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
int ret;
|
|
|
9ae3a8 |
- struct addrinfo *res;
|
|
|
9ae3a8 |
+ struct rdma_addrinfo *res;
|
|
|
9ae3a8 |
char port_str[16];
|
|
|
9ae3a8 |
struct rdma_cm_event *cm_event;
|
|
|
9ae3a8 |
char ip[40] = "unknown";
|
|
|
9ae3a8 |
- struct addrinfo *e;
|
|
|
9ae3a8 |
+ struct rdma_addrinfo *e;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
if (rdma->host == NULL || !strcmp(rdma->host, "")) {
|
|
|
9ae3a8 |
ERROR(errp, "RDMA hostname has not been set");
|
|
|
9ae3a8 |
- return -1;
|
|
|
9ae3a8 |
+ return -EINVAL;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/* create CM channel */
|
|
|
9ae3a8 |
rdma->channel = rdma_create_event_channel();
|
|
|
9ae3a8 |
if (!rdma->channel) {
|
|
|
9ae3a8 |
ERROR(errp, "could not create CM channel");
|
|
|
9ae3a8 |
- return -1;
|
|
|
9ae3a8 |
+ return -EINVAL;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/* create CM id */
|
|
|
9ae3a8 |
@@ -768,21 +906,24 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
|
|
|
9ae3a8 |
snprintf(port_str, 16, "%d", rdma->port);
|
|
|
9ae3a8 |
port_str[15] = '\0';
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- ret = getaddrinfo(rdma->host, port_str, NULL, &res);
|
|
|
9ae3a8 |
+ ret = rdma_getaddrinfo(rdma->host, port_str, NULL, &res);
|
|
|
9ae3a8 |
if (ret < 0) {
|
|
|
9ae3a8 |
- ERROR(errp, "could not getaddrinfo address %s", rdma->host);
|
|
|
9ae3a8 |
+ ERROR(errp, "could not rdma_getaddrinfo address %s", rdma->host);
|
|
|
9ae3a8 |
goto err_resolve_get_addr;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
for (e = res; e != NULL; e = e->ai_next) {
|
|
|
9ae3a8 |
inet_ntop(e->ai_family,
|
|
|
9ae3a8 |
- &((struct sockaddr_in *) e->ai_addr)->sin_addr, ip, sizeof ip);
|
|
|
9ae3a8 |
+ &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
|
|
|
9ae3a8 |
DPRINTF("Trying %s => %s\n", rdma->host, ip);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- /* resolve the first address */
|
|
|
9ae3a8 |
- ret = rdma_resolve_addr(rdma->cm_id, NULL, e->ai_addr,
|
|
|
9ae3a8 |
+ ret = rdma_resolve_addr(rdma->cm_id, NULL, e->ai_dst_addr,
|
|
|
9ae3a8 |
RDMA_RESOLVE_TIMEOUT_MS);
|
|
|
9ae3a8 |
if (!ret) {
|
|
|
9ae3a8 |
+ ret = qemu_rdma_broken_ipv6_kernel(errp, rdma->cm_id->verbs);
|
|
|
9ae3a8 |
+ if (ret) {
|
|
|
9ae3a8 |
+ continue;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
goto route;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
@@ -803,6 +944,7 @@ route:
|
|
|
9ae3a8 |
ERROR(errp, "result not equal to event_addr_resolved %s",
|
|
|
9ae3a8 |
rdma_event_str(cm_event->event));
|
|
|
9ae3a8 |
perror("rdma_resolve_addr");
|
|
|
9ae3a8 |
+ ret = -EINVAL;
|
|
|
9ae3a8 |
goto err_resolve_get_addr;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
rdma_ack_cm_event(cm_event);
|
|
|
9ae3a8 |
@@ -823,6 +965,7 @@ route:
|
|
|
9ae3a8 |
ERROR(errp, "result not equal to event_route_resolved: %s",
|
|
|
9ae3a8 |
rdma_event_str(cm_event->event));
|
|
|
9ae3a8 |
rdma_ack_cm_event(cm_event);
|
|
|
9ae3a8 |
+ ret = -EINVAL;
|
|
|
9ae3a8 |
goto err_resolve_get_addr;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
rdma_ack_cm_event(cm_event);
|
|
|
9ae3a8 |
@@ -837,8 +980,7 @@ err_resolve_get_addr:
|
|
|
9ae3a8 |
err_resolve_create_id:
|
|
|
9ae3a8 |
rdma_destroy_event_channel(rdma->channel);
|
|
|
9ae3a8 |
rdma->channel = NULL;
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- return -1;
|
|
|
9ae3a8 |
+ return ret;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/*
|
|
|
9ae3a8 |
@@ -2266,7 +2408,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
|
|
|
9ae3a8 |
int ret = -EINVAL, idx;
|
|
|
9ae3a8 |
struct rdma_cm_id *listen_id;
|
|
|
9ae3a8 |
char ip[40] = "unknown";
|
|
|
9ae3a8 |
- struct addrinfo *res;
|
|
|
9ae3a8 |
+ struct rdma_addrinfo *res;
|
|
|
9ae3a8 |
char port_str[16];
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
|
|
|
9ae3a8 |
@@ -2298,20 +2440,27 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
|
|
|
9ae3a8 |
port_str[15] = '\0';
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
if (rdma->host && strcmp("", rdma->host)) {
|
|
|
9ae3a8 |
- struct addrinfo *e;
|
|
|
9ae3a8 |
+ struct rdma_addrinfo *e;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- ret = getaddrinfo(rdma->host, port_str, NULL, &res);
|
|
|
9ae3a8 |
+ ret = rdma_getaddrinfo(rdma->host, port_str, NULL, &res);
|
|
|
9ae3a8 |
if (ret < 0) {
|
|
|
9ae3a8 |
- ERROR(errp, "could not getaddrinfo address %s", rdma->host);
|
|
|
9ae3a8 |
+ ERROR(errp, "could not rdma_getaddrinfo address %s", rdma->host);
|
|
|
9ae3a8 |
goto err_dest_init_bind_addr;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
for (e = res; e != NULL; e = e->ai_next) {
|
|
|
9ae3a8 |
inet_ntop(e->ai_family,
|
|
|
9ae3a8 |
- &((struct sockaddr_in *) e->ai_addr)->sin_addr, ip, sizeof ip);
|
|
|
9ae3a8 |
+ &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
|
|
|
9ae3a8 |
DPRINTF("Trying %s => %s\n", rdma->host, ip);
|
|
|
9ae3a8 |
- ret = rdma_bind_addr(listen_id, e->ai_addr);
|
|
|
9ae3a8 |
+ ret = rdma_bind_addr(listen_id, e->ai_dst_addr);
|
|
|
9ae3a8 |
if (!ret) {
|
|
|
9ae3a8 |
+ if (e->ai_family == AF_INET6) {
|
|
|
9ae3a8 |
+ ret = qemu_rdma_broken_ipv6_kernel(errp, listen_id->verbs);
|
|
|
9ae3a8 |
+ if (ret) {
|
|
|
9ae3a8 |
+ continue;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
goto listen;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
--
|
|
|
9ae3a8 |
1.7.11.7
|
|
|
9ae3a8 |
|