Blob Blame History Raw
From 64d9964fc97fc525b86e12c5f385dea7e646a3b0 Mon Sep 17 00:00:00 2001
Message-Id: <64d9964fc97fc525b86e12c5f385dea7e646a3b0.1383564115.git.minovotn@redhat.com>
In-Reply-To: <5575e0aec51f40ebec46e98ec085cda053283aba.1383564115.git.minovotn@redhat.com>
References: <5575e0aec51f40ebec46e98ec085cda053283aba.1383564115.git.minovotn@redhat.com>
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 31 Oct 2013 12:13:13 +0100
Subject: [PATCH 12/14] rbd: link and load librbd dynamically

RH-Author: Kevin Wolf <kwolf@redhat.com>
Message-id: <1383221595-24285-2-git-send-email-kwolf@redhat.com>
Patchwork-id: 55183
O-Subject: [RHEL-7.0 qemu-kvm PATCH 1/3] rbd: link and load librbd dynamically
Bugzilla: 989608
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Fam Zheng <famz@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>

Bugzilla: 989608
Upstream status: Rejected
Downstream status: Forward ported from RHEL 6

This is the downstream-only part that gets us rid of the build-time
dependency on librbd and loads it dynamically when using an image.
It is based on a patch submitted to qemu-devel and archived as
http://lists.gnu.org/archive/html/qemu-devel/2013-04/msg01814.html

For the forward-port to RHEL 7, I decided to implement the review
suggestions from RHEL 6 to move the function pointers out of a separate
struct to the top level, so that callers don't have to be changed
compared to upstream. This should also reduce merge conflicts in future
forward-ports.

Note that this is not much more than compile tested; the real testing
will be done by Inktank as soon as they can access RHEL 7.

Original commit message follows:

This allows the rbd block driver to detect symbols in the installed
version of librbd, and enable or disable features appropriately.  This
obviates the #ifdefs regarding librbd versions.

Loading librbd dynamically also makes the rbd block driver easier to
install and package, since it removes the dependency on librbd at
build time.

Add structures containing the necessary function pointer signatures
and types from librbd, and fill them in the first time the rbd module
is used. Use glib's g_module interface so we don't preclude future
portability, and don't have to deal with odd dlopen behavior directly.

Internally, librbd and some libraries it depends on use C++ templates,
which mean that they each contain a defined weak symbol for their
definition.  Due to the way the linker resolves duplicate symbols, the
libraries loaded by librbd end up using the template definitions from
librbd, creating a circular dependency. This means that once librbd is
loaded, it won't be unloaded. Changing this behavior might work with a
Sun ld, but there doesn't seem to be a portable (or even working with
GNU ld) way to hide these C++ symbols correctly. Instead, never unload
librbd, and explicitly make it resident.

Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/Makefile.objs |   2 +-
 block/rbd.c         | 218 ++++++++++++++++++++++++++++++++++++++++------------
 block/rbd_types.h   |  91 ++++++++++++++++++++++
 configure           |  41 +---------
 4 files changed, 263 insertions(+), 89 deletions(-)
 create mode 100644 block/rbd_types.h

Signed-off-by: Michal Novotny <minovotn@redhat.com>
---
 block/Makefile.objs |   2 +-
 block/rbd.c         | 218 ++++++++++++++++++++++++++++++++++++++++------------
 block/rbd_types.h   |  91 ++++++++++++++++++++++
 configure           |  41 +---------
 4 files changed, 263 insertions(+), 89 deletions(-)
 create mode 100644 block/rbd_types.h

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 5f0358a..6b8d5ec 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -12,7 +12,7 @@ ifeq ($(CONFIG_POSIX),y)
 block-obj-y += nbd.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
-block-obj-$(CONFIG_RBD) += rbd.o
+block-obj-y += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 endif
diff --git a/block/rbd.c b/block/rbd.c
index 0f2608b..7f5cfca 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -11,13 +11,14 @@
  * GNU GPL, version 2 or (at your option) any later version.
  */
 
+#include <gmodule.h>
 #include <inttypes.h>
 
 #include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
 
-#include <rbd/librbd.h>
+#include "rbd_types.h"
 
 /*
  * When specifying the image filename use:
@@ -44,13 +45,6 @@
  * leading "\".
  */
 
-/* rbd_aio_discard added in 0.1.2 */
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
-#define LIBRBD_SUPPORTS_DISCARD
-#else
-#undef LIBRBD_SUPPORTS_DISCARD
-#endif
-
 #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
 
 #define RBD_MAX_CONF_NAME_SIZE 128
@@ -106,6 +100,10 @@ typedef struct BDRVRBDState {
     RADOSCB *event_rcb;
 } BDRVRBDState;
 
+static bool librbd_loaded;
+static GModule *librbd_handle;
+
+static int qemu_rbd_load_libs(void);
 static void rbd_aio_bh_cb(void *opaque);
 
 static int qemu_rbd_next_tok(char *dst, int dst_len,
@@ -310,6 +308,10 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options)
         return -EINVAL;
     }
 
+    if (qemu_rbd_load_libs() < 0) {
+        return -EIO;
+    }
+
     /* Read out options */
     while (options && options->name) {
         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
@@ -487,6 +489,10 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags)
         goto failed_opts;
     }
 
+    if (qemu_rbd_load_libs() < 0) {
+        return -EIO;
+    }
+
     clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
     r = rados_create(&s->cluster, clientname);
     if (r < 0) {
@@ -678,28 +684,6 @@ static void rbd_aio_bh_cb(void *opaque)
     }
 }
 
-static int rbd_aio_discard_wrapper(rbd_image_t image,
-                                   uint64_t off,
-                                   uint64_t len,
-                                   rbd_completion_t comp)
-{
-#ifdef LIBRBD_SUPPORTS_DISCARD
-    return rbd_aio_discard(image, off, len, comp);
-#else
-    return -ENOTSUP;
-#endif
-}
-
-static int rbd_aio_flush_wrapper(rbd_image_t image,
-                                 rbd_completion_t comp)
-{
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-    return rbd_aio_flush(image, comp);
-#else
-    return -ENOTSUP;
-#endif
-}
-
 static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
                                        int64_t sector_num,
                                        QEMUIOVector *qiov,
@@ -762,10 +746,10 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
         r = rbd_aio_read(s->image, off, size, buf, c);
         break;
     case RBD_AIO_DISCARD:
-        r = rbd_aio_discard_wrapper(s->image, off, size, c);
+        r = rbd_aio_discard(s->image, off, size, c);
         break;
     case RBD_AIO_FLUSH:
-        r = rbd_aio_flush_wrapper(s->image, c);
+        r = rbd_aio_flush(s->image, c);
         break;
     default:
         r = -EINVAL;
@@ -806,7 +790,6 @@ static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
                          RBD_AIO_WRITE);
 }
 
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
 static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
                                             BlockDriverCompletionFunc *cb,
                                             void *opaque)
@@ -814,19 +797,14 @@ static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
     return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
 }
 
-#else
-
 static int qemu_rbd_co_flush(BlockDriverState *bs)
 {
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
-    /* rbd_flush added in 0.1.1 */
     BDRVRBDState *s = bs->opaque;
-    return rbd_flush(s->image);
-#else
+    if (rbd_flush) {
+        return rbd_flush(s->image);
+    }
     return 0;
-#endif
 }
-#endif
 
 static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
@@ -964,7 +942,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
     return snap_count;
 }
 
-#ifdef LIBRBD_SUPPORTS_DISCARD
 static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
                                               int64_t sector_num,
                                               int nb_sectors,
@@ -974,7 +951,6 @@ static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
     return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
                          RBD_AIO_DISCARD);
 }
-#endif
 
 static QEMUOptionParameter qemu_rbd_create_options[] = {
     {
@@ -1004,16 +980,9 @@ static BlockDriver bdrv_rbd = {
 
     .bdrv_aio_readv         = qemu_rbd_aio_readv,
     .bdrv_aio_writev        = qemu_rbd_aio_writev,
-
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
     .bdrv_aio_flush         = qemu_rbd_aio_flush,
-#else
     .bdrv_co_flush_to_disk  = qemu_rbd_co_flush,
-#endif
-
-#ifdef LIBRBD_SUPPORTS_DISCARD
     .bdrv_aio_discard       = qemu_rbd_aio_discard,
-#endif
 
     .bdrv_snapshot_create   = qemu_rbd_snap_create,
     .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
@@ -1026,4 +995,153 @@ static void bdrv_rbd_init(void)
     bdrv_register(&bdrv_rbd);
 }
 
+typedef struct LibSymbol {
+    const char *name;
+    gpointer *addr;
+} LibSymbol;
+
+static int qemu_rbd_set_functions(GModule *lib, const LibSymbol *funcs)
+{
+    int i = 0;
+    while (funcs[i].name) {
+        const char *name = funcs[i].name;
+        if (!g_module_symbol(lib, name, funcs[i].addr)) {
+            error_report("%s could not be loaded from librbd or librados: %s",
+                         name, g_module_error());
+            return -1;
+        }
+        ++i;
+    }
+    return 0;
+}
+
+/*
+ * Set function pointers for basic librados and librbd
+ * functions that have always been present in these libraries.
+ */
+static int qemu_rbd_set_mandatory_functions(void)
+{
+    LibSymbol symbols[] = {
+        {"rados_create",
+         (gpointer *) &rados_create},
+        {"rados_connect",
+         (gpointer *) &rados_connect},
+        {"rados_shutdown",
+         (gpointer *) &rados_shutdown},
+        {"rados_conf_read_file",
+         (gpointer *) &rados_conf_read_file},
+        {"rados_conf_set",
+         (gpointer *) &rados_conf_set},
+        {"rados_ioctx_create",
+         (gpointer *) &rados_ioctx_create},
+        {"rados_ioctx_destroy",
+         (gpointer *) &rados_ioctx_destroy},
+        {"rbd_create",
+         (gpointer *) &rbd_create},
+        {"rbd_open",
+         (gpointer *) &rbd_open},
+        {"rbd_close",
+         (gpointer *) &rbd_close},
+        {"rbd_resize",
+         (gpointer *) &rbd_resize},
+        {"rbd_stat",
+         (gpointer *) &rbd_stat},
+        {"rbd_snap_list",
+         (gpointer *) &rbd_snap_list},
+        {"rbd_snap_list_end",
+         (gpointer *) &rbd_snap_list_end},
+        {"rbd_snap_create",
+         (gpointer *) &rbd_snap_create},
+        {"rbd_snap_remove",
+         (gpointer *) &rbd_snap_remove},
+        {"rbd_snap_rollback",
+         (gpointer *) &rbd_snap_rollback},
+        {"rbd_aio_write",
+         (gpointer *) &rbd_aio_write},
+        {"rbd_aio_read",
+         (gpointer *) &rbd_aio_read},
+        {"rbd_aio_create_completion",
+         (gpointer *) &rbd_aio_create_completion},
+        {"rbd_aio_get_return_value",
+         (gpointer *) &rbd_aio_get_return_value},
+        {"rbd_aio_release",
+         (gpointer *) &rbd_aio_release},
+        {NULL}
+    };
+
+    if (qemu_rbd_set_functions(librbd_handle, symbols) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Detect whether the installed version of librbd
+ * supports newer functionality, and enable or disable
+ * it appropriately in bdrv_rbd.
+ */
+static void qemu_rbd_set_optional_functions(void)
+{
+    if (g_module_symbol(librbd_handle, "rbd_flush",
+                         (gpointer *) &rbd_flush)) {
+        bdrv_rbd.bdrv_aio_flush = NULL;
+        bdrv_rbd.bdrv_co_flush_to_disk = qemu_rbd_co_flush;
+    } else {
+        rbd_flush = NULL;
+        bdrv_rbd.bdrv_co_flush_to_disk = NULL;
+    }
+
+    if (g_module_symbol(librbd_handle, "rbd_aio_flush",
+                        (gpointer *) &rbd_aio_flush)) {
+        bdrv_rbd.bdrv_co_flush_to_disk = NULL;
+        bdrv_rbd.bdrv_aio_flush = qemu_rbd_aio_flush;
+    } else {
+        rbd_aio_flush = NULL;
+        bdrv_rbd.bdrv_aio_flush = NULL;
+    }
+
+    if (g_module_symbol(librbd_handle, "rbd_aio_discard",
+                        (gpointer *) &rbd_aio_discard)) {
+        bdrv_rbd.bdrv_aio_discard = qemu_rbd_aio_discard;
+    } else {
+        rbd_aio_discard = NULL;
+        bdrv_rbd.bdrv_aio_discard = NULL;
+    }
+}
+
+static int qemu_rbd_load_libs(void)
+{
+    if (librbd_loaded) {
+        return 0;
+    }
+
+    if (!g_module_supported()) {
+        error_report("modules are not supported on this platform: %s",
+                     g_module_error());
+        return -1;
+    }
+
+    librbd_handle = g_module_open("librbd.so.1", 0);
+    if (!librbd_handle) {
+        error_report("error loading librbd: %s", g_module_error());
+        return -1;
+    }
+
+    /*
+     * Due to c++ templates used in librbd/librados and their
+     * dependencies, and linker duplicate trimming rules, closing
+     * librbd would leave it mapped. Make this explicit.
+     */
+    g_module_make_resident(librbd_handle);
+
+    if (qemu_rbd_set_mandatory_functions() < 0) {
+        return -1;
+    }
+    qemu_rbd_set_optional_functions();
+    librbd_loaded = true;
+
+    return 0;
+}
+
 block_init(bdrv_rbd_init);
diff --git a/block/rbd_types.h b/block/rbd_types.h
new file mode 100644
index 0000000..f327cb4
--- /dev/null
+++ b/block/rbd_types.h
@@ -0,0 +1,91 @@
+/*
+ * Types and signatures for librados and librbd
+ *
+ * Copyright (C) 2013 Inktank Storage Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_BLOCK_RBD_TYPES_H
+#define QEMU_BLOCK_RBD_TYPES_H
+
+/* types from librados used by the rbd block driver */
+
+typedef void *rados_t;
+typedef void *rados_ioctx_t;
+
+static int   (*rados_create)(rados_t *cluster, const char * const id);
+static int   (*rados_connect)(rados_t cluster);
+static void  (*rados_shutdown)(rados_t cluster);
+static int   (*rados_conf_read_file)(rados_t cluster, const char *path);
+static int   (*rados_conf_set)(rados_t cluster, const char *option,
+                               const char *value);
+static int   (*rados_ioctx_create)(rados_t cluster, const char *pool_name,
+                                   rados_ioctx_t *ioctx);
+static void  (*rados_ioctx_destroy)(rados_ioctx_t io);
+
+/* types from librbd used by the rbd block driver*/
+
+typedef void *rbd_image_t;
+typedef void *rbd_completion_t;
+typedef void (*rbd_callback_t)(rbd_completion_t cb, void *arg);
+
+typedef struct {
+    uint64_t id;
+    uint64_t size;
+    const char *name;
+} rbd_snap_info_t;
+
+#define RBD_MAX_IMAGE_NAME_SIZE 96
+#define RBD_MAX_BLOCK_NAME_SIZE 24
+
+typedef struct {
+    uint64_t size;
+    uint64_t obj_size;
+    uint64_t num_objs;
+    int order;
+    char block_name_prefix[RBD_MAX_BLOCK_NAME_SIZE];
+    int64_t parent_pool;
+    char parent_name[RBD_MAX_IMAGE_NAME_SIZE];
+} rbd_image_info_t;
+
+static int      (*rbd_create)(rados_ioctx_t io, const char *name, uint64_t size,
+                              int *order);
+static int      (*rbd_open)(rados_ioctx_t io, const char *name, rbd_image_t *image,
+                            const char *snap_name);
+static int      (*rbd_close)(rbd_image_t image);
+static int      (*rbd_resize)(rbd_image_t image, uint64_t size);
+static int      (*rbd_stat)(rbd_image_t image, rbd_image_info_t *info,
+                            size_t infosize);
+static int      (*rbd_snap_list)(rbd_image_t image, rbd_snap_info_t *snaps,
+                                 int *max_snaps);
+static void     (*rbd_snap_list_end)(rbd_snap_info_t *snaps);
+static int      (*rbd_snap_create)(rbd_image_t image, const char *snapname);
+static int      (*rbd_snap_remove)(rbd_image_t image, const char *snapname);
+static int      (*rbd_snap_rollback)(rbd_image_t image, const char *snapname);
+static int      (*rbd_aio_write)(rbd_image_t image, uint64_t off, size_t len,
+                                 const char *buf, rbd_completion_t c);
+static int      (*rbd_aio_read)(rbd_image_t image, uint64_t off, size_t len,
+                                char *buf, rbd_completion_t c);
+static int      (*rbd_aio_discard)(rbd_image_t image, uint64_t off, uint64_t len,
+                                   rbd_completion_t c);
+static int      (*rbd_aio_create_completion)(void *cb_arg,
+                                             rbd_callback_t complete_cb,
+                                             rbd_completion_t *c);
+static ssize_t  (*rbd_aio_get_return_value)(rbd_completion_t c);
+static void     (*rbd_aio_release)(rbd_completion_t c);
+static int      (*rbd_flush)(rbd_image_t image);
+static int      (*rbd_aio_flush)(rbd_image_t image, rbd_completion_t c);
+
+#endif
diff --git a/configure b/configure
index 4d1bc44..df587d7 100755
--- a/configure
+++ b/configure
@@ -224,7 +224,6 @@ qom_cast_debug="yes"
 trace_backend="nop"
 trace_file="trace"
 spice=""
-rbd=""
 smartcard_nss=""
 libusb=""
 usb_redir=""
@@ -880,10 +879,6 @@ for opt do
   ;;
   --enable-glx) glx="yes"
   ;;
-  --disable-rbd) rbd="no"
-  ;;
-  --enable-rbd) rbd="yes"
-  ;;
   --disable-xfsctl) xfs="no"
   ;;
   --enable-xfsctl) xfs="yes"
@@ -1181,7 +1176,6 @@ echo "  --with-trace-file=NAME   Full PATH,NAME of file to store traces"
 echo "                           Default:trace-<pid>"
 echo "  --disable-spice          disable spice"
 echo "  --enable-spice           enable spice"
-echo "  --enable-rbd             enable building the rados block device (rbd)"
 echo "  --disable-libiscsi       disable iscsi support"
 echo "  --enable-libiscsi        enable iscsi support"
 echo "  --disable-smartcard-nss  disable smartcard nss support"
@@ -2250,10 +2244,10 @@ if test "$mingw32" = yes; then
 else
     glib_req_ver=2.12
 fi
-if $pkg_config --atleast-version=$glib_req_ver gthread-2.0 > /dev/null 2>&1
+if $pkg_config --atleast-version=$glib_req_ver gthread-2.0 gmodule-2.0 > /dev/null 2>&1
 then
-    glib_cflags=`$pkg_config --cflags gthread-2.0 2>/dev/null`
-    glib_libs=`$pkg_config --libs gthread-2.0 2>/dev/null`
+    glib_cflags=`$pkg_config --cflags gthread-2.0 gmodule-2.0 2>/dev/null`
+    glib_libs=`$pkg_config --libs gthread-2.0 gmodule-2.0 2>/dev/null`
     LIBS="$glib_libs $LIBS"
     libs_qga="$glib_libs $libs_qga"
 else
@@ -2354,31 +2348,6 @@ if test "$mingw32" != yes -a "$pthread" = no; then
 fi
 
 ##########################################
-# rbd probe
-if test "$rbd" != "no" ; then
-  cat > $TMPC <<EOF
-#include <stdio.h>
-#include <rbd/librbd.h>
-int main(void) {
-    rados_t cluster;
-    rados_create(&cluster, NULL);
-    return 0;
-}
-EOF
-  rbd_libs="-lrbd -lrados"
-  if compile_prog "" "$rbd_libs" ; then
-    rbd=yes
-    libs_tools="$rbd_libs $libs_tools"
-    libs_softmmu="$rbd_libs $libs_softmmu"
-  else
-    if test "$rbd" = "yes" ; then
-      feature_not_found "rados block device"
-    fi
-    rbd=no
-  fi
-fi
-
-##########################################
 # libssh2 probe
 min_libssh2_version=1.2.8
 if test "$libssh2" != "no" ; then
@@ -3573,7 +3542,6 @@ echo "vhost-scsi support $vhost_scsi"
 echo "Trace backend     $trace_backend"
 echo "Trace output file $trace_file-<pid>"
 echo "spice support     $spice ($spice_protocol_version/$spice_server_version)"
-echo "rbd support       $rbd"
 echo "xfsctl support    $xfs"
 echo "nss used          $smartcard_nss"
 echo "libusb            $libusb"
@@ -3928,9 +3896,6 @@ fi
 if test "$qom_cast_debug" = "yes" ; then
   echo "CONFIG_QOM_CAST_DEBUG=y" >> $config_host_mak
 fi
-if test "$rbd" = "yes" ; then
-  echo "CONFIG_RBD=y" >> $config_host_mak
-fi
 
 echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
 
-- 
1.7.11.7