From 64d9964fc97fc525b86e12c5f385dea7e646a3b0 Mon Sep 17 00:00:00 2001 Message-Id: <64d9964fc97fc525b86e12c5f385dea7e646a3b0.1383564115.git.minovotn@redhat.com> In-Reply-To: <5575e0aec51f40ebec46e98ec085cda053283aba.1383564115.git.minovotn@redhat.com> References: <5575e0aec51f40ebec46e98ec085cda053283aba.1383564115.git.minovotn@redhat.com> From: Kevin Wolf Date: Thu, 31 Oct 2013 12:13:13 +0100 Subject: [PATCH 12/14] rbd: link and load librbd dynamically RH-Author: Kevin Wolf Message-id: <1383221595-24285-2-git-send-email-kwolf@redhat.com> Patchwork-id: 55183 O-Subject: [RHEL-7.0 qemu-kvm PATCH 1/3] rbd: link and load librbd dynamically Bugzilla: 989608 RH-Acked-by: Paolo Bonzini RH-Acked-by: Fam Zheng RH-Acked-by: Stefan Hajnoczi Bugzilla: 989608 Upstream status: Rejected Downstream status: Forward ported from RHEL 6 This is the downstream-only part that gets us rid of the build-time dependency on librbd and loads it dynamically when using an image. It is based on a patch submitted to qemu-devel and archived as http://lists.gnu.org/archive/html/qemu-devel/2013-04/msg01814.html For the forward-port to RHEL 7, I decided to implement the review suggestions from RHEL 6 to move the function pointers out of a separate struct to the top level, so that callers don't have to be changed compared to upstream. This should also reduce merge conflicts in future forward-ports. Note that this is not much more than compile tested; the real testing will be done by Inktank as soon as they can access RHEL 7. Original commit message follows: This allows the rbd block driver to detect symbols in the installed version of librbd, and enable or disable features appropriately. This obviates the #ifdefs regarding librbd versions. Loading librbd dynamically also makes the rbd block driver easier to install and package, since it removes the dependency on librbd at build time. Add structures containing the necessary function pointer signatures and types from librbd, and fill them in the first time the rbd module is used. Use glib's g_module interface so we don't preclude future portability, and don't have to deal with odd dlopen behavior directly. Internally, librbd and some libraries it depends on use C++ templates, which mean that they each contain a defined weak symbol for their definition. Due to the way the linker resolves duplicate symbols, the libraries loaded by librbd end up using the template definitions from librbd, creating a circular dependency. This means that once librbd is loaded, it won't be unloaded. Changing this behavior might work with a Sun ld, but there doesn't seem to be a portable (or even working with GNU ld) way to hide these C++ symbols correctly. Instead, never unload librbd, and explicitly make it resident. Signed-off-by: Josh Durgin Signed-off-by: Kevin Wolf --- block/Makefile.objs | 2 +- block/rbd.c | 218 ++++++++++++++++++++++++++++++++++++++++------------ block/rbd_types.h | 91 ++++++++++++++++++++++ configure | 41 +--------- 4 files changed, 263 insertions(+), 89 deletions(-) create mode 100644 block/rbd_types.h Signed-off-by: Michal Novotny --- block/Makefile.objs | 2 +- block/rbd.c | 218 ++++++++++++++++++++++++++++++++++++++++------------ block/rbd_types.h | 91 ++++++++++++++++++++++ configure | 41 +--------- 4 files changed, 263 insertions(+), 89 deletions(-) create mode 100644 block/rbd_types.h diff --git a/block/Makefile.objs b/block/Makefile.objs index 5f0358a..6b8d5ec 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -12,7 +12,7 @@ ifeq ($(CONFIG_POSIX),y) block-obj-y += nbd.o sheepdog.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o block-obj-$(CONFIG_CURL) += curl.o -block-obj-$(CONFIG_RBD) += rbd.o +block-obj-y += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o block-obj-$(CONFIG_LIBSSH2) += ssh.o endif diff --git a/block/rbd.c b/block/rbd.c index 0f2608b..7f5cfca 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -11,13 +11,14 @@ * GNU GPL, version 2 or (at your option) any later version. */ +#include #include #include "qemu-common.h" #include "qemu/error-report.h" #include "block/block_int.h" -#include +#include "rbd_types.h" /* * When specifying the image filename use: @@ -44,13 +45,6 @@ * leading "\". */ -/* rbd_aio_discard added in 0.1.2 */ -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) -#define LIBRBD_SUPPORTS_DISCARD -#else -#undef LIBRBD_SUPPORTS_DISCARD -#endif - #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER) #define RBD_MAX_CONF_NAME_SIZE 128 @@ -106,6 +100,10 @@ typedef struct BDRVRBDState { RADOSCB *event_rcb; } BDRVRBDState; +static bool librbd_loaded; +static GModule *librbd_handle; + +static int qemu_rbd_load_libs(void); static void rbd_aio_bh_cb(void *opaque); static int qemu_rbd_next_tok(char *dst, int dst_len, @@ -310,6 +308,10 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options) return -EINVAL; } + if (qemu_rbd_load_libs() < 0) { + return -EIO; + } + /* Read out options */ while (options && options->name) { if (!strcmp(options->name, BLOCK_OPT_SIZE)) { @@ -487,6 +489,10 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags) goto failed_opts; } + if (qemu_rbd_load_libs() < 0) { + return -EIO; + } + clientname = qemu_rbd_parse_clientname(conf, clientname_buf); r = rados_create(&s->cluster, clientname); if (r < 0) { @@ -678,28 +684,6 @@ static void rbd_aio_bh_cb(void *opaque) } } -static int rbd_aio_discard_wrapper(rbd_image_t image, - uint64_t off, - uint64_t len, - rbd_completion_t comp) -{ -#ifdef LIBRBD_SUPPORTS_DISCARD - return rbd_aio_discard(image, off, len, comp); -#else - return -ENOTSUP; -#endif -} - -static int rbd_aio_flush_wrapper(rbd_image_t image, - rbd_completion_t comp) -{ -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH - return rbd_aio_flush(image, comp); -#else - return -ENOTSUP; -#endif -} - static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, @@ -762,10 +746,10 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, r = rbd_aio_read(s->image, off, size, buf, c); break; case RBD_AIO_DISCARD: - r = rbd_aio_discard_wrapper(s->image, off, size, c); + r = rbd_aio_discard(s->image, off, size, c); break; case RBD_AIO_FLUSH: - r = rbd_aio_flush_wrapper(s->image, c); + r = rbd_aio_flush(s->image, c); break; default: r = -EINVAL; @@ -806,7 +790,6 @@ static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs, RBD_AIO_WRITE); } -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) @@ -814,19 +797,14 @@ static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs, return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH); } -#else - static int qemu_rbd_co_flush(BlockDriverState *bs) { -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1) - /* rbd_flush added in 0.1.1 */ BDRVRBDState *s = bs->opaque; - return rbd_flush(s->image); -#else + if (rbd_flush) { + return rbd_flush(s->image); + } return 0; -#endif } -#endif static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi) { @@ -964,7 +942,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs, return snap_count; } -#ifdef LIBRBD_SUPPORTS_DISCARD static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors, @@ -974,7 +951,6 @@ static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs, return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque, RBD_AIO_DISCARD); } -#endif static QEMUOptionParameter qemu_rbd_create_options[] = { { @@ -1004,16 +980,9 @@ static BlockDriver bdrv_rbd = { .bdrv_aio_readv = qemu_rbd_aio_readv, .bdrv_aio_writev = qemu_rbd_aio_writev, - -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH .bdrv_aio_flush = qemu_rbd_aio_flush, -#else .bdrv_co_flush_to_disk = qemu_rbd_co_flush, -#endif - -#ifdef LIBRBD_SUPPORTS_DISCARD .bdrv_aio_discard = qemu_rbd_aio_discard, -#endif .bdrv_snapshot_create = qemu_rbd_snap_create, .bdrv_snapshot_delete = qemu_rbd_snap_remove, @@ -1026,4 +995,153 @@ static void bdrv_rbd_init(void) bdrv_register(&bdrv_rbd); } +typedef struct LibSymbol { + const char *name; + gpointer *addr; +} LibSymbol; + +static int qemu_rbd_set_functions(GModule *lib, const LibSymbol *funcs) +{ + int i = 0; + while (funcs[i].name) { + const char *name = funcs[i].name; + if (!g_module_symbol(lib, name, funcs[i].addr)) { + error_report("%s could not be loaded from librbd or librados: %s", + name, g_module_error()); + return -1; + } + ++i; + } + return 0; +} + +/* + * Set function pointers for basic librados and librbd + * functions that have always been present in these libraries. + */ +static int qemu_rbd_set_mandatory_functions(void) +{ + LibSymbol symbols[] = { + {"rados_create", + (gpointer *) &rados_create}, + {"rados_connect", + (gpointer *) &rados_connect}, + {"rados_shutdown", + (gpointer *) &rados_shutdown}, + {"rados_conf_read_file", + (gpointer *) &rados_conf_read_file}, + {"rados_conf_set", + (gpointer *) &rados_conf_set}, + {"rados_ioctx_create", + (gpointer *) &rados_ioctx_create}, + {"rados_ioctx_destroy", + (gpointer *) &rados_ioctx_destroy}, + {"rbd_create", + (gpointer *) &rbd_create}, + {"rbd_open", + (gpointer *) &rbd_open}, + {"rbd_close", + (gpointer *) &rbd_close}, + {"rbd_resize", + (gpointer *) &rbd_resize}, + {"rbd_stat", + (gpointer *) &rbd_stat}, + {"rbd_snap_list", + (gpointer *) &rbd_snap_list}, + {"rbd_snap_list_end", + (gpointer *) &rbd_snap_list_end}, + {"rbd_snap_create", + (gpointer *) &rbd_snap_create}, + {"rbd_snap_remove", + (gpointer *) &rbd_snap_remove}, + {"rbd_snap_rollback", + (gpointer *) &rbd_snap_rollback}, + {"rbd_aio_write", + (gpointer *) &rbd_aio_write}, + {"rbd_aio_read", + (gpointer *) &rbd_aio_read}, + {"rbd_aio_create_completion", + (gpointer *) &rbd_aio_create_completion}, + {"rbd_aio_get_return_value", + (gpointer *) &rbd_aio_get_return_value}, + {"rbd_aio_release", + (gpointer *) &rbd_aio_release}, + {NULL} + }; + + if (qemu_rbd_set_functions(librbd_handle, symbols) < 0) { + return -1; + } + + return 0; +} + +/* + * Detect whether the installed version of librbd + * supports newer functionality, and enable or disable + * it appropriately in bdrv_rbd. + */ +static void qemu_rbd_set_optional_functions(void) +{ + if (g_module_symbol(librbd_handle, "rbd_flush", + (gpointer *) &rbd_flush)) { + bdrv_rbd.bdrv_aio_flush = NULL; + bdrv_rbd.bdrv_co_flush_to_disk = qemu_rbd_co_flush; + } else { + rbd_flush = NULL; + bdrv_rbd.bdrv_co_flush_to_disk = NULL; + } + + if (g_module_symbol(librbd_handle, "rbd_aio_flush", + (gpointer *) &rbd_aio_flush)) { + bdrv_rbd.bdrv_co_flush_to_disk = NULL; + bdrv_rbd.bdrv_aio_flush = qemu_rbd_aio_flush; + } else { + rbd_aio_flush = NULL; + bdrv_rbd.bdrv_aio_flush = NULL; + } + + if (g_module_symbol(librbd_handle, "rbd_aio_discard", + (gpointer *) &rbd_aio_discard)) { + bdrv_rbd.bdrv_aio_discard = qemu_rbd_aio_discard; + } else { + rbd_aio_discard = NULL; + bdrv_rbd.bdrv_aio_discard = NULL; + } +} + +static int qemu_rbd_load_libs(void) +{ + if (librbd_loaded) { + return 0; + } + + if (!g_module_supported()) { + error_report("modules are not supported on this platform: %s", + g_module_error()); + return -1; + } + + librbd_handle = g_module_open("librbd.so.1", 0); + if (!librbd_handle) { + error_report("error loading librbd: %s", g_module_error()); + return -1; + } + + /* + * Due to c++ templates used in librbd/librados and their + * dependencies, and linker duplicate trimming rules, closing + * librbd would leave it mapped. Make this explicit. + */ + g_module_make_resident(librbd_handle); + + if (qemu_rbd_set_mandatory_functions() < 0) { + return -1; + } + qemu_rbd_set_optional_functions(); + librbd_loaded = true; + + return 0; +} + block_init(bdrv_rbd_init); diff --git a/block/rbd_types.h b/block/rbd_types.h new file mode 100644 index 0000000..f327cb4 --- /dev/null +++ b/block/rbd_types.h @@ -0,0 +1,91 @@ +/* + * Types and signatures for librados and librbd + * + * Copyright (C) 2013 Inktank Storage Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef QEMU_BLOCK_RBD_TYPES_H +#define QEMU_BLOCK_RBD_TYPES_H + +/* types from librados used by the rbd block driver */ + +typedef void *rados_t; +typedef void *rados_ioctx_t; + +static int (*rados_create)(rados_t *cluster, const char * const id); +static int (*rados_connect)(rados_t cluster); +static void (*rados_shutdown)(rados_t cluster); +static int (*rados_conf_read_file)(rados_t cluster, const char *path); +static int (*rados_conf_set)(rados_t cluster, const char *option, + const char *value); +static int (*rados_ioctx_create)(rados_t cluster, const char *pool_name, + rados_ioctx_t *ioctx); +static void (*rados_ioctx_destroy)(rados_ioctx_t io); + +/* types from librbd used by the rbd block driver*/ + +typedef void *rbd_image_t; +typedef void *rbd_completion_t; +typedef void (*rbd_callback_t)(rbd_completion_t cb, void *arg); + +typedef struct { + uint64_t id; + uint64_t size; + const char *name; +} rbd_snap_info_t; + +#define RBD_MAX_IMAGE_NAME_SIZE 96 +#define RBD_MAX_BLOCK_NAME_SIZE 24 + +typedef struct { + uint64_t size; + uint64_t obj_size; + uint64_t num_objs; + int order; + char block_name_prefix[RBD_MAX_BLOCK_NAME_SIZE]; + int64_t parent_pool; + char parent_name[RBD_MAX_IMAGE_NAME_SIZE]; +} rbd_image_info_t; + +static int (*rbd_create)(rados_ioctx_t io, const char *name, uint64_t size, + int *order); +static int (*rbd_open)(rados_ioctx_t io, const char *name, rbd_image_t *image, + const char *snap_name); +static int (*rbd_close)(rbd_image_t image); +static int (*rbd_resize)(rbd_image_t image, uint64_t size); +static int (*rbd_stat)(rbd_image_t image, rbd_image_info_t *info, + size_t infosize); +static int (*rbd_snap_list)(rbd_image_t image, rbd_snap_info_t *snaps, + int *max_snaps); +static void (*rbd_snap_list_end)(rbd_snap_info_t *snaps); +static int (*rbd_snap_create)(rbd_image_t image, const char *snapname); +static int (*rbd_snap_remove)(rbd_image_t image, const char *snapname); +static int (*rbd_snap_rollback)(rbd_image_t image, const char *snapname); +static int (*rbd_aio_write)(rbd_image_t image, uint64_t off, size_t len, + const char *buf, rbd_completion_t c); +static int (*rbd_aio_read)(rbd_image_t image, uint64_t off, size_t len, + char *buf, rbd_completion_t c); +static int (*rbd_aio_discard)(rbd_image_t image, uint64_t off, uint64_t len, + rbd_completion_t c); +static int (*rbd_aio_create_completion)(void *cb_arg, + rbd_callback_t complete_cb, + rbd_completion_t *c); +static ssize_t (*rbd_aio_get_return_value)(rbd_completion_t c); +static void (*rbd_aio_release)(rbd_completion_t c); +static int (*rbd_flush)(rbd_image_t image); +static int (*rbd_aio_flush)(rbd_image_t image, rbd_completion_t c); + +#endif diff --git a/configure b/configure index 4d1bc44..df587d7 100755 --- a/configure +++ b/configure @@ -224,7 +224,6 @@ qom_cast_debug="yes" trace_backend="nop" trace_file="trace" spice="" -rbd="" smartcard_nss="" libusb="" usb_redir="" @@ -880,10 +879,6 @@ for opt do ;; --enable-glx) glx="yes" ;; - --disable-rbd) rbd="no" - ;; - --enable-rbd) rbd="yes" - ;; --disable-xfsctl) xfs="no" ;; --enable-xfsctl) xfs="yes" @@ -1181,7 +1176,6 @@ echo " --with-trace-file=NAME Full PATH,NAME of file to store traces" echo " Default:trace-" echo " --disable-spice disable spice" echo " --enable-spice enable spice" -echo " --enable-rbd enable building the rados block device (rbd)" echo " --disable-libiscsi disable iscsi support" echo " --enable-libiscsi enable iscsi support" echo " --disable-smartcard-nss disable smartcard nss support" @@ -2250,10 +2244,10 @@ if test "$mingw32" = yes; then else glib_req_ver=2.12 fi -if $pkg_config --atleast-version=$glib_req_ver gthread-2.0 > /dev/null 2>&1 +if $pkg_config --atleast-version=$glib_req_ver gthread-2.0 gmodule-2.0 > /dev/null 2>&1 then - glib_cflags=`$pkg_config --cflags gthread-2.0 2>/dev/null` - glib_libs=`$pkg_config --libs gthread-2.0 2>/dev/null` + glib_cflags=`$pkg_config --cflags gthread-2.0 gmodule-2.0 2>/dev/null` + glib_libs=`$pkg_config --libs gthread-2.0 gmodule-2.0 2>/dev/null` LIBS="$glib_libs $LIBS" libs_qga="$glib_libs $libs_qga" else @@ -2354,31 +2348,6 @@ if test "$mingw32" != yes -a "$pthread" = no; then fi ########################################## -# rbd probe -if test "$rbd" != "no" ; then - cat > $TMPC < -#include -int main(void) { - rados_t cluster; - rados_create(&cluster, NULL); - return 0; -} -EOF - rbd_libs="-lrbd -lrados" - if compile_prog "" "$rbd_libs" ; then - rbd=yes - libs_tools="$rbd_libs $libs_tools" - libs_softmmu="$rbd_libs $libs_softmmu" - else - if test "$rbd" = "yes" ; then - feature_not_found "rados block device" - fi - rbd=no - fi -fi - -########################################## # libssh2 probe min_libssh2_version=1.2.8 if test "$libssh2" != "no" ; then @@ -3573,7 +3542,6 @@ echo "vhost-scsi support $vhost_scsi" echo "Trace backend $trace_backend" echo "Trace output file $trace_file-" echo "spice support $spice ($spice_protocol_version/$spice_server_version)" -echo "rbd support $rbd" echo "xfsctl support $xfs" echo "nss used $smartcard_nss" echo "libusb $libusb" @@ -3928,9 +3896,6 @@ fi if test "$qom_cast_debug" = "yes" ; then echo "CONFIG_QOM_CAST_DEBUG=y" >> $config_host_mak fi -if test "$rbd" = "yes" ; then - echo "CONFIG_RBD=y" >> $config_host_mak -fi echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak -- 1.7.11.7