Blame SOURCES/0001-Revert-ucx-check-supported-transports-and-devices-fo.patch

c5e0aa
From 63c80c7692e55f634cbca6f67cc5c9cdef3a04d2 Mon Sep 17 00:00:00 2001
c5e0aa
From: Honggang Li <honli@redhat.com>
c5e0aa
Date: Mon, 28 Jun 2021 21:38:13 +0800
c5e0aa
Subject: [PATCH] Revert "ucx: check supported transports and devices for
c5e0aa
 setting priority"
c5e0aa
c5e0aa
This reverts commit c36d7459b6331c4da825cad5a64326e7c1a272aa.
c5e0aa
---
c5e0aa
 contrib/platform/mellanox/optimized.conf |   2 -
c5e0aa
 ompi/mca/pml/ucx/pml_ucx_component.c     |  15 +-
c5e0aa
 opal/mca/common/ucx/common_ucx.c         | 202 +----------------------
c5e0aa
 opal/mca/common/ucx/common_ucx.h         |  15 --
c5e0aa
 opal/mca/common/ucx/configure.m4         |   2 -
c5e0aa
 5 files changed, 2 insertions(+), 234 deletions(-)
c5e0aa
c5e0aa
diff --git a/contrib/platform/mellanox/optimized.conf b/contrib/platform/mellanox/optimized.conf
c5e0aa
index 543fd8d1e224..b86b37c9e2fa 100644
c5e0aa
--- a/contrib/platform/mellanox/optimized.conf
c5e0aa
+++ b/contrib/platform/mellanox/optimized.conf
c5e0aa
@@ -61,8 +61,6 @@
c5e0aa
 coll = ^ml
c5e0aa
 hwloc_base_binding_policy = core
c5e0aa
 btl = self
c5e0aa
-pml_ucx_tls = any
c5e0aa
-pml_ucx_devices = any
c5e0aa
 # Basic behavior to smooth startup
c5e0aa
 mca_base_component_show_load_errors = 0
c5e0aa
 orte_abort_timeout = 10
c5e0aa
diff --git a/ompi/mca/pml/ucx/pml_ucx_component.c b/ompi/mca/pml/ucx/pml_ucx_component.c
c5e0aa
index 6aed6c41d11d..ed9cc6573e8e 100644
c5e0aa
--- a/ompi/mca/pml/ucx/pml_ucx_component.c
c5e0aa
+++ b/ompi/mca/pml/ucx/pml_ucx_component.c
c5e0aa
@@ -107,26 +107,13 @@ static mca_pml_base_module_t*
c5e0aa
 mca_pml_ucx_component_init(int* priority, bool enable_progress_threads,
c5e0aa
                            bool enable_mpi_threads)
c5e0aa
 {
c5e0aa
-    opal_common_ucx_support_level_t support_level;
c5e0aa
     int ret;
c5e0aa
 
c5e0aa
-    support_level = opal_common_ucx_support_level(ompi_pml_ucx.ucp_context);
c5e0aa
-    if (support_level == OPAL_COMMON_UCX_SUPPORT_NONE) {
c5e0aa
-        return NULL;
c5e0aa
-    }
c5e0aa
-
c5e0aa
     if ( (ret = mca_pml_ucx_init(enable_mpi_threads)) != 0) {
c5e0aa
         return NULL;
c5e0aa
     }
c5e0aa
 
c5e0aa
-    /*
c5e0aa
-     * If found supported devices - set to the configured (high) priority.
c5e0aa
-     * Otherwise - Found only supported transports (which could be exposed by
c5e0aa
-     *             unsupported devices), so set a priority lower than ob1.
c5e0aa
-     */
c5e0aa
-    *priority = (support_level == OPAL_COMMON_UCX_SUPPORT_DEVICE) ?
c5e0aa
-                ompi_pml_ucx.priority : 19;
c5e0aa
-    PML_UCX_VERBOSE(2, "returning priority %d", *priority);
c5e0aa
+    *priority = ompi_pml_ucx.priority;
c5e0aa
     return &ompi_pml_ucx.super;
c5e0aa
 }
c5e0aa
 
c5e0aa
diff --git a/opal/mca/common/ucx/common_ucx.c b/opal/mca/common/ucx/common_ucx.c
c5e0aa
index ac7a17d799a5..ae8e66877ab6 100644
c5e0aa
--- a/opal/mca/common/ucx/common_ucx.c
c5e0aa
+++ b/opal/mca/common/ucx/common_ucx.c
c5e0aa
@@ -14,11 +14,8 @@
c5e0aa
 #include "opal/mca/base/mca_base_framework.h"
c5e0aa
 #include "opal/mca/pmix/pmix.h"
c5e0aa
 #include "opal/memoryhooks/memory.h"
c5e0aa
-#include "opal/util/argv.h"
c5e0aa
 
c5e0aa
 #include <ucm/api/ucm.h>
c5e0aa
-#include <fnmatch.h>
c5e0aa
-#include <stdio.h>
c5e0aa
 
c5e0aa
 /***********************************************************************/
c5e0aa
 
c5e0aa
@@ -28,8 +25,7 @@ opal_common_ucx_module_t opal_common_ucx = {
c5e0aa
     .verbose             = 0,
c5e0aa
     .progress_iterations = 100,
c5e0aa
     .registered          = 0,
c5e0aa
-    .opal_mem_hooks      = 0,
c5e0aa
-    .tls                 = NULL
c5e0aa
+    .opal_mem_hooks      = 0
c5e0aa
 };
c5e0aa
 
c5e0aa
 static void opal_common_ucx_mem_release_cb(void *buf, size_t length,
c5e0aa
@@ -40,15 +36,10 @@ static void opal_common_ucx_mem_release_cb(void *buf, size_t length,
c5e0aa
 
c5e0aa
 OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *component)
c5e0aa
 {
c5e0aa
-    static const char *default_tls     = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,cuda_ipc,rocm_ipc";
c5e0aa
-    static const char *default_devices = "mlx*";
c5e0aa
     static int registered = 0;
c5e0aa
     static int hook_index;
c5e0aa
     static int verbose_index;
c5e0aa
     static int progress_index;
c5e0aa
-    static int tls_index;
c5e0aa
-    static int devices_index;
c5e0aa
-
c5e0aa
     if (!registered) {
c5e0aa
         verbose_index = mca_base_var_register("opal", "opal_common", "ucx", "verbose",
c5e0aa
                                               "Verbose level of the UCX components",
c5e0aa
@@ -69,29 +60,6 @@ OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *
c5e0aa
                                            OPAL_INFO_LVL_3,
c5e0aa
                                            MCA_BASE_VAR_SCOPE_LOCAL,
c5e0aa
                                            &opal_common_ucx.opal_mem_hooks);
c5e0aa
-
c5e0aa
-        opal_common_ucx.tls  = malloc(sizeof(*opal_common_ucx.tls));
c5e0aa
-        *opal_common_ucx.tls = strdup(default_tls);
c5e0aa
-        tls_index = mca_base_var_register("opal", "opal_common", "ucx", "tls",
c5e0aa
-                                          "List of UCX transports which should be supported on the system, to enable "
c5e0aa
-                                          "selecting the UCX component. Special values: any (any available). "
c5e0aa
-                                          "A '^' prefix negates the list. "
c5e0aa
-                                          "For example, in order to exclude on shared memory and TCP transports, "
c5e0aa
-                                          "please set to '^posix,sysv,self,tcp,cma,knem,xpmem'.",
c5e0aa
-                                          MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
c5e0aa
-                                          OPAL_INFO_LVL_3,
c5e0aa
-                                          MCA_BASE_VAR_SCOPE_LOCAL,
c5e0aa
-                                          opal_common_ucx.tls);
c5e0aa
-
c5e0aa
-        opal_common_ucx.devices  = malloc(sizeof(*opal_common_ucx.devices));
c5e0aa
-        *opal_common_ucx.devices = strdup(default_devices);
c5e0aa
-        devices_index = mca_base_var_register("opal", "opal_common", "ucx", "devices",
c5e0aa
-                                              "List of device driver pattern names, which, if supported by UCX, will "
c5e0aa
-                                              "bump its priority above ob1. Special values: any (any available)",
c5e0aa
-                                              MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
c5e0aa
-                                              OPAL_INFO_LVL_3,
c5e0aa
-                                              MCA_BASE_VAR_SCOPE_LOCAL,
c5e0aa
-                                              opal_common_ucx.devices);
c5e0aa
         registered = 1;
c5e0aa
     }
c5e0aa
     if (component) {
c5e0aa
@@ -107,14 +75,6 @@ OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *
c5e0aa
                                       component->mca_type_name,
c5e0aa
                                       component->mca_component_name,
c5e0aa
                                       "opal_mem_hooks", 0);
c5e0aa
-        mca_base_var_register_synonym(tls_index, component->mca_project_name,
c5e0aa
-                                      component->mca_type_name,
c5e0aa
-                                      component->mca_component_name,
c5e0aa
-                                      "tls", 0);
c5e0aa
-        mca_base_var_register_synonym(devices_index, component->mca_project_name,
c5e0aa
-                                      component->mca_type_name,
c5e0aa
-                                      component->mca_component_name,
c5e0aa
-                                      "devices", 0);
c5e0aa
     }
c5e0aa
 }
c5e0aa
 
c5e0aa
@@ -163,166 +123,6 @@ OPAL_DECLSPEC void opal_common_ucx_mca_deregister(void)
c5e0aa
     opal_output_close(opal_common_ucx.output);
c5e0aa
 }
c5e0aa
 
c5e0aa
-#if HAVE_DECL_OPEN_MEMSTREAM
c5e0aa
-static bool opal_common_ucx_check_device(const char *device_name, char **device_list)
c5e0aa
-{
c5e0aa
-    char sysfs_driver_link[PATH_MAX];
c5e0aa
-    char driver_path[PATH_MAX];
c5e0aa
-    char *ib_device_name;
c5e0aa
-    char *driver_name;
c5e0aa
-    char **list_item;
c5e0aa
-    ssize_t ret;
c5e0aa
-
c5e0aa
-    /* mlx5_0:1 */
c5e0aa
-    ret = sscanf(device_name, "%m[^:]%*d", &ib_device_name);
c5e0aa
-    if (ret != 1) {
c5e0aa
-        return false;
c5e0aa
-    }
c5e0aa
-
c5e0aa
-    sysfs_driver_link[sizeof(sysfs_driver_link) - 1] = '\0';
c5e0aa
-    snprintf(sysfs_driver_link, sizeof(sysfs_driver_link) - 1,
c5e0aa
-             "/sys/class/infiniband/%s/device/driver", ib_device_name);
c5e0aa
-    free(ib_device_name);
c5e0aa
-
c5e0aa
-    driver_path[sizeof(driver_path) - 1] = '\0';
c5e0aa
-    ret = readlink(sysfs_driver_link, driver_path, sizeof(driver_path) - 1);
c5e0aa
-    if (ret < 0) {
c5e0aa
-        MCA_COMMON_UCX_VERBOSE(2, "readlink(%s) failed: %s", sysfs_driver_link,
c5e0aa
-                               strerror(errno));
c5e0aa
-        return false;
c5e0aa
-    }
c5e0aa
-
c5e0aa
-    driver_name = basename(driver_path);
c5e0aa
-    for (list_item = device_list; *list_item != NULL; ++list_item) {
c5e0aa
-        if (!fnmatch(*list_item, driver_name, 0)) {
c5e0aa
-            MCA_COMMON_UCX_VERBOSE(2, "driver '%s' matched by '%s'",
c5e0aa
-                                   driver_path, *list_item);
c5e0aa
-            return true;
c5e0aa
-        }
c5e0aa
-    }
c5e0aa
-
c5e0aa
-    return false;
c5e0aa
-}
c5e0aa
-#endif
c5e0aa
-
c5e0aa
-OPAL_DECLSPEC opal_common_ucx_support_level_t
c5e0aa
-opal_common_ucx_support_level(ucp_context_h context)
c5e0aa
-{
c5e0aa
-    opal_common_ucx_support_level_t support_level = OPAL_COMMON_UCX_SUPPORT_NONE;
c5e0aa
-    static const char *support_level_names[] = {
c5e0aa
-        [OPAL_COMMON_UCX_SUPPORT_NONE]      = "none",
c5e0aa
-        [OPAL_COMMON_UCX_SUPPORT_TRANSPORT] = "transports only",
c5e0aa
-        [OPAL_COMMON_UCX_SUPPORT_DEVICE]    = "transports and devices"
c5e0aa
-    };
c5e0aa
-#if HAVE_DECL_OPEN_MEMSTREAM
c5e0aa
-    char *rsc_tl_name, *rsc_device_name;
c5e0aa
-    char **tl_list, **device_list, **list_item;
c5e0aa
-    bool is_any_tl, is_any_device;
c5e0aa
-    bool found_tl, negate;
c5e0aa
-    char line[128];
c5e0aa
-    FILE *stream;
c5e0aa
-    char *buffer;
c5e0aa
-    size_t size;
c5e0aa
-    int ret;
c5e0aa
-#endif
c5e0aa
-
c5e0aa
-    is_any_tl     = !strcmp(*opal_common_ucx.tls, "any");
c5e0aa
-    is_any_device = !strcmp(*opal_common_ucx.devices, "any");
c5e0aa
-
c5e0aa
-    /* Check for special value "any" */
c5e0aa
-    if (is_any_tl && is_any_device) {
c5e0aa
-        MCA_COMMON_UCX_VERBOSE(1, "ucx is enabled on any transport or device",
c5e0aa
-                               *opal_common_ucx.tls);
c5e0aa
-        support_level = OPAL_COMMON_UCX_SUPPORT_DEVICE;
c5e0aa
-        goto out;
c5e0aa
-    }
c5e0aa
-
c5e0aa
-#if HAVE_DECL_OPEN_MEMSTREAM
c5e0aa
-    /* Split transports list */
c5e0aa
-    negate  = ('^' == (*opal_common_ucx.tls)[0]);
c5e0aa
-    tl_list = opal_argv_split(*opal_common_ucx.tls + (negate ? 1 : 0), ',');
c5e0aa
-    if (tl_list == NULL) {
c5e0aa
-        MCA_COMMON_UCX_VERBOSE(1, "failed to split tl list '%s', ucx is disabled",
c5e0aa
-                               *opal_common_ucx.tls);
c5e0aa
-        goto out;
c5e0aa
-    }
c5e0aa
-
c5e0aa
-    /* Split devices list */
c5e0aa
-    device_list = opal_argv_split(*opal_common_ucx.devices, ',');
c5e0aa
-    if (device_list == NULL) {
c5e0aa
-        MCA_COMMON_UCX_VERBOSE(1, "failed to split devices list '%s', ucx is disabled",
c5e0aa
-                               *opal_common_ucx.devices);
c5e0aa
-        goto out_free_tl_list;
c5e0aa
-    }
c5e0aa
-
c5e0aa
-    /* Open memory stream to dump UCX information to */
c5e0aa
-    stream = open_memstream(&buffer, &size);
c5e0aa
-    if (stream == NULL) {
c5e0aa
-        MCA_COMMON_UCX_VERBOSE(1, "failed to open memory stream for ucx info (%s), "
c5e0aa
-                               "ucx is disabled", strerror(errno));
c5e0aa
-        goto out_free_device_list;
c5e0aa
-    }
c5e0aa
-
c5e0aa
-    /* Print ucx transports information to the memory stream */
c5e0aa
-    ucp_context_print_info(context, stream);
c5e0aa
-
c5e0aa
-    /* Rewind and read transports/devices list from the stream */
c5e0aa
-    fseek(stream, 0, SEEK_SET);
c5e0aa
-    while ((support_level != OPAL_COMMON_UCX_SUPPORT_DEVICE) &&
c5e0aa
-           (fgets(line, sizeof(line), stream) != NULL)) {
c5e0aa
-        rsc_tl_name = NULL;
c5e0aa
-        ret = sscanf(line,
c5e0aa
-                     /* "# resource 6  :  md 5  dev 4  flags -- rc_verbs/mlx5_0:1" */
c5e0aa
-                     "# resource %*d : md %*d dev %*d flags -- %m[^/ \n\r]/%m[^/ \n\r]",
c5e0aa
-                     &rsc_tl_name, &rsc_device_name);
c5e0aa
-        if (ret != 2) {
c5e0aa
-            free(rsc_tl_name);
c5e0aa
-            continue;
c5e0aa
-        }
c5e0aa
-
c5e0aa
-        /* Check if 'rsc_tl_name' is found  provided list */
c5e0aa
-        found_tl = is_any_tl;
c5e0aa
-        for (list_item = tl_list; !found_tl && (*list_item != NULL); ++list_item) {
c5e0aa
-            found_tl = !strcmp(*list_item, rsc_tl_name);
c5e0aa
-        }
c5e0aa
-
c5e0aa
-        /* Check if the transport has a match (either positive or negative) */
c5e0aa
-        assert(!(is_any_tl && negate));
c5e0aa
-        if (found_tl != negate) {
c5e0aa
-            if (is_any_device ||
c5e0aa
-                opal_common_ucx_check_device(rsc_device_name, device_list)) {
c5e0aa
-                MCA_COMMON_UCX_VERBOSE(2, "%s/%s: matched both transport and device list",
c5e0aa
-                                    rsc_tl_name, rsc_device_name);
c5e0aa
-                support_level = OPAL_COMMON_UCX_SUPPORT_DEVICE;
c5e0aa
-            } else {
c5e0aa
-                MCA_COMMON_UCX_VERBOSE(2, "%s/%s: matched transport list but not device list",
c5e0aa
-                                    rsc_tl_name, rsc_device_name);
c5e0aa
-                support_level = OPAL_COMMON_UCX_SUPPORT_TRANSPORT;
c5e0aa
-            }
c5e0aa
-        } else {
c5e0aa
-            MCA_COMMON_UCX_VERBOSE(2, "%s/%s: did not match transport list",
c5e0aa
-                                   rsc_tl_name, rsc_device_name);
c5e0aa
-        }
c5e0aa
-
c5e0aa
-        free(rsc_device_name);
c5e0aa
-        free(rsc_tl_name);
c5e0aa
-    }
c5e0aa
-
c5e0aa
-    MCA_COMMON_UCX_VERBOSE(2, "support level is %s", support_level_names[support_level]);
c5e0aa
-    fclose(stream);
c5e0aa
-    free(buffer);
c5e0aa
-
c5e0aa
-out_free_device_list:
c5e0aa
-    opal_argv_free(device_list);
c5e0aa
-out_free_tl_list:
c5e0aa
-    opal_argv_free(tl_list);
c5e0aa
-out:
c5e0aa
-#else
c5e0aa
-    MCA_COMMON_UCX_VERBOSE(2, "open_memstream() was not found, ucx is disabled");
c5e0aa
-#endif
c5e0aa
-    return support_level;
c5e0aa
-}
c5e0aa
-
c5e0aa
 void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status)
c5e0aa
 {
c5e0aa
 }
c5e0aa
diff --git a/opal/mca/common/ucx/common_ucx.h b/opal/mca/common/ucx/common_ucx.h
c5e0aa
index 92cdd738ef98..202131ac8907 100644
c5e0aa
--- a/opal/mca/common/ucx/common_ucx.h
c5e0aa
+++ b/opal/mca/common/ucx/common_ucx.h
c5e0aa
@@ -88,8 +88,6 @@ typedef struct opal_common_ucx_module {
c5e0aa
     int  progress_iterations;
c5e0aa
     int  registered;
c5e0aa
     bool opal_mem_hooks;
c5e0aa
-    char **tls;
c5e0aa
-    char **devices;
c5e0aa
 } opal_common_ucx_module_t;
c5e0aa
 
c5e0aa
 typedef struct opal_common_ucx_del_proc {
c5e0aa
@@ -97,23 +95,10 @@ typedef struct opal_common_ucx_del_proc {
c5e0aa
     size_t   vpid;
c5e0aa
 } opal_common_ucx_del_proc_t;
c5e0aa
 
c5e0aa
-typedef enum {
c5e0aa
-    /* No supported transports found (according to configured list of supported
c5e0aa
-       transports) */
c5e0aa
-    OPAL_COMMON_UCX_SUPPORT_NONE,
c5e0aa
-
c5e0aa
-    /* Have supported transports but not supported devices */
c5e0aa
-    OPAL_COMMON_UCX_SUPPORT_TRANSPORT,
c5e0aa
-
c5e0aa
-    /* Have both supported transports and supported devices */
c5e0aa
-    OPAL_COMMON_UCX_SUPPORT_DEVICE,
c5e0aa
-} opal_common_ucx_support_level_t;
c5e0aa
-
c5e0aa
 extern opal_common_ucx_module_t opal_common_ucx;
c5e0aa
 
c5e0aa
 OPAL_DECLSPEC void opal_common_ucx_mca_register(void);
c5e0aa
 OPAL_DECLSPEC void opal_common_ucx_mca_deregister(void);
c5e0aa
-OPAL_DECLSPEC opal_common_ucx_support_level_t opal_common_ucx_support_level(ucp_context_h context);
c5e0aa
 OPAL_DECLSPEC void opal_common_ucx_mca_proc_added(void);
c5e0aa
 OPAL_DECLSPEC void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status);
c5e0aa
 OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence(ucp_worker_h worker);
c5e0aa
diff --git a/opal/mca/common/ucx/configure.m4 b/opal/mca/common/ucx/configure.m4
c5e0aa
index af8628a889c6..27e07c2005b2 100644
c5e0aa
--- a/opal/mca/common/ucx/configure.m4
c5e0aa
+++ b/opal/mca/common/ucx/configure.m4
c5e0aa
@@ -18,8 +18,6 @@ AC_DEFUN([MCA_opal_common_ucx_CONFIG],[
c5e0aa
                [common_ucx_happy="yes"],
c5e0aa
                [common_ucx_happy="no"])
c5e0aa
 
c5e0aa
-    AC_CHECK_DECLS([open_memstream], [], [], [[#include <stdio.h>]])
c5e0aa
-
c5e0aa
     AS_IF([test "$common_ucx_happy" = "yes"],
c5e0aa
           [$1],
c5e0aa
           [$2])
c5e0aa
-- 
c5e0aa
2.31.1
c5e0aa