Blame SOURCES/0001-Revert-prov-psm2-Avoid-long-delay-in-psm2_ep_close.patch

85fa4a
From dafc07e38c7c2af1dec371276dec08da39e1636a Mon Sep 17 00:00:00 2001
85fa4a
From: Jianxin Xiong <jianxin.xiong@intel.com>
85fa4a
Date: Wed, 12 Sep 2018 08:53:32 -0700
85fa4a
Subject: [PATCH] Revert "prov/psm2: Avoid long delay in psm2_ep_close"
85fa4a
85fa4a
This reverts commit 7741df0db37085c1a49c05185c9b3f8170981661.
85fa4a
85fa4a
Sporadic assertion failures have been observed inside psm2_ep_disconnect2().
85fa4a
Disable the patch until the issue is fixed.
85fa4a
85fa4a
Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
85fa4a
---
85fa4a
 prov/psm2/src/psmx2_av.c       | 79 ++++++++++++++++++------------------------
85fa4a
 prov/psm2/src/psmx2_trx_ctxt.c | 32 +++--------------
85fa4a
 2 files changed, 39 insertions(+), 72 deletions(-)
85fa4a
85fa4a
diff --git a/prov/psm2/src/psmx2_av.c b/prov/psm2/src/psmx2_av.c
85fa4a
index 815407405..63c6e5beb 100644
85fa4a
--- a/prov/psm2/src/psmx2_av.c
85fa4a
+++ b/prov/psm2/src/psmx2_av.c
85fa4a
@@ -32,40 +32,6 @@
85fa4a
 
85fa4a
 #include "psmx2.h"
85fa4a
 
85fa4a
-static void psmx2_set_epaddr_context(struct psmx2_trx_ctxt *trx_ctxt,
85fa4a
-				     psm2_epid_t epid, psm2_epaddr_t epaddr)
85fa4a
-{
85fa4a
-	struct psmx2_epaddr_context *context;
85fa4a
-
85fa4a
-	context = (void *)psm2_epaddr_getctxt(epaddr);
85fa4a
-	if (context) {
85fa4a
-		if (context->trx_ctxt != trx_ctxt || context->epid != epid) {
85fa4a
-			FI_WARN(&psmx2_prov, FI_LOG_AV,
85fa4a
-				"trx_ctxt or epid doesn't match\n");
85fa4a
-			context = NULL;
85fa4a
-		}
85fa4a
-	}
85fa4a
-
85fa4a
-	if (context)
85fa4a
-		return;
85fa4a
-
85fa4a
-	context = malloc(sizeof *context);
85fa4a
-	if (!context) {
85fa4a
-		FI_WARN(&psmx2_prov, FI_LOG_AV,
85fa4a
-			"cannot allocate context\n");
85fa4a
-		return;
85fa4a
-	}
85fa4a
-
85fa4a
-	context->trx_ctxt = trx_ctxt;
85fa4a
-	context->epid = epid;
85fa4a
-	context->epaddr = epaddr;
85fa4a
-	psm2_epaddr_setctxt(epaddr, context);
85fa4a
-
85fa4a
-	psmx2_lock(&trx_ctxt->peer_lock, 2);
85fa4a
-	dlist_insert_before(&context->entry, &trx_ctxt->peer_list);
85fa4a
-	psmx2_unlock(&trx_ctxt->peer_lock, 2);
85fa4a
-}
85fa4a
-
85fa4a
 /*
85fa4a
  * SEP address query protocol:
85fa4a
  *
85fa4a
@@ -118,8 +84,6 @@ int psmx2_am_sep_handler(psm2_am_token_t token, psm2_amarg_t *args,
85fa4a
 	struct psmx2_fid_sep *sep;
85fa4a
 	struct psmx2_sep_query *req;
85fa4a
 	struct psmx2_fid_av *av;
85fa4a
-	psm2_epaddr_t src_epaddr;
85fa4a
-	psm2_epid_t src_epid;
85fa4a
 	psm2_epid_t *epids;
85fa4a
 	psm2_epid_t *buf = NULL;
85fa4a
 	int buflen;
85fa4a
@@ -129,15 +93,6 @@ int psmx2_am_sep_handler(psm2_am_token_t token, psm2_amarg_t *args,
85fa4a
 	cmd = PSMX2_AM_GET_OP(args[0].u32w0);
85fa4a
 	domain = trx_ctxt->domain;
85fa4a
 
85fa4a
-	/*
85fa4a
-	 * the implicit connection to the AM source needs also to be disconnected
85fa4a
-	 * to avoid long delay inside psm2_ep_close. make sure the source is added
85fa4a
-	 * to the peer list.
85fa4a
-	 */
85fa4a
-	psm2_am_get_source(token, &src_epaddr);
85fa4a
-	psm2_epaddr_to_epid(src_epaddr, &src_epid);
85fa4a
-	psmx2_set_epaddr_context(trx_ctxt, src_epid, src_epaddr);
85fa4a
-
85fa4a
 	switch (cmd) {
85fa4a
 	case PSMX2_AM_REQ_SEP_QUERY:
85fa4a
 		sep_id = args[0].u32w1;
85fa4a
@@ -226,6 +181,40 @@ static inline double psmx2_conn_timeout(int sec)
85fa4a
 	return sec * 1e9;
85fa4a
 }
85fa4a
 
85fa4a
+static void psmx2_set_epaddr_context(struct psmx2_trx_ctxt *trx_ctxt,
85fa4a
+				     psm2_epid_t epid, psm2_epaddr_t epaddr)
85fa4a
+{
85fa4a
+	struct psmx2_epaddr_context *context;
85fa4a
+
85fa4a
+	context = (void *)psm2_epaddr_getctxt(epaddr);
85fa4a
+	if (context) {
85fa4a
+		if (context->trx_ctxt != trx_ctxt || context->epid != epid) {
85fa4a
+			FI_WARN(&psmx2_prov, FI_LOG_AV,
85fa4a
+				"trx_ctxt or epid doesn't match\n");
85fa4a
+			context = NULL;
85fa4a
+		}
85fa4a
+	}
85fa4a
+
85fa4a
+	if (context)
85fa4a
+		return;
85fa4a
+
85fa4a
+	context = malloc(sizeof *context);
85fa4a
+	if (!context) {
85fa4a
+		FI_WARN(&psmx2_prov, FI_LOG_AV,
85fa4a
+			"cannot allocate context\n");
85fa4a
+		return;
85fa4a
+	}
85fa4a
+
85fa4a
+	context->trx_ctxt = trx_ctxt;
85fa4a
+	context->epid = epid;
85fa4a
+	context->epaddr = epaddr;
85fa4a
+	psm2_epaddr_setctxt(epaddr, context);
85fa4a
+
85fa4a
+	psmx2_lock(&trx_ctxt->peer_lock, 2);
85fa4a
+	dlist_insert_before(&context->entry, &trx_ctxt->peer_list);
85fa4a
+	psmx2_unlock(&trx_ctxt->peer_lock, 2);
85fa4a
+}
85fa4a
+
85fa4a
 int psmx2_epid_to_epaddr(struct psmx2_trx_ctxt *trx_ctxt,
85fa4a
 			 psm2_epid_t epid, psm2_epaddr_t *epaddr)
85fa4a
 {
85fa4a
diff --git a/prov/psm2/src/psmx2_trx_ctxt.c b/prov/psm2/src/psmx2_trx_ctxt.c
85fa4a
index 6dd3196e5..709ced94f 100644
85fa4a
--- a/prov/psm2/src/psmx2_trx_ctxt.c
85fa4a
+++ b/prov/psm2/src/psmx2_trx_ctxt.c
85fa4a
@@ -124,10 +124,6 @@ void psmx2_trx_ctxt_disconnect_peers(struct psmx2_trx_ctxt *trx_ctxt)
85fa4a
 	struct psmx2_epaddr_context *peer;
85fa4a
 	struct dlist_entry peer_list;
85fa4a
 	psm2_amarg_t arg;
85fa4a
-	psm2_epaddr_t *epaddrs;
85fa4a
-	psm2_error_t *errors;
85fa4a
-	int peer_count = 0;
85fa4a
-	int i = 0;
85fa4a
 
85fa4a
 	arg.u32w0 = PSMX2_AM_REQ_TRX_CTXT_DISCONNECT;
85fa4a
 
85fa4a
@@ -137,36 +133,17 @@ void psmx2_trx_ctxt_disconnect_peers(struct psmx2_trx_ctxt *trx_ctxt)
85fa4a
 	dlist_foreach_safe(&trx_ctxt->peer_list, item, tmp) {
85fa4a
 		dlist_remove(item);
85fa4a
 		dlist_insert_before(item, &peer_list);
85fa4a
-		peer_count++;
85fa4a
 	}
85fa4a
 	psmx2_unlock(&trx_ctxt->peer_lock, 2);
85fa4a
 
85fa4a
-	if (!peer_count)
85fa4a
-		return;
85fa4a
-
85fa4a
-	epaddrs = malloc(peer_count * sizeof(*epaddrs));
85fa4a
-	errors = malloc(peer_count * sizeof(*errors));
85fa4a
-
85fa4a
 	dlist_foreach_safe(&peer_list, item, tmp) {
85fa4a
 		peer = container_of(item, struct psmx2_epaddr_context, entry);
85fa4a
-		if (epaddrs)
85fa4a
-			epaddrs[i++] = peer->epaddr;
85fa4a
-		if (psmx2_env.disconnect) {
85fa4a
-			FI_INFO(&psmx2_prov, FI_LOG_CORE, "epaddr: %p\n", peer->epaddr);
85fa4a
-			psm2_am_request_short(peer->epaddr, PSMX2_AM_TRX_CTXT_HANDLER,
85fa4a
-					      &arg, 1, NULL, 0, 0, NULL, NULL);
85fa4a
-		}
85fa4a
+		FI_INFO(&psmx2_prov, FI_LOG_CORE, "epaddr: %p\n", peer->epaddr);
85fa4a
+		psm2_am_request_short(peer->epaddr, PSMX2_AM_TRX_CTXT_HANDLER,
85fa4a
+				      &arg, 1, NULL, 0, 0, NULL, NULL);
85fa4a
 		psm2_epaddr_setctxt(peer->epaddr, NULL);
85fa4a
 		free(peer);
85fa4a
 	}
85fa4a
-
85fa4a
-	/* disconnect locally to avoid long delay inside psm2_ep_close() */
85fa4a
-	if (epaddrs && errors)
85fa4a
-		psm2_ep_disconnect2(trx_ctxt->psm2_ep, peer_count, epaddrs, NULL,
85fa4a
-				    errors, PSM2_EP_DISCONNECT_FORCE, 0);
85fa4a
-
85fa4a
-	free(errors);
85fa4a
-	free(epaddrs);
85fa4a
 }
85fa4a
 
85fa4a
 static const char *psmx2_usage_flags_to_string(int usage_flags)
85fa4a
@@ -203,7 +180,8 @@ void psmx2_trx_ctxt_free(struct psmx2_trx_ctxt *trx_ctxt, int usage_flags)
85fa4a
 	dlist_remove(&trx_ctxt->entry);
85fa4a
 	psmx2_unlock(&trx_ctxt->domain->trx_ctxt_lock, 1);
85fa4a
 
85fa4a
-	psmx2_trx_ctxt_disconnect_peers(trx_ctxt);
85fa4a
+	if (psmx2_env.disconnect)
85fa4a
+		psmx2_trx_ctxt_disconnect_peers(trx_ctxt);
85fa4a
 
85fa4a
 	if (trx_ctxt->am_initialized)
85fa4a
 		psmx2_am_fini(trx_ctxt);
85fa4a
-- 
85fa4a
2.14.4
85fa4a