Blame SOURCES/0066-Ticket-49079-deadlock-on-cos-cache-rebuild.patch

723150
From 5391c666e58af5841eab88c98505f99c8ed20d6b Mon Sep 17 00:00:00 2001
723150
From: Thierry Bordaz <tbordaz@redhat.com>
723150
Date: Tue, 10 Jan 2017 14:32:53 +0100
723150
Subject: [PATCH 66/67] Ticket 49079: deadlock on cos cache rebuild
723150
723150
Bug Description:
723150
    To rebuild the cache cos_cache_creation the thread gets cos definitions from backend.
723150
    It means change_lock is held then cos_cache_creation will acquire some backend pages.
723150
723150
    A deadlock can happen if cos_post_op is called while backend is locked.
723150
    For example if a bepreop (urp) does an internal update on a cos definition.
723150
    Then the thread holds backend pages, that will be needed by cos_cache_creation,
723150
    and will acquire change_lock for notification of the cos_cache thread
723150
723150
Fix Description:
723150
723150
    Let cos cache rebuild thread run without holding change_lock.
723150
    The lock prevents parallel run but a flag can do the same.
723150
723150
https://fedorahosted.org/389/ticket/49079
723150
723150
Reviewed by: William Brown and Ludwig Krispenz (thanks to you both !!)
723150
723150
Platforms tested: F23
723150
723150
Flag Day: no
723150
723150
Doc impact: no
723150
723150
(cherry picked from commit ac44337bd97fe63071e7d83e9dcd788f2af1feab)
723150
(cherry picked from commit 3ac12cb94a8873b0fa4ddb12f924cc58bd9c9872)
723150
---
723150
 ldap/servers/plugins/cos/cos_cache.c | 73 ++++++++++++++++++++++++++++++------
723150
 1 file changed, 61 insertions(+), 12 deletions(-)
723150
723150
diff --git a/ldap/servers/plugins/cos/cos_cache.c b/ldap/servers/plugins/cos/cos_cache.c
723150
index 8a32630..87b4ba5 100644
723150
--- a/ldap/servers/plugins/cos/cos_cache.c
723150
+++ b/ldap/servers/plugins/cos/cos_cache.c
723150
@@ -111,7 +111,9 @@ void * cos_get_plugin_identity();
723150
 /* the global plugin handle */
723150
 static volatile vattr_sp_handle *vattr_handle = NULL;
723150
 
723150
+/* both variables are protected by change_lock */
723150
 static int cos_cache_notify_flag = 0;
723150
+static PRBool cos_cache_at_work = PR_FALSE;
723150
 
723150
 /* service definition cache structs */
723150
 
723150
@@ -199,7 +201,8 @@ typedef struct _cos_cache cosCache;
723150
 static cosCache *pCache; /* always the current global cache, only use getref to get */
723150
 
723150
 /* the place to start if you want a new cache */
723150
-static int cos_cache_create();
723150
+static int cos_cache_create_unlock(void);
723150
+static int cos_cache_creation_lock(void);
723150
 
723150
 /* cache index related functions */
723150
 static int cos_cache_index_all(cosCache *pCache);
723150
@@ -386,7 +389,7 @@ static void cos_cache_wait_on_change(void *arg)
723150
 	pCache = 0;
723150
 
723150
 	/* create initial cache */
723150
-	cos_cache_create();
723150
+	cos_cache_creation_lock();
723150
 
723150
         slapi_lock_mutex(start_lock);
723150
         started = 1;
723150
@@ -419,7 +422,7 @@ static void cos_cache_wait_on_change(void *arg)
723150
 		 * before we go running off doing lots of stuff lets check if we should stop
723150
 		*/
723150
 		if(keeprunning) {
723150
-			cos_cache_create();				
723150
+			cos_cache_creation_lock();
723150
 		}
723150
 		cos_cache_notify_flag = 0; /* Dealt with it */
723150
 	}/* while */
723150
@@ -431,22 +434,25 @@ static void cos_cache_wait_on_change(void *arg)
723150
 	LDAPDebug( LDAP_DEBUG_TRACE, "<-- cos_cache_wait_on_change thread exit\n",0,0,0);
723150
 }
723150
 
723150
+
723150
 /*
723150
-	cos_cache_create
723150
+	cos_cache_create_unlock
723150
 	---------------------
723150
 	Walks the definitions in the DIT and creates the cache.
723150
 	Once created, it swaps the new cache for the old one,
723150
 	releasing its refcount to the old cache and allowing it
723150
 	to be destroyed.
723150
+
723150
+        called while change_lock is NOT held
723150
 */
723150
-static int cos_cache_create()
723150
+static int cos_cache_create_unlock(void)
723150
 {
723150
 	int ret = -1;
723150
 	cosCache *pNewCache;
723150
 	static int firstTime = 1;
723150
 	int cache_built = 0;
723150
 
723150
-	LDAPDebug( LDAP_DEBUG_TRACE, "--> cos_cache_create\n",0,0,0);
723150
+	LDAPDebug( LDAP_DEBUG_TRACE, "--> cos_cache_create_unlock\n",0,0,0);
723150
 
723150
 	pNewCache = (cosCache*)slapi_ch_malloc(sizeof(cosCache));
723150
 	if(pNewCache)
723150
@@ -509,21 +515,21 @@ static int cos_cache_create()
723150
 				{
723150
 					/* we should not go on without proper schema checking */
723150
 					cos_cache_release(pNewCache);
723150
-					LDAPDebug( LDAP_DEBUG_ANY, "cos_cache_create: failed to cache the schema\n",0,0,0);			
723150
+					LDAPDebug( LDAP_DEBUG_ANY, "cos_cache_create_unlock: failed to cache the schema\n",0,0,0);			
723150
 				}
723150
 			}
723150
 			else
723150
 			{
723150
 				/* currently we cannot go on without the indexes */
723150
 				cos_cache_release(pNewCache);
723150
-				LDAPDebug( LDAP_DEBUG_ANY, "cos_cache_create: failed to index cache\n",0,0,0);			
723150
+				LDAPDebug( LDAP_DEBUG_ANY, "cos_cache_create_unlock: failed to index cache\n",0,0,0);			
723150
 			}
723150
 		}
723150
 		else
723150
 		{
723150
 			if(firstTime)
723150
 			{
723150
-				LDAPDebug( LDAP_DEBUG_PLUGIN, "cos_cache_create: cos disabled\n",0,0,0);
723150
+				LDAPDebug( LDAP_DEBUG_PLUGIN, "cos_cache_create_unlock: cos disabled\n",0,0,0);
723150
 				firstTime = 0;
723150
 			}
723150
 
723150
@@ -531,7 +537,7 @@ static int cos_cache_create()
723150
 		}
723150
 	}
723150
 	else
723150
-		LDAPDebug( LDAP_DEBUG_ANY, "cos_cache_create: memory allocation failure\n",0,0,0);
723150
+		LDAPDebug( LDAP_DEBUG_ANY, "cos_cache_create_unlock: memory allocation failure\n",0,0,0);
723150
 
723150
 
723150
 	/* make sure we have a new cache */
723150
@@ -563,10 +569,53 @@ static int cos_cache_create()
723150
 		
723150
 	}
723150
 
723150
-	LDAPDebug( LDAP_DEBUG_TRACE, "<-- cos_cache_create\n",0,0,0);
723150
+	LDAPDebug( LDAP_DEBUG_TRACE, "<-- cos_cache_create_unlock\n",0,0,0);
723150
 	return ret;
723150
 }
723150
 
723150
+/* cos_cache_creation_lock is called with change_lock being hold:
723150
+ *    slapi_lock_mutex(change_lock)
723150
+ *
723150
+ * To rebuild the cache cos_cache_creation gets cos definitions from backend, that
723150
+ * means change_lock is held then cos_cache_creation will acquire some backend pages.
723150
+ *
723150
+ * A deadlock can happen if cos_post_op is called while backend is locked. 
723150
+ * For example if a bepreop (urp) does an internal update on a cos definition,
723150
+ * the thread holds backend pages that will be needed by cos_cache_creation.
723150
+ *
723150
+ * A solution is to use a flag 'cos_cache_at_work' protected by change_lock,
723150
+ * release change_lock, recreate the cos_cache, acquire change_lock reset the flag.
723150
+ *
723150
+ * returned value: result of cos_cache_create_unlock
723150
+ *
723150
+ */
723150
+static int cos_cache_creation_lock(void)
723150
+{
723150
+	int ret = -1;
723150
+	int max_tries = 10;
723150
+
723150
+	for (; max_tries != 0; max_tries--) {
723150
+		/* if the cos_cache is already under work (cos_cache_create_unlock)
723150
+		 * wait 1 second
723150
+		 */
723150
+		if (cos_cache_at_work) {
723150
+			slapi_log_error(SLAPI_LOG_FATAL, COS_PLUGIN_SUBSYSTEM, "--> cos_cache_creation_lock already rebuilding cos_cache... retry\n");
723150
+			DS_Sleep (PR_MillisecondsToInterval(1000));
723150
+			continue;
723150
+		}
723150
+		cos_cache_at_work = PR_TRUE;
723150
+		slapi_unlock_mutex(change_lock);
723150
+		ret = cos_cache_create_unlock();
723150
+		slapi_lock_mutex(change_lock);
723150
+		cos_cache_at_work = PR_FALSE;
723150
+		break;
723150
+	}
723150
+	if (!max_tries) {
723150
+		slapi_log_error(SLAPI_LOG_FATAL, COS_PLUGIN_SUBSYSTEM, "--> cos_cache_creation_lock  rebuilt was to long, skip this rebuild\n");
723150
+	}
723150
+
723150
+	return ret;
723150
+}
723150
 
723150
 /*
723150
 	cos_cache_build_definition_list
723150
@@ -1639,7 +1688,7 @@ int cos_cache_getref(cos_cache **pptheCache)
723150
 		slapi_lock_mutex(change_lock);
723150
 		if(pCache == NULL)
723150
 		{
723150
-			if(cos_cache_create())
723150
+			if(cos_cache_creation_lock())
723150
 			{
723150
 				/* there was a problem or no COS definitions were found */
723150
 				LDAPDebug( LDAP_DEBUG_PLUGIN, "cos_cache_getref: no cos cache created\n",0,0,0);
723150
-- 
723150
2.9.3
723150