andykimpe / rpms / 389-ds-base

Forked from rpms/389-ds-base 5 months ago
Clone
dc8c34
From db0e1a7eba9926630e0499ab9a843c09f649e6c2 Mon Sep 17 00:00:00 2001
dc8c34
From: Mark Reynolds <mreynolds@redhat.com>
dc8c34
Date: Tue, 8 Apr 2014 14:39:47 -0400
dc8c34
Subject: [PATCH 196/225] Ticket 47771 - Performing deletes during tombstone
dc8c34
 purging results in operation errors
dc8c34
dc8c34
Bug Description:  An operations error can occur when deleting entry while
dc8c34
                  tombstone purging is happening.  The error occurs when it
dc8c34
                  tries the lock the parent entry, but the parent entry was
dc8c34
                  replaced in the cache before it could be locked.
dc8c34
dc8c34
Fix Description:  Return a special error code when cache_lock_entry fails because
dc8c34
                  the entry was marked as deleted.  Then try to grab the entry
dc8c34
                  again and lock it.
dc8c34
dc8c34
https://fedorahosted.org/389/ticket/47771
dc8c34
dc8c34
Reviewed by: rmeggins & nhosoi(Thanks!!)
dc8c34
(cherry picked from commit c5f22dd6c278f670fc36af8029d5c28a89051cfa)
dc8c34
---
dc8c34
 ldap/servers/slapd/back-ldbm/back-ldbm.h   |  1 +
dc8c34
 ldap/servers/slapd/back-ldbm/cache.c       |  6 +++--
dc8c34
 ldap/servers/slapd/back-ldbm/ldbm_delete.c | 38 ++++++++++++++++++++++++------
dc8c34
 3 files changed, 36 insertions(+), 9 deletions(-)
dc8c34
dc8c34
diff --git a/ldap/servers/slapd/back-ldbm/back-ldbm.h b/ldap/servers/slapd/back-ldbm/back-ldbm.h
dc8c34
index 7e5a261..8ad3c20 100644
dc8c34
--- a/ldap/servers/slapd/back-ldbm/back-ldbm.h
dc8c34
+++ b/ldap/servers/slapd/back-ldbm/back-ldbm.h
dc8c34
@@ -215,6 +215,7 @@ typedef unsigned short u_int16_t;
dc8c34
 #define DEFAULT_IMPORT_INDEX_BUFFER_SIZE  0
dc8c34
 #define SUBLEN                   3
dc8c34
 #define LDBM_CACHE_RETRY_COUNT 1000 /* Number of times we re-try a cache operation */
dc8c34
+#define RETRY_CACHE_LOCK 2 /* error code to signal a retry of the cache lock */
dc8c34
 #define IDL_FETCH_RETRY_COUNT 5 /* Number of times we re-try idl_fetch if it returns deadlock */
dc8c34
 #define IMPORT_SUBCOUNT_HASHTABLE_SIZE 500 /* Number of buckets in hash used to accumulate subcount for broody parents */
dc8c34
 
dc8c34
diff --git a/ldap/servers/slapd/back-ldbm/cache.c b/ldap/servers/slapd/back-ldbm/cache.c
dc8c34
index d97644f..865f1ef 100644
dc8c34
--- a/ldap/servers/slapd/back-ldbm/cache.c
dc8c34
+++ b/ldap/servers/slapd/back-ldbm/cache.c
dc8c34
@@ -1471,7 +1471,9 @@ void cache_unlock(struct cache *cache)
dc8c34
 
dc8c34
 /* locks an entry so that it can be modified (you should have gotten the
dc8c34
  * entry via cache_find_*).
dc8c34
- * returns 0 on success, 1 if the entry is scheduled for deletion.
dc8c34
+ * returns 0 on success,
dc8c34
+ * returns 1 if the entry lock could not be created
dc8c34
+ * returns 2 (RETRY_CACHE_LOCK) if the entry is scheduled for deletion.
dc8c34
  */
dc8c34
 int cache_lock_entry(struct cache *cache, struct backentry *e)
dc8c34
 {
dc8c34
@@ -1503,7 +1505,7 @@ int cache_lock_entry(struct cache *cache, struct backentry *e)
dc8c34
        PR_Unlock(cache->c_mutex);
dc8c34
        PR_ExitMonitor(e->ep_mutexp);
dc8c34
        LOG("<= cache_lock_entry (DELETED)\n", 0, 0, 0);
dc8c34
-       return 1;
dc8c34
+       return RETRY_CACHE_LOCK;
dc8c34
     }
dc8c34
     PR_Unlock(cache->c_mutex);
dc8c34
 
dc8c34
diff --git a/ldap/servers/slapd/back-ldbm/ldbm_delete.c b/ldap/servers/slapd/back-ldbm/ldbm_delete.c
dc8c34
index a4b8d8e..7c036df 100644
dc8c34
--- a/ldap/servers/slapd/back-ldbm/ldbm_delete.c
dc8c34
+++ b/ldap/servers/slapd/back-ldbm/ldbm_delete.c
dc8c34
@@ -335,14 +335,37 @@ ldbm_back_delete( Slapi_PBlock *pb )
dc8c34
 			 * (find_entry2modify_only_ext), a wrong parent could be found,
dc8c34
 			 * and numsubordinate count could get confused.
dc8c34
 			 */
dc8c34
-			ID pid = (ID)strtol(pid_str, (char **)NULL, 10);
dc8c34
+			ID pid;
dc8c34
+			int cache_retry_count = 0;
dc8c34
+			int cache_retry = 0;
dc8c34
+
dc8c34
+			pid = (ID)strtol(pid_str, (char **)NULL, 10);
dc8c34
 			slapi_ch_free_string(&pid_str);
dc8c34
-			parent = id2entry(be, pid ,NULL, &retval);
dc8c34
-			if (parent && cache_lock_entry(&inst->inst_cache, parent)) {
dc8c34
-				/* Failed to obtain parent entry's entry lock */
dc8c34
-				CACHE_RETURN(&(inst->inst_cache), &parent);
dc8c34
-				retval = -1;
dc8c34
-				goto error_return;
dc8c34
+
dc8c34
+			/*
dc8c34
+			 * Its possible that the parent entry retrieved from the cache in id2entry
dc8c34
+			 * could be removed before we lock it, because tombstone purging updated/replaced
dc8c34
+			 * the parent.  If we fail to lock the entry, just try again.
dc8c34
+			 */
dc8c34
+			while(1){
dc8c34
+				parent = id2entry(be, pid ,NULL, &retval);
dc8c34
+				if (parent && (cache_retry = cache_lock_entry(&inst->inst_cache, parent))) {
dc8c34
+					/* Failed to obtain parent entry's entry lock */
dc8c34
+					if(cache_retry == RETRY_CACHE_LOCK &&
dc8c34
+					   cache_retry_count < LDBM_CACHE_RETRY_COUNT)
dc8c34
+					{
dc8c34
+						/* try again */
dc8c34
+						DS_Sleep(PR_MillisecondsToInterval(100));
dc8c34
+						cache_retry_count++;
dc8c34
+						continue;
dc8c34
+					}
dc8c34
+					retval = -1;
dc8c34
+					CACHE_RETURN(&(inst->inst_cache), &parent);
dc8c34
+					goto error_return;
dc8c34
+				} else {
dc8c34
+					/* entry locked, move on */
dc8c34
+					break;
dc8c34
+				}
dc8c34
 			}
dc8c34
 		}
dc8c34
 		if (NULL == parent) {
dc8c34
@@ -1224,6 +1247,7 @@ diskfull_return:
dc8c34
 	slapi_ch_free((void**)&errbuf);
dc8c34
 	slapi_sdn_done(&nscpEntrySDN);
dc8c34
 	slapi_ch_free_string(&e_uniqueid);
dc8c34
+	slapi_sdn_done(&parentsdn);
dc8c34
 	if (pb->pb_conn)
dc8c34
 	{
dc8c34
 		slapi_log_error (SLAPI_LOG_TRACE, "ldbm_back_delete", "leave conn=%" NSPRIu64 " op=%d\n", pb->pb_conn->c_connid, operation->o_opid);
dc8c34
-- 
dc8c34
1.8.1.4
dc8c34