|
|
a2f18f |
From 1acfdbb4428c70f7f6058da4374ecb29f9bb3149 Mon Sep 17 00:00:00 2001
|
|
|
a2f18f |
From: Mark Reynolds <mreynolds@redhat.com>
|
|
|
a2f18f |
Date: Fri, 17 Jul 2015 15:08:00 -0400
|
|
|
a2f18f |
Subject: [PATCH 26/30] Ticket 48179 - Starting a replica agreement can lead to
|
|
|
a2f18f |
deadlock
|
|
|
a2f18f |
|
|
|
a2f18f |
Bug Description: When starting a replica agreement and setting the agmt maxcsn
|
|
|
a2f18f |
a deadlock can occur with another op updating nsuniqueid index.
|
|
|
a2f18f |
When setting the agmt maxcsn the server searches for the tombstone
|
|
|
a2f18f |
ruv which uses the nsuniqueid index, and it does this while holding
|
|
|
a2f18f |
the repl agmt lock. If another thread is doing a delete and
|
|
|
a2f18f |
writing to the change log, it can also grab a write lock on the
|
|
|
a2f18f |
nsuniqueid index, before it attempts to grab the agmt lock. This
|
|
|
a2f18f |
can lead to a deadlock if the timing is right.
|
|
|
a2f18f |
|
|
|
a2f18f |
Fix Description: When starting the agmt and setting the agmt maxcsn, search/get
|
|
|
a2f18f |
the tombstone ruv before we take the repl agmt lock.
|
|
|
a2f18f |
|
|
|
a2f18f |
https://fedorahosted.org/389/ticket/48179
|
|
|
a2f18f |
|
|
|
a2f18f |
Reviewed by: nhosoi(Thanks!)
|
|
|
a2f18f |
|
|
|
a2f18f |
(cherry picked from commit eb3086dcb0c56a23d6cee00a12f38b2584fe59a2)
|
|
|
a2f18f |
(cherry picked from commit 23a3ff6082cba3eb749401eff44942b16dc30538)
|
|
|
a2f18f |
---
|
|
|
a2f18f |
ldap/servers/plugins/replication/repl5.h | 1 -
|
|
|
a2f18f |
ldap/servers/plugins/replication/repl5_agmt.c | 211 ++++++++++++--------------
|
|
|
a2f18f |
2 files changed, 101 insertions(+), 111 deletions(-)
|
|
|
a2f18f |
|
|
|
a2f18f |
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h
|
|
|
a2f18f |
index 4a5d859..0b0f26b 100644
|
|
|
a2f18f |
--- a/ldap/servers/plugins/replication/repl5.h
|
|
|
a2f18f |
+++ b/ldap/servers/plugins/replication/repl5.h
|
|
|
a2f18f |
@@ -380,7 +380,6 @@ PRUint64 agmt_get_protocol_timeout(Repl_Agmt *agmt);
|
|
|
a2f18f |
void agmt_set_protocol_timeout(Repl_Agmt *agmt, PRUint64 timeout);
|
|
|
a2f18f |
void agmt_update_maxcsn(Replica *r, Slapi_DN *sdn, int op, LDAPMod **mods, CSN *csn);
|
|
|
a2f18f |
void add_agmt_maxcsns(Slapi_Entry *e, Replica *r);
|
|
|
a2f18f |
-void agmt_set_maxcsn(Repl_Agmt *ra);
|
|
|
a2f18f |
void agmt_remove_maxcsn(Repl_Agmt *ra);
|
|
|
a2f18f |
int agmt_maxcsn_to_smod (Replica *r, Slapi_Mod *smod);
|
|
|
a2f18f |
int agmt_set_WaitForAsyncResults(Repl_Agmt *ra, const Slapi_Entry *e);
|
|
|
a2f18f |
diff --git a/ldap/servers/plugins/replication/repl5_agmt.c b/ldap/servers/plugins/replication/repl5_agmt.c
|
|
|
a2f18f |
index 9d1a8f2..f84eacb 100644
|
|
|
a2f18f |
--- a/ldap/servers/plugins/replication/repl5_agmt.c
|
|
|
a2f18f |
+++ b/ldap/servers/plugins/replication/repl5_agmt.c
|
|
|
a2f18f |
@@ -668,43 +668,127 @@ int
|
|
|
a2f18f |
agmt_start(Repl_Agmt *ra)
|
|
|
a2f18f |
{
|
|
|
a2f18f |
Repl_Protocol *prot = NULL;
|
|
|
a2f18f |
+ Slapi_PBlock *pb = NULL;
|
|
|
a2f18f |
+ Slapi_Entry **entries = NULL;
|
|
|
a2f18f |
+ Slapi_DN *repl_sdn = NULL;
|
|
|
a2f18f |
+ char *attrs[2];
|
|
|
a2f18f |
+ int protocol_state;
|
|
|
a2f18f |
+ int found_ruv = 0;
|
|
|
a2f18f |
+ int rc = 0;
|
|
|
a2f18f |
|
|
|
a2f18f |
- int protocol_state;
|
|
|
a2f18f |
-
|
|
|
a2f18f |
- /* To Allow Consumer Initialisation when adding an agreement: */
|
|
|
a2f18f |
- if (ra->auto_initialize == STATE_PERFORMING_TOTAL_UPDATE)
|
|
|
a2f18f |
- {
|
|
|
a2f18f |
- protocol_state = STATE_PERFORMING_TOTAL_UPDATE;
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
- else
|
|
|
a2f18f |
- {
|
|
|
a2f18f |
- protocol_state = STATE_PERFORMING_INCREMENTAL_UPDATE;
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
+ /* To Allow Consumer Initialisation when adding an agreement: */
|
|
|
a2f18f |
+ if (ra->auto_initialize == STATE_PERFORMING_TOTAL_UPDATE){
|
|
|
a2f18f |
+ protocol_state = STATE_PERFORMING_TOTAL_UPDATE;
|
|
|
a2f18f |
+ } else {
|
|
|
a2f18f |
+ protocol_state = STATE_PERFORMING_INCREMENTAL_UPDATE;
|
|
|
a2f18f |
+ }
|
|
|
a2f18f |
|
|
|
a2f18f |
/* First, create a new protocol object */
|
|
|
a2f18f |
if ((prot = prot_new(ra, protocol_state)) == NULL) {
|
|
|
a2f18f |
return -1;
|
|
|
a2f18f |
}
|
|
|
a2f18f |
|
|
|
a2f18f |
- /* Now it is safe to own the agreement lock */
|
|
|
a2f18f |
+ /*
|
|
|
a2f18f |
+ * Set the agmt maxcsn
|
|
|
a2f18f |
+ *
|
|
|
a2f18f |
+ * We need to get the replica ruv before we take the
|
|
|
a2f18f |
+ * agmt lock to avoid potential deadlocks on the nsuniqueid
|
|
|
a2f18f |
+ * index.
|
|
|
a2f18f |
+ */
|
|
|
a2f18f |
+ repl_sdn = agmt_get_replarea(ra);
|
|
|
a2f18f |
+
|
|
|
a2f18f |
+ pb = slapi_pblock_new();
|
|
|
a2f18f |
+ attrs[0] = (char*)type_agmtMaxCSN;
|
|
|
a2f18f |
+ attrs[1] = NULL;
|
|
|
a2f18f |
+ slapi_search_internal_set_pb_ext(
|
|
|
a2f18f |
+ pb,
|
|
|
a2f18f |
+ repl_sdn,
|
|
|
a2f18f |
+ LDAP_SCOPE_BASE,
|
|
|
a2f18f |
+ "objectclass=*",
|
|
|
a2f18f |
+ attrs,
|
|
|
a2f18f |
+ 0,
|
|
|
a2f18f |
+ NULL,
|
|
|
a2f18f |
+ RUV_STORAGE_ENTRY_UNIQUEID,
|
|
|
a2f18f |
+ repl_get_plugin_identity (PLUGIN_MULTIMASTER_REPLICATION),
|
|
|
a2f18f |
+ OP_FLAG_REPLICATED);
|
|
|
a2f18f |
+ slapi_search_internal_pb (pb);
|
|
|
a2f18f |
+
|
|
|
a2f18f |
+ slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &rc);
|
|
|
a2f18f |
+ if (rc == LDAP_SUCCESS){
|
|
|
a2f18f |
+ slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_SEARCH_ENTRIES, &entries);
|
|
|
a2f18f |
+ if (NULL == entries || NULL == entries[0]){
|
|
|
a2f18f |
+ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name,
|
|
|
a2f18f |
+ "agmt_start: replica ruv tombstone entry for "
|
|
|
a2f18f |
+ "replica %s not found\n",
|
|
|
a2f18f |
+ slapi_sdn_get_dn(ra->replarea));
|
|
|
a2f18f |
+ } else {
|
|
|
a2f18f |
+ found_ruv = 1;
|
|
|
a2f18f |
+ }
|
|
|
a2f18f |
+ }
|
|
|
a2f18f |
+
|
|
|
a2f18f |
+ /*
|
|
|
a2f18f |
+ * Now it is safe to own the agreement lock
|
|
|
a2f18f |
+ */
|
|
|
a2f18f |
PR_Lock(ra->lock);
|
|
|
a2f18f |
|
|
|
a2f18f |
/* Check that replication is not already started */
|
|
|
a2f18f |
if (ra->protocol != NULL) {
|
|
|
a2f18f |
slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, "replication already started for agreement \"%s\"\n", agmt_get_long_name(ra));
|
|
|
a2f18f |
- PR_Unlock(ra->lock);
|
|
|
a2f18f |
prot_free(&prot);
|
|
|
a2f18f |
- return 0;
|
|
|
a2f18f |
+ goto done;
|
|
|
a2f18f |
}
|
|
|
a2f18f |
|
|
|
a2f18f |
+ /* Set and start the protocol */
|
|
|
a2f18f |
ra->protocol = prot;
|
|
|
a2f18f |
-
|
|
|
a2f18f |
- /* Start the protocol thread */
|
|
|
a2f18f |
prot_start(ra->protocol);
|
|
|
a2f18f |
|
|
|
a2f18f |
- agmt_set_maxcsn(ra);
|
|
|
a2f18f |
+ /*
|
|
|
a2f18f |
+ * If we found the repl ruv, set the agmt maxcsn...
|
|
|
a2f18f |
+ */
|
|
|
a2f18f |
+ if (found_ruv){
|
|
|
a2f18f |
+ Replica *r;
|
|
|
a2f18f |
+ Object *repl_obj;
|
|
|
a2f18f |
+ char **maxcsns = NULL;
|
|
|
a2f18f |
+ int i;
|
|
|
a2f18f |
|
|
|
a2f18f |
+ maxcsns = slapi_entry_attr_get_charray(entries[0], type_agmtMaxCSN);
|
|
|
a2f18f |
+ repl_obj = prot_get_replica_object(ra->protocol);
|
|
|
a2f18f |
+ if(repl_obj && maxcsns){
|
|
|
a2f18f |
+ r = (Replica *)object_get_data(repl_obj);
|
|
|
a2f18f |
+ if(r){
|
|
|
a2f18f |
+ /*
|
|
|
a2f18f |
+ * Loop over all the agmt maxcsns and find ours...
|
|
|
a2f18f |
+ */
|
|
|
a2f18f |
+ for(i = 0; maxcsns[i]; i++){
|
|
|
a2f18f |
+ char buf[BUFSIZ];
|
|
|
a2f18f |
+ char unavail_buf[BUFSIZ];
|
|
|
a2f18f |
+
|
|
|
a2f18f |
+ PR_snprintf(buf,BUFSIZ,"%s;%s;%s;%d;",slapi_sdn_get_dn(repl_sdn),
|
|
|
a2f18f |
+ slapi_rdn_get_value_by_ref(slapi_rdn_get_rdn(ra->rdn)),
|
|
|
a2f18f |
+ ra->hostname, ra->port);
|
|
|
a2f18f |
+ PR_snprintf(unavail_buf, BUFSIZ,"%s;%s;%s;%d;unavailable", slapi_sdn_get_dn(repl_sdn),
|
|
|
a2f18f |
+ slapi_rdn_get_value_by_ref(slapi_rdn_get_rdn(ra->rdn)),
|
|
|
a2f18f |
+ ra->hostname, ra->port);
|
|
|
a2f18f |
+ if(strstr(maxcsns[i], buf) || strstr(maxcsns[i], unavail_buf)){
|
|
|
a2f18f |
+ /* Set the maxcsn */
|
|
|
a2f18f |
+ slapi_ch_free_string(&ra->maxcsn);
|
|
|
a2f18f |
+ ra->maxcsn = slapi_ch_strdup(maxcsns[i]);
|
|
|
a2f18f |
+ ra->consumerRID = agmt_maxcsn_get_rid(maxcsns[i]);
|
|
|
a2f18f |
+ ra->tmpConsumerRID = 1;
|
|
|
a2f18f |
+ break;
|
|
|
a2f18f |
+ }
|
|
|
a2f18f |
+ }
|
|
|
a2f18f |
+ }
|
|
|
a2f18f |
+ }
|
|
|
a2f18f |
+ slapi_ch_array_free(maxcsns);
|
|
|
a2f18f |
+ }
|
|
|
a2f18f |
+
|
|
|
a2f18f |
+done:
|
|
|
a2f18f |
PR_Unlock(ra->lock);
|
|
|
a2f18f |
+ slapi_free_search_results_internal(pb);
|
|
|
a2f18f |
+ slapi_pblock_destroy (pb);
|
|
|
a2f18f |
+ slapi_sdn_free(&repl_sdn);
|
|
|
a2f18f |
+
|
|
|
a2f18f |
return 0;
|
|
|
a2f18f |
}
|
|
|
a2f18f |
|
|
|
a2f18f |
@@ -3052,99 +3136,6 @@ agmt_maxcsn_to_smod (Replica *r, Slapi_Mod *smod)
|
|
|
a2f18f |
}
|
|
|
a2f18f |
|
|
|
a2f18f |
/*
|
|
|
a2f18f |
- * Called when we start a repl agmt
|
|
|
a2f18f |
- */
|
|
|
a2f18f |
-void
|
|
|
a2f18f |
-agmt_set_maxcsn(Repl_Agmt *ra)
|
|
|
a2f18f |
-{
|
|
|
a2f18f |
- Slapi_PBlock *pb = NULL;
|
|
|
a2f18f |
- Slapi_Entry **entries = NULL;
|
|
|
a2f18f |
- Replica *r = NULL;
|
|
|
a2f18f |
- Object *repl_obj;
|
|
|
a2f18f |
- const Slapi_DN *tombstone_sdn = NULL;
|
|
|
a2f18f |
- char *attrs[2];
|
|
|
a2f18f |
- int rc;
|
|
|
a2f18f |
-
|
|
|
a2f18f |
- /* read ruv state from the ruv tombstone entry */
|
|
|
a2f18f |
- pb = slapi_pblock_new();
|
|
|
a2f18f |
- if (!pb) {
|
|
|
a2f18f |
- slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "agmt_set_maxcsn: Out of memory\n");
|
|
|
a2f18f |
- goto done;
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
- repl_obj = prot_get_replica_object(ra->protocol);
|
|
|
a2f18f |
- if(repl_obj){
|
|
|
a2f18f |
- r = (Replica *)object_get_data(repl_obj);
|
|
|
a2f18f |
- tombstone_sdn = replica_get_root(r);
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
- ra->maxcsn = NULL;
|
|
|
a2f18f |
- attrs[0] = (char*)type_agmtMaxCSN;
|
|
|
a2f18f |
- attrs[1] = NULL;
|
|
|
a2f18f |
- slapi_search_internal_set_pb_ext(
|
|
|
a2f18f |
- pb,
|
|
|
a2f18f |
- (Slapi_DN *)tombstone_sdn,
|
|
|
a2f18f |
- LDAP_SCOPE_BASE,
|
|
|
a2f18f |
- "objectclass=*",
|
|
|
a2f18f |
- attrs,
|
|
|
a2f18f |
- 0, /* attrsonly */
|
|
|
a2f18f |
- NULL, /* controls */
|
|
|
a2f18f |
- RUV_STORAGE_ENTRY_UNIQUEID,
|
|
|
a2f18f |
- repl_get_plugin_identity (PLUGIN_MULTIMASTER_REPLICATION),
|
|
|
a2f18f |
- OP_FLAG_REPLICATED); /* flags */
|
|
|
a2f18f |
- slapi_search_internal_pb (pb);
|
|
|
a2f18f |
-
|
|
|
a2f18f |
- slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &rc);
|
|
|
a2f18f |
- if (rc == LDAP_SUCCESS){
|
|
|
a2f18f |
- Replica *r;
|
|
|
a2f18f |
- Object *repl_obj;
|
|
|
a2f18f |
- char **maxcsns;
|
|
|
a2f18f |
- int i;
|
|
|
a2f18f |
-
|
|
|
a2f18f |
- slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_SEARCH_ENTRIES, &entries);
|
|
|
a2f18f |
- if (NULL == entries || NULL == entries[0]){
|
|
|
a2f18f |
- slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name,
|
|
|
a2f18f |
- "agmt_set_maxcsn: replica ruv tombstone entry for "
|
|
|
a2f18f |
- "replica %s not found\n",
|
|
|
a2f18f |
- slapi_sdn_get_dn(ra->replarea));
|
|
|
a2f18f |
- goto done;
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
- maxcsns = slapi_entry_attr_get_charray(entries[0], type_agmtMaxCSN);
|
|
|
a2f18f |
- repl_obj = prot_get_replica_object(ra->protocol);
|
|
|
a2f18f |
- if(repl_obj && maxcsns){
|
|
|
a2f18f |
- r = (Replica *)object_get_data(repl_obj);
|
|
|
a2f18f |
- if(r){
|
|
|
a2f18f |
- /*
|
|
|
a2f18f |
- * Loop over all the agmt maxcsns and find ours
|
|
|
a2f18f |
- */
|
|
|
a2f18f |
- for(i = 0; maxcsns[i]; i++){
|
|
|
a2f18f |
- char buf[BUFSIZ];
|
|
|
a2f18f |
- char unavail_buf[BUFSIZ];
|
|
|
a2f18f |
-
|
|
|
a2f18f |
- PR_snprintf(buf,BUFSIZ,"%s;%s;%s;%d;",slapi_sdn_get_dn(ra->replarea),
|
|
|
a2f18f |
- slapi_rdn_get_value_by_ref(slapi_rdn_get_rdn(ra->rdn)),
|
|
|
a2f18f |
- ra->hostname, ra->port);
|
|
|
a2f18f |
- PR_snprintf(unavail_buf, BUFSIZ,"%s;%s;%s;%d;unavailable", slapi_sdn_get_dn(ra->replarea),
|
|
|
a2f18f |
- slapi_rdn_get_value_by_ref(slapi_rdn_get_rdn(ra->rdn)),
|
|
|
a2f18f |
- ra->hostname, ra->port);
|
|
|
a2f18f |
- if(strstr(maxcsns[i], buf) || strstr(maxcsns[i], unavail_buf)){
|
|
|
a2f18f |
- slapi_ch_free_string(&ra->maxcsn);
|
|
|
a2f18f |
- ra->maxcsn = slapi_ch_strdup(maxcsns[i]);
|
|
|
a2f18f |
- ra->consumerRID = agmt_maxcsn_get_rid(maxcsns[i]);
|
|
|
a2f18f |
- ra->tmpConsumerRID = 1;
|
|
|
a2f18f |
- break;
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
- slapi_ch_array_free(maxcsns);
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
-done:
|
|
|
a2f18f |
- if (NULL != pb){
|
|
|
a2f18f |
- slapi_free_search_results_internal(pb);
|
|
|
a2f18f |
- slapi_pblock_destroy (pb);
|
|
|
a2f18f |
- }
|
|
|
a2f18f |
-}
|
|
|
a2f18f |
-
|
|
|
a2f18f |
-/*
|
|
|
a2f18f |
* Parse out the consumer replicaID from the agmt maxcsn
|
|
|
a2f18f |
*
|
|
|
a2f18f |
* "repl area;agmt_rdn;hostname;port;consumer_rid;maxcsn"
|
|
|
a2f18f |
--
|
|
|
a2f18f |
1.9.3
|
|
|
a2f18f |
|