andykimpe / rpms / 389-ds-base

Forked from rpms/389-ds-base 4 months ago
Clone
Blob Blame History Raw
From b28cb726b667120c7bb685c025f01a03832e137b Mon Sep 17 00:00:00 2001
From: Mark Reynolds <mreynolds@redhat.com>
Date: Tue, 21 Apr 2015 10:33:49 -0400
Subject: [PATCH 321/323] Ticket 48151 - Improve CleanAllRUV logging

Bug Description:  It is very difficult to troubleshoot cleanAllRUV issues
                  when there are multiple clean tasks running.

Fix Description:  Add the replica ID to each logging statement.

https://fedorahosted.org/389/ticket/48151

Reviewed by: rmeggins(Thanks!)

(cherry picked from commit 33330c355ace8f3d351ea1db94cf2e8faf12f19f)

Conflicts:
	ldap/servers/plugins/replication/repl5_replica_config.c

(cherry picked from commit 6b873e6bdf736bacc16f832686cae1d64175f675)
---
 ldap/servers/plugins/replication/repl5.h           |   2 +-
 .../plugins/replication/repl5_replica_config.c     | 183 +++++++++++----------
 2 files changed, 99 insertions(+), 86 deletions(-)

diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h
index 231daf5..10c0869 100644
--- a/ldap/servers/plugins/replication/repl5.h
+++ b/ldap/servers/plugins/replication/repl5.h
@@ -646,7 +646,7 @@ int is_task_aborted(ReplicaId rid);
 void delete_aborted_rid(Replica *replica, ReplicaId rid, char *repl_root, int skip);
 int is_pre_cleaned_rid(ReplicaId rid);
 void set_cleaned_rid(ReplicaId rid);
-void cleanruv_log(Slapi_Task *task, char *task_type, char *fmt, ...);
+void cleanruv_log(Slapi_Task *task, int rid, char *task_type, char *fmt, ...);
 char * replica_cleanallruv_get_local_maxcsn(ReplicaId rid, char *base_dn);
 
 #define CLEANRIDSIZ 4 /* maximum number for concurrent CLEANALLRUV tasks */
diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c
index daefc03..9f9197e 100644
--- a/ldap/servers/plugins/replication/repl5_replica_config.c
+++ b/ldap/servers/plugins/replication/repl5_replica_config.c
@@ -1262,27 +1262,27 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter,
     /*
      *  Get our task settings
      */
-    if ((base_dn = fetch_attr(e, "replica-base-dn", 0)) == NULL){
-        PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing replica-base-dn attribute");
-        cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext);
+    if ((rid_str = fetch_attr(e, "replica-id", 0)) == NULL){
+        PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing replica-id attribute");
+        cleanruv_log(task, -1, CLEANALLRUV_ID, "%s", returntext);
         *returncode = LDAP_OBJECT_CLASS_VIOLATION;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
     }
-    if ((rid_str = fetch_attr(e, "replica-id", 0)) == NULL){
-        PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing replica-id attribute");
-        cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext);
+    rid = atoi(rid_str);
+    if ((base_dn = fetch_attr(e, "replica-base-dn", 0)) == NULL){
+        PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing replica-base-dn attribute");
+        cleanruv_log(task, (int)rid, CLEANALLRUV_ID, "%s", returntext);
         *returncode = LDAP_OBJECT_CLASS_VIOLATION;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
     }
-
     if ((force_cleaning = fetch_attr(e, "replica-force-cleaning", 0)) != NULL){
         if(strcasecmp(force_cleaning,"yes") != 0 && strcasecmp(force_cleaning,"no") != 0){
             PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Invalid value for replica-force-cleaning "
                 "(%s).  Value must be \"yes\" or \"no\" for task - (%s)",
                 force_cleaning, slapi_sdn_get_dn(task_dn));
-            cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext);
+            cleanruv_log(task, (int)rid, CLEANALLRUV_ID, "%s", returntext);
             *returncode = LDAP_OPERATIONS_ERROR;
             rc = SLAPI_DSE_CALLBACK_ERROR;
             goto out;
@@ -1293,11 +1293,10 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter,
     /*
      *  Check the rid
      */
-    rid = atoi(rid_str);
     if (rid <= 0 || rid >= READ_ONLY_REPLICA_ID){
         PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Invalid replica id (%d) for task - (%s)",
             rid, slapi_sdn_get_dn(task_dn));
-        cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext);
+        cleanruv_log(task, rid, CLEANALLRUV_ID, "%s", returntext);
         *returncode = LDAP_OPERATIONS_ERROR;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
@@ -1305,7 +1304,7 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter,
     if(is_cleaned_rid(rid)){
          /* we are already cleaning this rid */
          PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Replica id (%d) is already being cleaned", rid);
-         cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext);
+         cleanruv_log(task, rid, CLEANALLRUV_ID, "%s", returntext);
          *returncode = LDAP_UNWILLING_TO_PERFORM;
          rc = SLAPI_DSE_CALLBACK_ERROR;
          goto out;
@@ -1316,7 +1315,7 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter,
     dn = slapi_sdn_new_dn_byval(base_dn);
     if((r = replica_get_replica_from_dn(dn)) == NULL){
         PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Could not find replica from dn(%s)",slapi_sdn_get_dn(dn));
-        cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext);
+        cleanruv_log(task, rid, CLEANALLRUV_ID, "%s", returntext);
         *returncode = LDAP_OPERATIONS_ERROR;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
@@ -1327,7 +1326,7 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter,
 
 out:
     if(rc){
-        cleanruv_log(task, CLEANALLRUV_ID, "Task failed...(%d)", rc);
+        cleanruv_log(task, rid, CLEANALLRUV_ID, "Task failed...(%d)", rc);
         slapi_task_finish(task, *returncode);
     } else {
         rc = SLAPI_DSE_CALLBACK_OK;
@@ -1359,11 +1358,11 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c
     char *basedn = NULL;
     int rc = 0;
 
-    cleanruv_log(pre_task, CLEANALLRUV_ID,"Initiating CleanAllRUV Task...");
+    cleanruv_log(pre_task, rid, CLEANALLRUV_ID,"Initiating CleanAllRUV Task...");
 
     if(get_cleanruv_task_count() >= CLEANRIDSIZ){
         /* we are already running the maximum number of tasks */
-        cleanruv_log(pre_task, CLEANALLRUV_ID,
+        cleanruv_log(pre_task, rid, CLEANALLRUV_ID,
     	    "Exceeded maximum number of active CLEANALLRUV tasks(%d)",CLEANRIDSIZ);
         return LDAP_UNWILLING_TO_PERFORM;
     }
@@ -1373,7 +1372,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c
     if(r){
         replica = (Replica*)object_get_data (r);
     } else {
-        cleanruv_log(pre_task, CLEANALLRUV_ID, "Replica object is NULL, aborting task");
+        cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Replica object is NULL, aborting task");
         return -1;
     }
     /*
@@ -1381,7 +1380,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c
      */
     if(replica_get_type(replica) == REPLICA_TYPE_READONLY){
         /* this is a consumer, send error */
-        cleanruv_log(pre_task, CLEANALLRUV_ID, "Failed to clean rid (%d), task can not be run on a consumer",rid);
+        cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Failed to clean rid (%d), task can not be run on a consumer",rid);
         if(task){
             rc = -1;
             slapi_task_finish(task, rc);
@@ -1391,7 +1390,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c
     /*
      *  Grab the max csn of the deleted replica
      */
-    cleanruv_log(pre_task, CLEANALLRUV_ID, "Retrieving maxcsn...");
+    cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Retrieving maxcsn...");
     basedn = (char *)slapi_sdn_get_dn(replica_get_root(replica));
     maxcsn = replica_cleanallruv_find_maxcsn(replica, rid, basedn);
     if(maxcsn == NULL || csn_get_replicaid(maxcsn) == 0){
@@ -1404,7 +1403,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c
         csn_init_by_string(maxcsn, "");
     }
     csn_as_string(maxcsn, PR_FALSE, csnstr);
-    cleanruv_log(pre_task, CLEANALLRUV_ID, "Found maxcsn (%s)",csnstr);
+    cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Found maxcsn (%s)",csnstr);
     /*
      *  Create payload
      */
@@ -1413,7 +1412,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c
     slapi_ch_free_string(&ridstr);
 
     if(payload == NULL){
-        cleanruv_log(pre_task, CLEANALLRUV_ID, "Failed to create extended op payload, aborting task");
+        cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Failed to create extended op payload, aborting task");
         rc = -1;
         goto fail;
     }
@@ -1423,7 +1422,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c
      */
     data = (cleanruv_data*)slapi_ch_calloc(1, sizeof(cleanruv_data));
     if (data == NULL) {
-        cleanruv_log(pre_task, CLEANALLRUV_ID, "Failed to allocate cleanruv_data.  Aborting task.");
+        cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Failed to allocate cleanruv_data.  Aborting task.");
         rc = -1;
         goto fail;
     }
@@ -1450,7 +1449,7 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c
     }
 
 fail:
-    cleanruv_log(pre_task, CLEANALLRUV_ID, "Failed to clean rid (%d)",rid);
+    cleanruv_log(pre_task, rid, CLEANALLRUV_ID, "Failed to clean rid (%d)", rid);
     if(task){
         slapi_task_finish(task, rc);
     }
@@ -1516,9 +1515,9 @@ replica_cleanallruv_thread(void *arg)
         PR_Unlock( notify_lock );
         data->repl_obj = replica_get_replica_from_dn(data->sdn);
         if(data->repl_obj == NULL){
-        	cleanruv_log(data->task, CLEANALLRUV_ID, "Unable to retrieve repl object from dn(%s).", data->sdn);
-        	aborted = 1;
-        	goto done;
+            cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Unable to retrieve repl object from dn(%s).", data->sdn);
+            aborted = 1;
+            goto done;
         }
         data->replica = (Replica*)object_get_data(data->repl_obj);
         free_obj = 1;
@@ -1530,9 +1529,9 @@ replica_cleanallruv_thread(void *arg)
     }
     /* verify we have set our repl objects */
     if(data->repl_obj == NULL || data->replica == NULL){
-    	cleanruv_log(data->task, CLEANALLRUV_ID, "Unable to set the replica objects.");
-    	aborted = 1;
-    	goto done;
+        cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Unable to set the replica objects.");
+        aborted = 1;
+        goto done;
     }
     if(data->repl_root == NULL){
         /* we must have resumed from start up, fill in the repl root */
@@ -1552,11 +1551,11 @@ replica_cleanallruv_thread(void *arg)
      *  Add the cleanallruv task to the repl config - so we can handle restarts
      */
     add_cleaned_rid(data->rid, data->replica, csnstr, data->force); /* marks config that we started cleaning a rid */
-    cleanruv_log(data->task, CLEANALLRUV_ID, "Cleaning rid (%d)...", data->rid);
+    cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Cleaning rid (%d)...", data->rid);
     /*
      *  First, wait for the maxcsn to be covered
      */
-    cleanruv_log(data->task, CLEANALLRUV_ID, "Waiting to process all the updates from the deleted replica...");
+    cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Waiting to process all the updates from the deleted replica...");
     ruv_obj = replica_get_ruv(data->replica);
     ruv = object_get_data (ruv_obj);
     while(data->maxcsn && !is_task_aborted(data->rid) && !is_cleaned_rid(data->rid) && !slapi_is_shutting_down()){
@@ -1574,7 +1573,7 @@ replica_cleanallruv_thread(void *arg)
      *
      *  Even if we are forcing the cleaning, the replicas still need to be up
      */
-    cleanruv_log(data->task, CLEANALLRUV_ID,"Waiting for all the replicas to be online...");
+    cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Waiting for all the replicas to be online...");
     if(check_agmts_are_alive(data->replica, data->rid, data->task)){
         /* error, aborted or shutdown */
         aborted = 1;
@@ -1583,7 +1582,7 @@ replica_cleanallruv_thread(void *arg)
     /*
      *  Make sure all the replicas have seen the max csn
      */
-    cleanruv_log(data->task, CLEANALLRUV_ID,"Waiting for all the replicas to receive all the deleted replica updates...");
+    cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Waiting for all the replicas to receive all the deleted replica updates...");
     if(strcasecmp(data->force,"no") == 0 && check_agmts_are_caught_up(data, csnstr)){
         /* error, aborted or shutdown */
         aborted = 1;
@@ -1597,7 +1596,7 @@ replica_cleanallruv_thread(void *arg)
     /*
      *  Now send the cleanruv extended op to all the agreements
      */
-    cleanruv_log(data->task, CLEANALLRUV_ID, "Sending cleanAllRUV task to all the replicas...");
+    cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Sending cleanAllRUV task to all the replicas...");
     while(agmt_not_notified && !is_task_aborted(data->rid) && !slapi_is_shutting_down()){
         agmt_obj = agmtlist_get_first_agreement_for_replica (data->replica);
         if(agmt_obj == NULL){
@@ -1615,7 +1614,7 @@ replica_cleanallruv_thread(void *arg)
                 agmt_not_notified = 0;
             } else {
                 agmt_not_notified = 1;
-                cleanruv_log(data->task, CLEANALLRUV_ID, "Failed to send task to replica (%s)",agmt_get_long_name(agmt));
+                cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Failed to send task to replica (%s)",agmt_get_long_name(agmt));
                 break;
             }
             agmt_obj = agmtlist_get_next_agreement_for_replica (data->replica, agmt_obj);
@@ -1631,7 +1630,7 @@ replica_cleanallruv_thread(void *arg)
         /*
          *  need to sleep between passes
          */
-        cleanruv_log(data->task, CLEANALLRUV_ID, "Not all replicas have received the "
+        cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Not all replicas have received the "
             "cleanallruv extended op, retrying in %d seconds",interval);
         PR_Lock( notify_lock );
         PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) );
@@ -1646,12 +1645,13 @@ replica_cleanallruv_thread(void *arg)
     /*
      *  Run the CLEANRUV task
      */
-    cleanruv_log(data->task, CLEANALLRUV_ID,"Cleaning local ruv's...");
+    cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Cleaning local ruv's...");
     replica_execute_cleanruv_task (data->repl_obj, data->rid, returntext);
     /*
      *  Wait for all the replicas to be cleaned
      */
-    cleanruv_log(data->task, CLEANALLRUV_ID,"Waiting for all the replicas to be cleaned...");
+    cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,
+            "Waiting for all the replicas to be cleaned...");
 
     interval = 10;
     while(found_dirty_rid && !is_task_aborted(data->rid) && !slapi_is_shutting_down()){
@@ -1670,7 +1670,8 @@ replica_cleanallruv_thread(void *arg)
                 found_dirty_rid = 0;
             } else {
                 found_dirty_rid = 1;
-                cleanruv_log(data->task, CLEANALLRUV_ID,"Replica is not cleaned yet (%s)",agmt_get_long_name(agmt));
+                cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Replica is not cleaned yet (%s)",
+                        agmt_get_long_name(agmt));
                 break;
             }
             agmt_obj = agmtlist_get_next_agreement_for_replica (data->replica, agmt_obj);
@@ -1686,7 +1687,7 @@ replica_cleanallruv_thread(void *arg)
         /*
          *  need to sleep between passes
          */
-        cleanruv_log(data->task, CLEANALLRUV_ID, "Replicas have not been cleaned yet, "
+        cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Replicas have not been cleaned yet, "
             "retrying in %d seconds", interval);
         PR_Lock( notify_lock );
         PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) );
@@ -1708,16 +1709,16 @@ done:
         delete_cleaned_rid_config(data);
         /* make sure all the replicas have been "pre_cleaned" before finishing */
         check_replicas_are_done_cleaning(data);
-        cleanruv_log(data->task, CLEANALLRUV_ID, "Successfully cleaned rid(%d).", data->rid);
+        cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Successfully cleaned rid(%d).", data->rid);
         remove_cleaned_rid(data->rid);
     } else {
         /*
          *  Shutdown or abort
          */
         if(!is_task_aborted(data->rid)){
-            cleanruv_log(data->task, CLEANALLRUV_ID,"Server shutting down.  Process will resume at server startup");
+            cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Server shutting down.  Process will resume at server startup");
         } else {
-            cleanruv_log(data->task, CLEANALLRUV_ID,"Task aborted for rid(%d).",data->rid);
+            cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Task aborted for rid(%d).",data->rid);
             delete_cleaned_rid_config(data);
             remove_cleaned_rid(data->rid);
         }
@@ -1753,7 +1754,7 @@ check_replicas_are_done_cleaning(cleanruv_data *data )
     int not_all_cleaned = 1;
     int interval = 10;
 
-    cleanruv_log(data->task, CLEANALLRUV_ID, "Waiting for all the replicas to finish cleaning...");
+    cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Waiting for all the replicas to finish cleaning...");
 
     csn_as_string(data->maxcsn, PR_FALSE, csnstr);
     filter = PR_smprintf("(%s=%d:%s:%s)", type_replicaCleanRUV,(int)data->rid, csnstr, data->force);
@@ -1781,7 +1782,7 @@ check_replicas_are_done_cleaning(cleanruv_data *data )
         if(not_all_cleaned == 0 || is_task_aborted(data->rid) ){
             break;
         }
-        cleanruv_log(data->task, CLEANALLRUV_ID, "Not all replicas finished cleaning, retrying in %d seconds",interval);
+        cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Not all replicas finished cleaning, retrying in %d seconds",interval);
         PR_Lock( notify_lock );
         PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) );
         PR_Unlock( notify_lock );
@@ -1856,7 +1857,7 @@ check_replicas_are_done_aborting(cleanruv_data *data )
     int not_all_aborted = 1;
     int interval = 10;
 
-    cleanruv_log(data->task, ABORT_CLEANALLRUV_ID,"Waiting for all the replicas to finish aborting...");
+    cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID,"Waiting for all the replicas to finish aborting...");
 
     filter = PR_smprintf("(%s=%d:%s)", type_replicaAbortCleanRUV, data->rid, data->repl_root);
 
@@ -1884,7 +1885,7 @@ check_replicas_are_done_aborting(cleanruv_data *data )
         if(not_all_aborted == 0){
             break;
         }
-        cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Not all replicas finished aborting, retrying in %d seconds",interval);
+        cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Not all replicas finished aborting, retrying in %d seconds",interval);
         PR_Lock( notify_lock );
         PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) );
         PR_Unlock( notify_lock );
@@ -1928,7 +1929,7 @@ check_agmts_are_caught_up(cleanruv_data *data, char *maxcsn)
                 not_all_caughtup = 0;
             } else {
                 not_all_caughtup = 1;
-                cleanruv_log(data->task, CLEANALLRUV_ID, "Replica not caught up (%s)",agmt_get_long_name(agmt));
+                cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Replica not caught up (%s)",agmt_get_long_name(agmt));
                 break;
             }
             agmt_obj = agmtlist_get_next_agreement_for_replica (data->replica, agmt_obj);
@@ -1937,7 +1938,8 @@ check_agmts_are_caught_up(cleanruv_data *data, char *maxcsn)
         if(not_all_caughtup == 0 || is_task_aborted(data->rid) ){
             break;
         }
-        cleanruv_log(data->task, CLEANALLRUV_ID, "Not all replicas caught up, retrying in %d seconds",interval);
+        cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,
+                "Not all replicas caught up, retrying in %d seconds",interval);
         PR_Lock( notify_lock );
         PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) );
         PR_Unlock( notify_lock );
@@ -1985,7 +1987,8 @@ check_agmts_are_alive(Replica *replica, ReplicaId rid, Slapi_Task *task)
                 not_all_alive = 0;
             } else {
                 not_all_alive = 1;
-                cleanruv_log(task, CLEANALLRUV_ID, "Replica not online (%s)",agmt_get_long_name(agmt));
+                cleanruv_log(task, rid, CLEANALLRUV_ID, "Replica not online (%s)",
+                        agmt_get_long_name(agmt));
                 break;
             }
             agmt_obj = agmtlist_get_next_agreement_for_replica (replica, agmt_obj);
@@ -1994,7 +1997,8 @@ check_agmts_are_alive(Replica *replica, ReplicaId rid, Slapi_Task *task)
         if(not_all_alive == 0 || is_task_aborted(rid)){
             break;
         }
-        cleanruv_log(task, CLEANALLRUV_ID, "Not all replicas online, retrying in %d seconds...",interval);
+        cleanruv_log(task, rid, CLEANALLRUV_ID, "Not all replicas online, retrying in %d seconds...",
+                interval);
         PR_Lock( notify_lock );
         PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) );
         PR_Unlock( notify_lock );
@@ -2098,8 +2102,9 @@ replica_send_cleanruv_task(Repl_Agmt *agmt, cleanruv_data *clean_data)
     if(rc != LDAP_SUCCESS){
         char *hostname = agmt_get_hostname(agmt);
 
-        cleanruv_log(clean_data->task, CLEANALLRUV_ID, "Failed to add CLEANRUV task (%s) to replica "
-            "(%s).  You will need to manually run the CLEANRUV task on this replica (%s) error (%d)",
+        cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID,
+            "Failed to add CLEANRUV task (%s) to replica (%s).  You will need "
+            "to manually run the CLEANRUV task on this replica (%s) error (%d)",
             repl_dn, agmt_get_long_name(agmt), hostname, rc);
         slapi_ch_free_string(&hostname);
     }
@@ -2371,7 +2376,7 @@ delete_cleaned_rid_config(cleanruv_data *clean_data)
     int rc = -1, ret, rid;
 
     if(clean_data == NULL){
-        cleanruv_log(NULL, CLEANALLRUV_ID, "delete_cleaned_rid_config: cleanruv data is NULL, "
+        cleanruv_log(NULL, clean_data->rid, CLEANALLRUV_ID, "delete_cleaned_rid_config: cleanruv data is NULL, "
                 "failed to clean the config.");
         return;
     }
@@ -2391,7 +2396,8 @@ delete_cleaned_rid_config(cleanruv_data *clean_data)
     slapi_search_internal_pb(pb);
     slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &ret);
     if (ret != LDAP_SUCCESS){
-        cleanruv_log(clean_data->task, CLEANALLRUV_ID,"delete_cleaned_rid_config: internal search failed(%d).",ret);
+        cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID,
+                "delete_cleaned_rid_config: internal search failed(%d).",ret);
         goto bail;
     } else {
         slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_SEARCH_ENTRIES, &entries);
@@ -2399,7 +2405,8 @@ delete_cleaned_rid_config(cleanruv_data *clean_data)
             /*
              *  No matching entries!
              */
-            cleanruv_log(clean_data->task, CLEANALLRUV_ID,"delete_cleaned_rid_config: failed to find any "
+            cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID,
+                    "delete_cleaned_rid_config: failed to find any "
                     "entries with nsds5ReplicaCleanRUV under (%s)", dn);
             goto bail;
         } else {
@@ -2450,7 +2457,8 @@ delete_cleaned_rid_config(cleanruv_data *clean_data)
                 }
 
                 if (rc != LDAP_SUCCESS && rc != LDAP_NO_SUCH_OBJECT){
-                    cleanruv_log(clean_data->task, CLEANALLRUV_ID, "delete_cleaned_rid_config: failed to remove task data "
+                    cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID,
+                            "delete_cleaned_rid_config: failed to remove task data "
                             "from (%s) error (%d), rid (%d)", edn, rc, clean_data->rid);
                     goto bail;
                 }
@@ -2460,7 +2468,8 @@ delete_cleaned_rid_config(cleanruv_data *clean_data)
 
 bail:
     if (rc != LDAP_SUCCESS && rc != LDAP_NO_SUCH_OBJECT){
-        cleanruv_log(clean_data->task, CLEANALLRUV_ID, "delete_cleaned_rid_config: failed to remove replica config "
+        cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID,
+            "delete_cleaned_rid_config: failed to remove replica config "
             "(%d), rid (%d)", rc, clean_data->rid);
     }
     slapi_free_search_results_internal(pb);
@@ -2518,8 +2527,10 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter
 
     if(get_abort_cleanruv_task_count() >= CLEANRIDSIZ){
         /* we are already running the maximum number of tasks */
-        PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Exceeded maximum number of active ABORT CLEANALLRUV tasks(%d)",CLEANRIDSIZ);
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext);
+        PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE,
+                "Exceeded maximum number of active ABORT CLEANALLRUV tasks(%d)",
+                CLEANRIDSIZ);
+        cleanruv_log(task, -1, ABORT_CLEANALLRUV_ID, "%s", returntext);
         *returncode = LDAP_OPERATIONS_ERROR;
         return SLAPI_DSE_CALLBACK_ERROR;
     }
@@ -2531,7 +2542,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter
      */
     if ((rid_str = fetch_attr(e, "replica-id", 0)) == NULL){
         PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing required attr \"replica-id\"");
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext);
+        cleanruv_log(task, -1, ABORT_CLEANALLRUV_ID, "%s", returntext);
         *returncode = LDAP_OBJECT_CLASS_VIOLATION;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
@@ -2544,14 +2555,14 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter
     if (rid <= 0 || rid >= READ_ONLY_REPLICA_ID){
         PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Invalid replica id (%d) for task - (%s)",
             rid, slapi_sdn_get_dn(slapi_entry_get_sdn(e)));
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID,"%s", returntext);
+        cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID,"%s", returntext);
         *returncode = LDAP_OPERATIONS_ERROR;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
     }
     if ((base_dn = fetch_attr(e, "replica-base-dn", 0)) == NULL){
         PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing required attr \"replica-base-dn\"");
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext);
+        cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "%s", returntext);
         *returncode = LDAP_OBJECT_CLASS_VIOLATION;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
@@ -2559,7 +2570,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter
     if(!is_cleaned_rid(rid) && !is_pre_cleaned_rid(rid)){
         /* we are not cleaning this rid */
         PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Replica id (%d) is not being cleaned, nothing to abort.", rid);
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext);
+        cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "%s", returntext);
         *returncode = LDAP_UNWILLING_TO_PERFORM;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
@@ -2567,7 +2578,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter
     if(is_task_aborted(rid)){
         /* we are already aborting this rid */
         PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Replica id (%d) is already being aborted", rid);
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext);
+        cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "%s", returntext);
         *returncode = LDAP_UNWILLING_TO_PERFORM;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
@@ -2578,7 +2589,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter
     sdn = slapi_sdn_new_dn_byval(base_dn);
     if((r = replica_get_replica_from_dn(sdn)) == NULL){
         PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Failed to find replica from dn(%s)", base_dn);
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext);
+        cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "%s", returntext);
         *returncode = LDAP_OPERATIONS_ERROR;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
@@ -2590,7 +2601,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter
         if(strcasecmp(certify_all,"yes") && strcasecmp(certify_all,"no")){
             PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Invalid value for \"replica-certify-all\", the value "
                 "must be \"yes\" or \"no\".");
-            cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext);
+            cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "%s", returntext);
             *returncode = LDAP_OPERATIONS_ERROR;
             rc = SLAPI_DSE_CALLBACK_ERROR;
             goto out;
@@ -2611,7 +2622,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter
     payload = create_cleanruv_payload(ridstr);
 
     if(payload == NULL){
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID, "Failed to create extended op payload, aborting task");
+        cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "Failed to create extended op payload, aborting task");
         *returncode = LDAP_OPERATIONS_ERROR;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
@@ -2627,7 +2638,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter
      */
     data = (cleanruv_data*)slapi_ch_calloc(1, sizeof(cleanruv_data));
     if (data == NULL) {
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID,"Failed to allocate abort_cleanruv_data.  Aborting task.");
+        cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID,"Failed to allocate abort_cleanruv_data.  Aborting task.");
         *returncode = LDAP_OPERATIONS_ERROR;
         rc = SLAPI_DSE_CALLBACK_ERROR;
         goto out;
@@ -2646,7 +2657,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter
                 PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE);
     if (thread == NULL) {
         object_release(r);
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID,"Unable to create abort thread.  Aborting task.");
+        cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID,"Unable to create abort thread.  Aborting task.");
         *returncode = LDAP_OPERATIONS_ERROR;
         slapi_ch_free_string(&data->certify);
         rc = SLAPI_DSE_CALLBACK_ERROR;
@@ -2657,7 +2668,7 @@ out:
     slapi_sdn_free(&sdn);
 
     if(rc != SLAPI_DSE_CALLBACK_OK){
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID, "Abort Task failed (%d)", rc);
+        cleanruv_log(task, rid, ABORT_CLEANALLRUV_ID, "Abort Task failed (%d)", rc);
         slapi_task_finish(task, rc);
     }
 
@@ -2678,7 +2689,7 @@ replica_abort_task_thread(void *arg)
     int release_it = 0;
     int count = 0, rc = 0;
 
-    cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Aborting task for rid(%d)...",data->rid);
+    cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Aborting task for rid(%d)...",data->rid);
 
     /*
      *   Need to build the replica from the dn
@@ -2689,7 +2700,7 @@ replica_abort_task_thread(void *arg)
          * to timing issues, we need to wait to grab the replica obj until we get here.
          */
         if((data->repl_obj = replica_get_replica_from_dn(data->sdn)) == NULL){
-            cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Failed to get replica object from dn (%s).", slapi_sdn_get_dn(data->sdn));
+            cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Failed to get replica object from dn (%s).", slapi_sdn_get_dn(data->sdn));
             goto done;
         }
         if(data->replica == NULL && data->repl_obj){
@@ -2737,7 +2748,7 @@ replica_abort_task_thread(void *arg)
         /*
          *  need to sleep between passes
          */
-        cleanruv_log(data->task, ABORT_CLEANALLRUV_ID,"Retrying in %d seconds",interval);
+        cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID,"Retrying in %d seconds",interval);
         PR_Lock( notify_lock );
         PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) );
         PR_Unlock( notify_lock );
@@ -2752,17 +2763,17 @@ replica_abort_task_thread(void *arg)
 done:
     if(agmt_not_notified){
         /* failure */
-        cleanruv_log(data->task, ABORT_CLEANALLRUV_ID,"Abort task failed, will resume the task at the next server startup.");
+        cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID,"Abort task failed, will resume the task at the next server startup.");
     } else {
         /*
          *  Wait for this server to stop its cleanallruv task(which removes the rid from the cleaned list)
          */
-        cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Waiting for CleanAllRUV task to abort...");
+        cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Waiting for CleanAllRUV task to abort...");
         while(is_cleaned_rid(data->rid)){
             DS_Sleep(PR_SecondsToInterval(1));
             count++;
             if(count == 60){ /* it should not take this long */
-                cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "CleanAllRUV task failed to abort.  You might need to "
+                cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "CleanAllRUV task failed to abort.  You might need to "
                         "rerun the task.");
                 rc = -1;
                 break;
@@ -2777,9 +2788,9 @@ done:
         }
         delete_aborted_rid(data->replica, data->rid, data->repl_root, 0); /* remove the in-memory aborted rid */
         if(rc == 0){
-            cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Successfully aborted task for rid(%d)", data->rid);
+            cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Successfully aborted task for rid(%d)", data->rid);
         } else {
-            cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Failed to abort task for rid(%d)",data->rid);
+            cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Failed to abort task for rid(%d)",data->rid);
         }
     }
 
@@ -2815,10 +2826,12 @@ replica_cleanallruv_send_abort_extop(Repl_Agmt *ra, Slapi_Task *task, struct ber
          */
         rc = crc;
         if(rc){
-        	cleanruv_log(task, ABORT_CLEANALLRUV_ID, "Failed to send extop to replica(%s).", agmt_get_long_name(ra));
+            cleanruv_log(task, agmt_get_consumer_rid(ra, conn), ABORT_CLEANALLRUV_ID,
+                    "Failed to send extop to replica(%s).", agmt_get_long_name(ra));
         }
     } else {
-        cleanruv_log(task, ABORT_CLEANALLRUV_ID, "Failed to connect to replica(%s).", agmt_get_long_name(ra));
+        cleanruv_log(task, agmt_get_consumer_rid(ra, conn), ABORT_CLEANALLRUV_ID,
+                "Failed to connect to replica(%s).", agmt_get_long_name(ra));
         rc = -1;
     }
     conn_delete_internal_ext(conn);
@@ -2853,7 +2866,7 @@ replica_cleanallruv_send_extop(Repl_Agmt *ra, cleanruv_data *clean_data, int che
                     /* extop was accepted */
                     rc = 0;
                 } else {
-                    cleanruv_log(clean_data->task, CLEANALLRUV_ID,"Replica %s does not support the CLEANALLRUV task.  "
+                    cleanruv_log(clean_data->task, clean_data->rid, CLEANALLRUV_ID,"Replica %s does not support the CLEANALLRUV task.  "
                         "Sending replica CLEANRUV task...", slapi_sdn_get_dn(agmt_get_dn_byref(ra)));
                     /*
                      *  Ok, this replica doesn't know about CLEANALLRUV, so just manually
@@ -3019,7 +3032,7 @@ replica_cleanallruv_check_maxcsn(Repl_Agmt *agmt, char *basedn, char *rid_text,
                     csn_init_by_string(repl_max, remote_maxcsn);
                     if(csn_compare (repl_max, max) < 0){
                         /* we are not caught up yet, free, and return */
-                        cleanruv_log(task, CLEANALLRUV_ID,"Replica maxcsn (%s) is not caught up with deleted replica's maxcsn(%s)",
+                        cleanruv_log(task, atoi(rid_text), CLEANALLRUV_ID,"Replica maxcsn (%s) is not caught up with deleted replica's maxcsn(%s)",
                             remote_maxcsn, maxcsn);
                         rc = -1;
                     } else {
@@ -3181,7 +3194,7 @@ stop_ruv_cleaning()
  *  Write our logging to the task and error log
  */
 void
-cleanruv_log(Slapi_Task *task, char *task_type, char *fmt, ...)
+cleanruv_log(Slapi_Task *task, int rid, char *task_type, char *fmt, ...)
 {
     va_list ap1;
     va_list ap2;
@@ -3199,7 +3212,7 @@ cleanruv_log(Slapi_Task *task, char *task_type, char *fmt, ...)
         slapi_task_log_status_ext(task, fmt, ap2);
         slapi_task_inc_progress(task);
     }
-    errlog_fmt = PR_smprintf("%s: %s\n",task_type, fmt);
+    errlog_fmt = PR_smprintf("%s (rid %d): %s \n",task_type, rid, fmt);
     slapi_log_error_ext(SLAPI_LOG_FATAL, repl_plugin_name, errlog_fmt, ap3, ap4);
     slapi_ch_free_string(&errlog_fmt);
 
@@ -3261,7 +3274,7 @@ replica_cleanallruv_get_local_maxcsn(ReplicaId rid, char *base_dn)
         }
     } else {
         /* internal search failed */
-        cleanruv_log(NULL, CLEANALLRUV_ID, "replica_cleanallruv_get_local_maxcsn: internal search failed (%d)\n", res);
+        cleanruv_log(NULL, (int)rid, CLEANALLRUV_ID, "replica_cleanallruv_get_local_maxcsn: internal search failed (%d)\n", res);
     }
 
 	slapi_free_search_results_internal(search_pb);
-- 
1.9.3