Blame SOURCES/0036-Ticket-48118-At-startup-changelog-can-be-erronously-.patch

96373c
From 127e0d954eb7741c4afdc0305f7970b7ea164e8d Mon Sep 17 00:00:00 2001
96373c
From: Ludwig Krispenz <lkrispen@redhat.com>
96373c
Date: Thu, 9 Nov 2017 11:28:34 +0100
96373c
Subject: [PATCH]     Ticket 48118 - At startup, changelog can be erronously
96373c
 rebuilt after a normal shutdown
96373c
96373c
    Problem: There are two problems that can lead to inconsistent database and changelog maxruv:
96373c
    1] the database ruv is written periodically in th ehouskeeping thread and at shutdown. It
96373c
       relies on teh ruv_dirty flag, but due to a race condition this can be reset befor writing
96373c
       the ruv
96373c
    2] the changelog max ruv is updated whenever an operation is commutted, but in case of internal
96373c
       operations inside the txn for a client operation, if the operation is aborted the cl maxruv
96373c
       is not reset. Since it is only written at shutdown this normally is no problem, but if the
96373c
       aborted operation is the last before shutdown or is aborted by shutdown the cl ruv is incorrect
96373c
96373c
    Fix: the fix is in two parts:
96373c
    1] remove the use of the dirty flag, ensure that the ruv is always written. The overhead for writing
96373c
       a database ruv that has not changed is minimal
96373c
    2] when writing the changelog maxruv check if the macsns it contains are really present in the
96373c
       changelog. If not the maxruv is not written, it will be reconstructed at the next startup
96373c
96373c
    Reviewed by: William,Thierry - Thanks
96373c
---
96373c
 ldap/servers/plugins/replication/cl5_api.c         | 39 ++++++++++++++++++++++
96373c
 ldap/servers/plugins/replication/repl5.h           |  1 -
96373c
 ldap/servers/plugins/replication/repl5_replica.c   | 32 +-----------------
96373c
 .../plugins/replication/repl5_replica_config.c     |  2 --
96373c
 4 files changed, 40 insertions(+), 34 deletions(-)
96373c
96373c
diff --git a/ldap/servers/plugins/replication/cl5_api.c b/ldap/servers/plugins/replication/cl5_api.c
96373c
index ec648c014..55032dfb0 100644
96373c
--- a/ldap/servers/plugins/replication/cl5_api.c
96373c
+++ b/ldap/servers/plugins/replication/cl5_api.c
96373c
@@ -250,6 +250,8 @@ static void _cl5ReadBerval(struct berval *bv, char **buff);
96373c
 static void _cl5WriteBerval(struct berval *bv, char **buff);
96373c
 static int _cl5ReadBervals(struct berval ***bv, char **buff, unsigned int size);
96373c
 static int _cl5WriteBervals(struct berval **bv, char **buff, u_int32_t *size);
96373c
+static int64_t _cl5CheckMaxRUV(CL5DBFile *file, RUV *maxruv);
96373c
+static int64_t _cl5CheckCSNinCL(const ruv_enum_data *element, void *arg);
96373c
 
96373c
 /* replay iteration */
96373c
 #ifdef FOR_DEBUGGING
96373c
@@ -2716,6 +2718,36 @@ _cl5WriteBervals(struct berval **bv, char **buff, u_int32_t *size)
96373c
     return CL5_SUCCESS;
96373c
 }
96373c
 
96373c
+static int64_t
96373c
+_cl5CheckCSNinCL(const ruv_enum_data *element, void *arg)
96373c
+{
96373c
+    CL5DBFile *file = (CL5DBFile *)arg;
96373c
+    int rc = 0;
96373c
+
96373c
+    DBT key = {0}, data = {0};
96373c
+    char csnStr[CSN_STRSIZE];
96373c
+
96373c
+    /* construct the key */
96373c
+    key.data = csn_as_string(element->csn, PR_FALSE, csnStr);
96373c
+    key.size = CSN_STRSIZE;
96373c
+
96373c
+    data.flags = DB_DBT_MALLOC;
96373c
+
96373c
+    rc = file->db->get(file->db, NULL /*txn*/, &key, &data, 0);
96373c
+
96373c
+    slapi_ch_free(&(data.data));
96373c
+    return rc;
96373c
+}
96373c
+
96373c
+static int64_t
96373c
+_cl5CheckMaxRUV(CL5DBFile *file, RUV *maxruv)
96373c
+{
96373c
+    int rc = 0;
96373c
+
96373c
+    rc = ruv_enumerate_elements(maxruv, _cl5CheckCSNinCL, (void *)file);
96373c
+
96373c
+    return rc;
96373c
+}
96373c
 /* upgrade from db33 to db41
96373c
  * 1. Run recovery on the database environment using the DB_ENV->open method
96373c
  * 2. Remove any Berkeley DB environment using the DB_ENV->remove method
96373c
@@ -4010,6 +4042,13 @@ _cl5WriteRUV(CL5DBFile *file, PRBool purge)
96373c
         rc = ruv_to_bervals(file->maxRUV, &vals);
96373c
     }
96373c
 
96373c
+    if (!purge && _cl5CheckMaxRUV(file, file->maxRUV)) {
96373c
+        slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name_cl,
96373c
+                      "_cl5WriteRUV - changelog maxRUV not found in changelog for file %s\n",
96373c
+                      file->name);
96373c
+        return CL5_DB_ERROR;
96373c
+    }
96373c
+
96373c
     key.size = CSN_STRSIZE;
96373c
 
96373c
     rc = _cl5WriteBervals(vals, &buff, &data.size);
96373c
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h
96373c
index c6e79b7e2..4e206a0fc 100644
96373c
--- a/ldap/servers/plugins/replication/repl5.h
96373c
+++ b/ldap/servers/plugins/replication/repl5.h
96373c
@@ -725,7 +725,6 @@ Object *replica_get_for_backend(const char *be_name);
96373c
 void replica_set_purge_delay(Replica *r, uint32_t purge_delay);
96373c
 void replica_set_tombstone_reap_interval(Replica *r, long interval);
96373c
 void replica_update_ruv_consumer(Replica *r, RUV *supplier_ruv);
96373c
-void replica_set_ruv_dirty(Replica *r);
96373c
 Slapi_Entry *get_in_memory_ruv(Slapi_DN *suffix_sdn);
96373c
 int replica_write_ruv(Replica *r);
96373c
 char *replica_get_dn(Replica *r);
96373c
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
96373c
index e5296bf1c..77f4f18e4 100644
96373c
--- a/ldap/servers/plugins/replication/repl5_replica.c
96373c
+++ b/ldap/servers/plugins/replication/repl5_replica.c
96373c
@@ -41,7 +41,6 @@ struct replica
96373c
     ReplicaType repl_type;             /* is this replica read-only ? */
96373c
     ReplicaId repl_rid;                /* replicaID */
96373c
     Object *repl_ruv;                  /* replica update vector */
96373c
-    PRBool repl_ruv_dirty;             /* Dirty flag for ruv */
96373c
     CSNPL *min_csn_pl;                 /* Pending list for minimal CSN */
96373c
     void *csn_pl_reg_id;               /* registration assignment for csn callbacks */
96373c
     unsigned long repl_state_flags;    /* state flags */
96373c
@@ -788,7 +787,6 @@ replica_set_ruv(Replica *r, RUV *ruv)
96373c
     }
96373c
 
96373c
     r->repl_ruv = object_new((void *)ruv, (FNFree)ruv_destroy);
96373c
-    r->repl_ruv_dirty = PR_TRUE;
96373c
 
96373c
     replica_unlock(r->repl_lock);
96373c
 }
96373c
@@ -860,9 +858,6 @@ replica_update_ruv(Replica *r, const CSN *updated_csn, const char *replica_purl)
96373c
                                                     "to update RUV for replica %s, csn = %s\n",
96373c
                                   slapi_sdn_get_dn(r->repl_root),
96373c
                                   csn_as_string(updated_csn, PR_FALSE, csn_str));
96373c
-                } else {
96373c
-                    /* RUV updated - mark as dirty */
96373c
-                    r->repl_ruv_dirty = PR_TRUE;
96373c
                 }
96373c
             } else {
96373c
                 slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name,
96373c
@@ -1347,8 +1342,6 @@ replica_dump(Replica *r)
96373c
     slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "\tupdate dn: %s\n",
96373c
                   updatedn_list ? updatedn_list : "not configured");
96373c
     slapi_ch_free_string(&updatedn_list);
96373c
-    slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "\truv: %s configured and is %sdirty\n",
96373c
-                  r->repl_ruv ? "" : "not", r->repl_ruv_dirty ? "" : "not ");
96373c
     slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "\tCSN generator: %s configured\n",
96373c
                   r->repl_csngen ? "" : "not");
96373c
     /* JCMREPL - Dump Referrals */
96373c
@@ -1675,7 +1668,6 @@ replica_check_for_data_reload(Replica *r, void *arg __attribute__((unused)))
96373c
 
96373c
                     ruv_force_csn_update_from_ruv(upper_bound_ruv, r_ruv,
96373c
                                                   "Force update of database RUV (from CL RUV) -> ", SLAPI_LOG_NOTICE);
96373c
-                    replica_set_ruv_dirty(r);
96373c
                 }
96373c
 
96373c
             } else {
96373c
@@ -2778,11 +2770,6 @@ replica_write_ruv(Replica *r)
96373c
 
96373c
     replica_lock(r->repl_lock);
96373c
 
96373c
-    if (!r->repl_ruv_dirty) {
96373c
-        replica_unlock(r->repl_lock);
96373c
-        return rc;
96373c
-    }
96373c
-
96373c
     PR_ASSERT(r->repl_ruv);
96373c
 
96373c
     ruv_to_smod((RUV *)object_get_data(r->repl_ruv), &smod);
96373c
@@ -2817,14 +2804,10 @@ replica_write_ruv(Replica *r)
96373c
     /* ruv does not exist - create one */
96373c
     replica_lock(r->repl_lock);
96373c
 
96373c
-    if (rc == LDAP_SUCCESS) {
96373c
-        r->repl_ruv_dirty = PR_FALSE;
96373c
-    } else if (rc == LDAP_NO_SUCH_OBJECT) {
96373c
+    if (rc == LDAP_NO_SUCH_OBJECT) {
96373c
         /* this includes an internal operation - but since this only happens
96373c
            during server startup - its ok that we have lock around it */
96373c
         rc = _replica_configure_ruv(r, PR_TRUE);
96373c
-        if (rc == 0)
96373c
-            r->repl_ruv_dirty = PR_FALSE;
96373c
     } else /* error */
96373c
     {
96373c
         slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name,
96373c
@@ -3325,7 +3308,6 @@ replica_create_ruv_tombstone(Replica *r)
96373c
 
96373c
             if (ruv_init_new(csnstr, r->repl_rid, purl, &ruv) == RUV_SUCCESS) {
96373c
                 r->repl_ruv = object_new((void *)ruv, (FNFree)ruv_destroy);
96373c
-                r->repl_ruv_dirty = PR_TRUE;
96373c
                 return_value = LDAP_SUCCESS;
96373c
             } else {
96373c
                 slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "replica_create_ruv_tombstone - "
96373c
@@ -3365,8 +3347,6 @@ replica_create_ruv_tombstone(Replica *r)
96373c
     slapi_add_internal_pb(pb);
96373c
     e = NULL; /* add consumes e, upon success or failure */
96373c
     slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &return_value);
96373c
-    if (return_value == LDAP_SUCCESS)
96373c
-        r->repl_ruv_dirty = PR_FALSE;
96373c
 
96373c
 done:
96373c
     slapi_entry_free(e);
96373c
@@ -3630,7 +3610,6 @@ replica_strip_cleaned_rids(Replica *r)
96373c
     ruv_get_cleaned_rids(ruv, rid);
96373c
     while (rid[i] != 0) {
96373c
         ruv_delete_replica(ruv, rid[i]);
96373c
-        replica_set_ruv_dirty(r);
96373c
         if (replica_write_ruv(r)) {
96373c
             slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name,
96373c
                           "replica_strip_cleaned_rids - Failed to write RUV\n");
96373c
@@ -3744,15 +3723,6 @@ replica_update_ruv_consumer(Replica *r, RUV *supplier_ruv)
96373c
     }
96373c
 }
96373c
 
96373c
-void
96373c
-replica_set_ruv_dirty(Replica *r)
96373c
-{
96373c
-    PR_ASSERT(r);
96373c
-    replica_lock(r->repl_lock);
96373c
-    r->repl_ruv_dirty = PR_TRUE;
96373c
-    replica_unlock(r->repl_lock);
96373c
-}
96373c
-
96373c
 PRBool
96373c
 replica_is_state_flag_set(Replica *r, int32_t flag)
96373c
 {
96373c
diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c
96373c
index 9c8d6adbb..e025f34d8 100644
96373c
--- a/ldap/servers/plugins/replication/repl5_replica_config.c
96373c
+++ b/ldap/servers/plugins/replication/repl5_replica_config.c
96373c
@@ -937,7 +937,6 @@ replica_config_change_type_and_id(Replica *r, const char *new_type, const char *
96373c
                     replica_reset_csn_pl(r);
96373c
                 }
96373c
                 ruv_delete_replica(ruv, oldrid);
96373c
-                replica_set_ruv_dirty(r);
96373c
                 cl5CleanRUV(oldrid);
96373c
                 replica_set_csn_assigned(r);
96373c
             }
96373c
@@ -1323,7 +1322,6 @@ replica_execute_cleanruv_task(Object *r, ReplicaId rid, char *returntext __attri
96373c
         return LDAP_UNWILLING_TO_PERFORM;
96373c
     }
96373c
     rc = ruv_delete_replica(local_ruv, rid);
96373c
-    replica_set_ruv_dirty(replica);
96373c
     if (replica_write_ruv(replica)) {
96373c
         slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "cleanAllRUV_task - Could not write RUV\n");
96373c
     }
96373c
-- 
96373c
2.13.6
96373c