Blame SOURCES/0036-Ticket-48118-At-startup-changelog-can-be-erronously-.patch

058656
From 127e0d954eb7741c4afdc0305f7970b7ea164e8d Mon Sep 17 00:00:00 2001
058656
From: Ludwig Krispenz <lkrispen@redhat.com>
058656
Date: Thu, 9 Nov 2017 11:28:34 +0100
058656
Subject: [PATCH]     Ticket 48118 - At startup, changelog can be erronously
058656
 rebuilt after a normal shutdown
058656
058656
    Problem: There are two problems that can lead to inconsistent database and changelog maxruv:
058656
    1] the database ruv is written periodically in th ehouskeeping thread and at shutdown. It
058656
       relies on teh ruv_dirty flag, but due to a race condition this can be reset befor writing
058656
       the ruv
058656
    2] the changelog max ruv is updated whenever an operation is commutted, but in case of internal
058656
       operations inside the txn for a client operation, if the operation is aborted the cl maxruv
058656
       is not reset. Since it is only written at shutdown this normally is no problem, but if the
058656
       aborted operation is the last before shutdown or is aborted by shutdown the cl ruv is incorrect
058656
058656
    Fix: the fix is in two parts:
058656
    1] remove the use of the dirty flag, ensure that the ruv is always written. The overhead for writing
058656
       a database ruv that has not changed is minimal
058656
    2] when writing the changelog maxruv check if the macsns it contains are really present in the
058656
       changelog. If not the maxruv is not written, it will be reconstructed at the next startup
058656
058656
    Reviewed by: William,Thierry - Thanks
058656
---
058656
 ldap/servers/plugins/replication/cl5_api.c         | 39 ++++++++++++++++++++++
058656
 ldap/servers/plugins/replication/repl5.h           |  1 -
058656
 ldap/servers/plugins/replication/repl5_replica.c   | 32 +-----------------
058656
 .../plugins/replication/repl5_replica_config.c     |  2 --
058656
 4 files changed, 40 insertions(+), 34 deletions(-)
058656
058656
diff --git a/ldap/servers/plugins/replication/cl5_api.c b/ldap/servers/plugins/replication/cl5_api.c
058656
index ec648c014..55032dfb0 100644
058656
--- a/ldap/servers/plugins/replication/cl5_api.c
058656
+++ b/ldap/servers/plugins/replication/cl5_api.c
058656
@@ -250,6 +250,8 @@ static void _cl5ReadBerval(struct berval *bv, char **buff);
058656
 static void _cl5WriteBerval(struct berval *bv, char **buff);
058656
 static int _cl5ReadBervals(struct berval ***bv, char **buff, unsigned int size);
058656
 static int _cl5WriteBervals(struct berval **bv, char **buff, u_int32_t *size);
058656
+static int64_t _cl5CheckMaxRUV(CL5DBFile *file, RUV *maxruv);
058656
+static int64_t _cl5CheckCSNinCL(const ruv_enum_data *element, void *arg);
058656
 
058656
 /* replay iteration */
058656
 #ifdef FOR_DEBUGGING
058656
@@ -2716,6 +2718,36 @@ _cl5WriteBervals(struct berval **bv, char **buff, u_int32_t *size)
058656
     return CL5_SUCCESS;
058656
 }
058656
 
058656
+static int64_t
058656
+_cl5CheckCSNinCL(const ruv_enum_data *element, void *arg)
058656
+{
058656
+    CL5DBFile *file = (CL5DBFile *)arg;
058656
+    int rc = 0;
058656
+
058656
+    DBT key = {0}, data = {0};
058656
+    char csnStr[CSN_STRSIZE];
058656
+
058656
+    /* construct the key */
058656
+    key.data = csn_as_string(element->csn, PR_FALSE, csnStr);
058656
+    key.size = CSN_STRSIZE;
058656
+
058656
+    data.flags = DB_DBT_MALLOC;
058656
+
058656
+    rc = file->db->get(file->db, NULL /*txn*/, &key, &data, 0);
058656
+
058656
+    slapi_ch_free(&(data.data));
058656
+    return rc;
058656
+}
058656
+
058656
+static int64_t
058656
+_cl5CheckMaxRUV(CL5DBFile *file, RUV *maxruv)
058656
+{
058656
+    int rc = 0;
058656
+
058656
+    rc = ruv_enumerate_elements(maxruv, _cl5CheckCSNinCL, (void *)file);
058656
+
058656
+    return rc;
058656
+}
058656
 /* upgrade from db33 to db41
058656
  * 1. Run recovery on the database environment using the DB_ENV->open method
058656
  * 2. Remove any Berkeley DB environment using the DB_ENV->remove method
058656
@@ -4010,6 +4042,13 @@ _cl5WriteRUV(CL5DBFile *file, PRBool purge)
058656
         rc = ruv_to_bervals(file->maxRUV, &vals);
058656
     }
058656
 
058656
+    if (!purge && _cl5CheckMaxRUV(file, file->maxRUV)) {
058656
+        slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name_cl,
058656
+                      "_cl5WriteRUV - changelog maxRUV not found in changelog for file %s\n",
058656
+                      file->name);
058656
+        return CL5_DB_ERROR;
058656
+    }
058656
+
058656
     key.size = CSN_STRSIZE;
058656
 
058656
     rc = _cl5WriteBervals(vals, &buff, &data.size);
058656
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h
058656
index c6e79b7e2..4e206a0fc 100644
058656
--- a/ldap/servers/plugins/replication/repl5.h
058656
+++ b/ldap/servers/plugins/replication/repl5.h
058656
@@ -725,7 +725,6 @@ Object *replica_get_for_backend(const char *be_name);
058656
 void replica_set_purge_delay(Replica *r, uint32_t purge_delay);
058656
 void replica_set_tombstone_reap_interval(Replica *r, long interval);
058656
 void replica_update_ruv_consumer(Replica *r, RUV *supplier_ruv);
058656
-void replica_set_ruv_dirty(Replica *r);
058656
 Slapi_Entry *get_in_memory_ruv(Slapi_DN *suffix_sdn);
058656
 int replica_write_ruv(Replica *r);
058656
 char *replica_get_dn(Replica *r);
058656
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
058656
index e5296bf1c..77f4f18e4 100644
058656
--- a/ldap/servers/plugins/replication/repl5_replica.c
058656
+++ b/ldap/servers/plugins/replication/repl5_replica.c
058656
@@ -41,7 +41,6 @@ struct replica
058656
     ReplicaType repl_type;             /* is this replica read-only ? */
058656
     ReplicaId repl_rid;                /* replicaID */
058656
     Object *repl_ruv;                  /* replica update vector */
058656
-    PRBool repl_ruv_dirty;             /* Dirty flag for ruv */
058656
     CSNPL *min_csn_pl;                 /* Pending list for minimal CSN */
058656
     void *csn_pl_reg_id;               /* registration assignment for csn callbacks */
058656
     unsigned long repl_state_flags;    /* state flags */
058656
@@ -788,7 +787,6 @@ replica_set_ruv(Replica *r, RUV *ruv)
058656
     }
058656
 
058656
     r->repl_ruv = object_new((void *)ruv, (FNFree)ruv_destroy);
058656
-    r->repl_ruv_dirty = PR_TRUE;
058656
 
058656
     replica_unlock(r->repl_lock);
058656
 }
058656
@@ -860,9 +858,6 @@ replica_update_ruv(Replica *r, const CSN *updated_csn, const char *replica_purl)
058656
                                                     "to update RUV for replica %s, csn = %s\n",
058656
                                   slapi_sdn_get_dn(r->repl_root),
058656
                                   csn_as_string(updated_csn, PR_FALSE, csn_str));
058656
-                } else {
058656
-                    /* RUV updated - mark as dirty */
058656
-                    r->repl_ruv_dirty = PR_TRUE;
058656
                 }
058656
             } else {
058656
                 slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name,
058656
@@ -1347,8 +1342,6 @@ replica_dump(Replica *r)
058656
     slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "\tupdate dn: %s\n",
058656
                   updatedn_list ? updatedn_list : "not configured");
058656
     slapi_ch_free_string(&updatedn_list);
058656
-    slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "\truv: %s configured and is %sdirty\n",
058656
-                  r->repl_ruv ? "" : "not", r->repl_ruv_dirty ? "" : "not ");
058656
     slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "\tCSN generator: %s configured\n",
058656
                   r->repl_csngen ? "" : "not");
058656
     /* JCMREPL - Dump Referrals */
058656
@@ -1675,7 +1668,6 @@ replica_check_for_data_reload(Replica *r, void *arg __attribute__((unused)))
058656
 
058656
                     ruv_force_csn_update_from_ruv(upper_bound_ruv, r_ruv,
058656
                                                   "Force update of database RUV (from CL RUV) -> ", SLAPI_LOG_NOTICE);
058656
-                    replica_set_ruv_dirty(r);
058656
                 }
058656
 
058656
             } else {
058656
@@ -2778,11 +2770,6 @@ replica_write_ruv(Replica *r)
058656
 
058656
     replica_lock(r->repl_lock);
058656
 
058656
-    if (!r->repl_ruv_dirty) {
058656
-        replica_unlock(r->repl_lock);
058656
-        return rc;
058656
-    }
058656
-
058656
     PR_ASSERT(r->repl_ruv);
058656
 
058656
     ruv_to_smod((RUV *)object_get_data(r->repl_ruv), &smod);
058656
@@ -2817,14 +2804,10 @@ replica_write_ruv(Replica *r)
058656
     /* ruv does not exist - create one */
058656
     replica_lock(r->repl_lock);
058656
 
058656
-    if (rc == LDAP_SUCCESS) {
058656
-        r->repl_ruv_dirty = PR_FALSE;
058656
-    } else if (rc == LDAP_NO_SUCH_OBJECT) {
058656
+    if (rc == LDAP_NO_SUCH_OBJECT) {
058656
         /* this includes an internal operation - but since this only happens
058656
            during server startup - its ok that we have lock around it */
058656
         rc = _replica_configure_ruv(r, PR_TRUE);
058656
-        if (rc == 0)
058656
-            r->repl_ruv_dirty = PR_FALSE;
058656
     } else /* error */
058656
     {
058656
         slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name,
058656
@@ -3325,7 +3308,6 @@ replica_create_ruv_tombstone(Replica *r)
058656
 
058656
             if (ruv_init_new(csnstr, r->repl_rid, purl, &ruv) == RUV_SUCCESS) {
058656
                 r->repl_ruv = object_new((void *)ruv, (FNFree)ruv_destroy);
058656
-                r->repl_ruv_dirty = PR_TRUE;
058656
                 return_value = LDAP_SUCCESS;
058656
             } else {
058656
                 slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "replica_create_ruv_tombstone - "
058656
@@ -3365,8 +3347,6 @@ replica_create_ruv_tombstone(Replica *r)
058656
     slapi_add_internal_pb(pb);
058656
     e = NULL; /* add consumes e, upon success or failure */
058656
     slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &return_value);
058656
-    if (return_value == LDAP_SUCCESS)
058656
-        r->repl_ruv_dirty = PR_FALSE;
058656
 
058656
 done:
058656
     slapi_entry_free(e);
058656
@@ -3630,7 +3610,6 @@ replica_strip_cleaned_rids(Replica *r)
058656
     ruv_get_cleaned_rids(ruv, rid);
058656
     while (rid[i] != 0) {
058656
         ruv_delete_replica(ruv, rid[i]);
058656
-        replica_set_ruv_dirty(r);
058656
         if (replica_write_ruv(r)) {
058656
             slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name,
058656
                           "replica_strip_cleaned_rids - Failed to write RUV\n");
058656
@@ -3744,15 +3723,6 @@ replica_update_ruv_consumer(Replica *r, RUV *supplier_ruv)
058656
     }
058656
 }
058656
 
058656
-void
058656
-replica_set_ruv_dirty(Replica *r)
058656
-{
058656
-    PR_ASSERT(r);
058656
-    replica_lock(r->repl_lock);
058656
-    r->repl_ruv_dirty = PR_TRUE;
058656
-    replica_unlock(r->repl_lock);
058656
-}
058656
-
058656
 PRBool
058656
 replica_is_state_flag_set(Replica *r, int32_t flag)
058656
 {
058656
diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c
058656
index 9c8d6adbb..e025f34d8 100644
058656
--- a/ldap/servers/plugins/replication/repl5_replica_config.c
058656
+++ b/ldap/servers/plugins/replication/repl5_replica_config.c
058656
@@ -937,7 +937,6 @@ replica_config_change_type_and_id(Replica *r, const char *new_type, const char *
058656
                     replica_reset_csn_pl(r);
058656
                 }
058656
                 ruv_delete_replica(ruv, oldrid);
058656
-                replica_set_ruv_dirty(r);
058656
                 cl5CleanRUV(oldrid);
058656
                 replica_set_csn_assigned(r);
058656
             }
058656
@@ -1323,7 +1322,6 @@ replica_execute_cleanruv_task(Object *r, ReplicaId rid, char *returntext __attri
058656
         return LDAP_UNWILLING_TO_PERFORM;
058656
     }
058656
     rc = ruv_delete_replica(local_ruv, rid);
058656
-    replica_set_ruv_dirty(replica);
058656
     if (replica_write_ruv(replica)) {
058656
         slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "cleanAllRUV_task - Could not write RUV\n");
058656
     }
058656
-- 
058656
2.13.6
058656