Blame SOURCES/0011-Issue-50599-Remove-db-region-files-prior-to-db-recov.patch

8394b4
From 74525da09050809a55e2c6bba4c42b27d5326a8f Mon Sep 17 00:00:00 2001
8394b4
From: Mark Reynolds <mreynolds@redhat.com>
8394b4
Date: Sun, 12 Jan 2020 20:11:07 -0500
8394b4
Subject: [PATCH] Issue 50599 - Remove db region files prior to db recovery
8394b4
8394b4
Bug Description:  If the server crashes then the region files can become
8394b4
                  corrupted and this prevents the server from starting.
8394b4
8394b4
Fix Description:  If we encounter a disorderly shutdown, then remove
8394b4
                  the region files so there is a clean slate to start
8394b4
                  with.
8394b4
8394b4
                  Also fixed function name typo:  slapi_disordely_shutdown -> slapi_disorderly_shutdown
8394b4
8394b4
relates: https://pagure.io/389-ds-base/issue/50599
8394b4
8394b4
Reviewed by: firstyear & lkrispen (Thanks!!)
8394b4
---
8394b4
 .../plugins/replication/repl5_replica.c       |  4 +-
8394b4
 .../slapd/back-ldbm/db-bdb/bdb_layer.c        | 88 ++++++++++---------
8394b4
 ldap/servers/slapd/plugin.c                   |  8 +-
8394b4
 ldap/servers/slapd/slapi-plugin.h             |  2 +-
8394b4
 4 files changed, 54 insertions(+), 48 deletions(-)
8394b4
8394b4
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
8394b4
index 94507bff8..02caa88d9 100644
8394b4
--- a/ldap/servers/plugins/replication/repl5_replica.c
8394b4
+++ b/ldap/servers/plugins/replication/repl5_replica.c
8394b4
@@ -1657,7 +1657,7 @@ replica_check_for_data_reload(Replica *r, void *arg __attribute__((unused)))
8394b4
              * sessions.
8394b4
              */
8394b4
 
8394b4
-            if (slapi_disordely_shutdown(PR_FALSE)) {
8394b4
+            if (slapi_disorderly_shutdown(PR_FALSE)) {
8394b4
                 slapi_log_err(SLAPI_LOG_WARNING, repl_plugin_name, "replica_check_for_data_reload - "
8394b4
                                                                    "Disorderly shutdown for replica %s. Check if DB RUV needs to be updated\n",
8394b4
                               slapi_sdn_get_dn(r->repl_root));
8394b4
@@ -1701,7 +1701,7 @@ replica_check_for_data_reload(Replica *r, void *arg __attribute__((unused)))
8394b4
                                   slapi_sdn_get_dn(r->repl_root));
8394b4
                     rc = 0;
8394b4
                 }
8394b4
-            } /* slapi_disordely_shutdown */
8394b4
+            } /* slapi_disorderly_shutdown */
8394b4
 
8394b4
             object_release(ruv_obj);
8394b4
         } else /* we have no changes currently logged for this replica */
8394b4
diff --git a/ldap/servers/slapd/back-ldbm/db-bdb/bdb_layer.c b/ldap/servers/slapd/back-ldbm/db-bdb/bdb_layer.c
8394b4
index 10f6d401e..2103dac38 100644
8394b4
--- a/ldap/servers/slapd/back-ldbm/db-bdb/bdb_layer.c
8394b4
+++ b/ldap/servers/slapd/back-ldbm/db-bdb/bdb_layer.c
8394b4
@@ -15,6 +15,8 @@
8394b4
 #include <prclist.h>
8394b4
 #include <sys/types.h>
8394b4
 #include <sys/statvfs.h>
8394b4
+#include <glob.h>
8394b4
+
8394b4
 
8394b4
 #define DB_OPEN(oflags, db, txnid, file, database, type, flags, mode, rval)                                     \
8394b4
     {                                                                                                           \
8394b4
@@ -990,10 +992,9 @@ bdb_start(struct ldbminfo *li, int dbmode)
8394b4
     return_value = dblayer_grok_directory(region_dir,
8394b4
                                           DBLAYER_DIRECTORY_READWRITE_ACCESS);
8394b4
     if (0 != return_value) {
8394b4
-        slapi_log_err(SLAPI_LOG_CRIT, "bdb_start", "Can't start because the database "
8394b4
-                                                       "directory \"%s\" either doesn't exist, or is not "
8394b4
-                                                       "accessible\n",
8394b4
-                      region_dir);
8394b4
+        slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
8394b4
+                "Can't start because the database directory \"%s\" either doesn't exist, or is not accessible\n",
8394b4
+                region_dir);
8394b4
         return return_value;
8394b4
     }
8394b4
 
8394b4
@@ -1003,10 +1004,9 @@ bdb_start(struct ldbminfo *li, int dbmode)
8394b4
         return_value = dblayer_grok_directory(log_dir,
8394b4
                                               DBLAYER_DIRECTORY_READWRITE_ACCESS);
8394b4
         if (0 != return_value) {
8394b4
-            slapi_log_err(SLAPI_LOG_CRIT, "bdb_start", "Can't start because the log "
8394b4
-                                                           "directory \"%s\" either doesn't exist, or is not "
8394b4
-                                                           "accessible\n",
8394b4
-                          log_dir);
8394b4
+            slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
8394b4
+                    "Can't start because the log directory \"%s\" either doesn't exist, or is not accessible\n",
8394b4
+                    log_dir);
8394b4
             return return_value;
8394b4
         }
8394b4
     }
8394b4
@@ -1057,15 +1057,27 @@ bdb_start(struct ldbminfo *li, int dbmode)
8394b4
         if (conf->bdb_recovery_required) {
8394b4
             open_flags |= DB_RECOVER;
8394b4
             if (DBLAYER_RESTORE_MODE & dbmode) {
8394b4
-                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start", "Recovering database after restore "
8394b4
-                                                                 "from archive.\n");
8394b4
+                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start",
8394b4
+                        "Recovering database after restore from archive.\n");
8394b4
             } else if (DBLAYER_CLEAN_RECOVER_MODE & dbmode) {
8394b4
-                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start", "Clean up db environment and start "
8394b4
-                                                                 "from archive.\n");
8394b4
+                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start",
8394b4
+                        "Clean up db environment and start from archive.\n");
8394b4
             } else {
8394b4
-                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start", "Detected Disorderly Shutdown last "
8394b4
-                                                                 "time Directory Server was running, recovering database.\n");
8394b4
-                slapi_disordely_shutdown(PR_TRUE);
8394b4
+                glob_t globbuf;
8394b4
+                char file_pattern[MAXPATHLEN];
8394b4
+
8394b4
+                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start",
8394b4
+                        "Detected Disorderly Shutdown last time Directory Server was running, recovering database.\n");
8394b4
+                slapi_disorderly_shutdown(PR_TRUE);
8394b4
+
8394b4
+                /* Wipe out the region files for a clean start; glob flags are 0 since gl_offs is never set */
8394b4
+                PR_snprintf(file_pattern, MAXPATHLEN, "%s/%s", region_dir, "__db.*");
8394b4
+                if (glob(file_pattern, 0, NULL, &globbuf) == 0) {
8394b4
+                    for (size_t i = 0; i < globbuf.gl_pathc; i++) {
8394b4
+                        remove(globbuf.gl_pathv[i]);
8394b4
+                    }
8394b4
+                    globfree(&globbuf);
8394b4
+                }
8394b4
             }
8394b4
         }
8394b4
         switch (dbmode & DBLAYER_RESTORE_MASK) {
8394b4
@@ -1121,7 +1133,7 @@ bdb_start(struct ldbminfo *li, int dbmode)
8394b4
              */
8394b4
             if (conf->bdb_lock_config <= BDB_LOCK_NB_MIN) {
8394b4
                 slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start", "New max db lock count is too small.  "
8394b4
-                                                                 "Resetting it to the default value %d.\n",
8394b4
+                              "Resetting it to the default value %d.\n",
8394b4
                               BDB_LOCK_NB_MIN);
8394b4
                 conf->bdb_lock_config = BDB_LOCK_NB_MIN;
8394b4
             }
8394b4
@@ -1165,29 +1177,26 @@ bdb_start(struct ldbminfo *li, int dbmode)
8394b4
     if ((open_flags & DB_RECOVER) || (open_flags & DB_RECOVER_FATAL)) {
8394b4
         /* Recover, then close, then open again */
8394b4
         int recover_flags = open_flags & ~DB_THREAD;
8394b4
-
8394b4
         if (DBLAYER_CLEAN_RECOVER_MODE & dbmode) /* upgrade case */
8394b4
         {
8394b4
             DB_ENV *thisenv = pEnv->bdb_DB_ENV;
8394b4
             return_value = thisenv->remove(thisenv, region_dir, DB_FORCE);
8394b4
             if (0 != return_value) {
8394b4
-                slapi_log_err(SLAPI_LOG_CRIT,
8394b4
-                              "bdb_start", "Failed to remove old db env "
8394b4
-                                               "in %s: %s\n",
8394b4
-                              region_dir,
8394b4
-                              dblayer_strerror(return_value));
8394b4
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
8394b4
+                        "Failed to remove old db env in %s: %s\n",
8394b4
+                        region_dir, dblayer_strerror(return_value));
8394b4
                 return return_value;
8394b4
             }
8394b4
             dbmode = DBLAYER_NORMAL_MODE;
8394b4
 
8394b4
             if ((return_value = bdb_make_env(&pEnv, li)) != 0) {
8394b4
-                slapi_log_err(SLAPI_LOG_CRIT,
8394b4
-                              "bdb_start", "Failed to create DBENV (returned: %d).\n",
8394b4
-                              return_value);
8394b4
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
8394b4
+                        "Failed to create DBENV (returned: %d).\n", return_value);
8394b4
                 return return_value;
8394b4
             }
8394b4
         }
8394b4
 
8394b4
+
8394b4
         return_value = (pEnv->bdb_DB_ENV->open)(
8394b4
             pEnv->bdb_DB_ENV,
8394b4
             region_dir,
8394b4
@@ -1201,27 +1210,25 @@ bdb_start(struct ldbminfo *li, int dbmode)
8394b4
                  */
8394b4
                 slapi_log_err(SLAPI_LOG_CRIT,
8394b4
                               "bdb_start", "mmap in opening database environment (recovery mode) "
8394b4
-                                               "failed trying to allocate %" PRIu64 " bytes. (OS err %d - %s)\n",
8394b4
+                              "failed trying to allocate %" PRIu64 " bytes. (OS err %d - %s)\n",
8394b4
                               li->li_dbcachesize, return_value, dblayer_strerror(return_value));
8394b4
                 bdb_free_env(&priv->dblayer_env);
8394b4
                 priv->dblayer_env = CATASTROPHIC;
8394b4
             } else {
8394b4
                 slapi_log_err(SLAPI_LOG_CRIT, "bdb_start", "Database Recovery Process FAILED. "
8394b4
-                                                               "The database is not recoverable. err=%d: %s\n",
8394b4
+                              "The database is not recoverable. err=%d: %s\n",
8394b4
                               return_value, dblayer_strerror(return_value));
8394b4
-                slapi_log_err(SLAPI_LOG_CRIT,
8394b4
-                              "bdb_start", "Please make sure there is enough disk space for "
8394b4
-                                               "dbcache (%" PRIu64 " bytes) and db region files\n",
8394b4
-                              li->li_dbcachesize);
8394b4
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
8394b4
+                        "Please make sure there is enough disk space for dbcache (%" PRIu64 " bytes) and db region files\n",
8394b4
+                        li->li_dbcachesize);
8394b4
             }
8394b4
             return return_value;
8394b4
         } else {
8394b4
             open_flags &= ~(DB_RECOVER | DB_RECOVER_FATAL);
8394b4
             pEnv->bdb_DB_ENV->close(pEnv->bdb_DB_ENV, 0);
8394b4
             if ((return_value = bdb_make_env(&pEnv, li)) != 0) {
8394b4
-                slapi_log_err(SLAPI_LOG_CRIT,
8394b4
-                              "bdb_start", "Failed to create DBENV (returned: %d).\n",
8394b4
-                              return_value);
8394b4
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
8394b4
+                        "Failed to create DBENV (returned: %d).\n", return_value);
8394b4
                 return return_value;
8394b4
             }
8394b4
             bdb_free_env(&priv->dblayer_env);
8394b4
@@ -1288,16 +1295,15 @@ bdb_start(struct ldbminfo *li, int dbmode)
8394b4
                  * https://blackflag.mcom.com/show_bug.cgi?id=557319
8394b4
                  * Crash ns-slapd while running scalab01 after restart slapd
8394b4
                  */
8394b4
-                slapi_log_err(SLAPI_LOG_CRIT,
8394b4
-                              "bdb_start", "mmap in opening database environment "
8394b4
-                                               "failed trying to allocate %" PRIu64 " bytes. (OS err %d - %s)\n",
8394b4
-                              li->li_dbcachesize, return_value, dblayer_strerror(return_value));
8394b4
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
8394b4
+                        "mmap in opening database environment failed trying to allocate %" PRIu64 " bytes. (OS err %d - %s)\n",
8394b4
+                        li->li_dbcachesize, return_value, dblayer_strerror(return_value));
8394b4
                 bdb_free_env(&priv->dblayer_env);
8394b4
                 priv->dblayer_env = CATASTROPHIC;
8394b4
             } else {
8394b4
-                slapi_log_err(SLAPI_LOG_CRIT,
8394b4
-                              "bdb_start", "Opening database environment (%s) failed. err=%d: %s\n",
8394b4
-                              region_dir, return_value, dblayer_strerror(return_value));
8394b4
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
8394b4
+                        "Opening database environment (%s) failed. err=%d: %s\n",
8394b4
+                        region_dir, return_value, dblayer_strerror(return_value));
8394b4
             }
8394b4
         }
8394b4
         return return_value;
8394b4
diff --git a/ldap/servers/slapd/plugin.c b/ldap/servers/slapd/plugin.c
8394b4
index b00c1bd8f..282b98738 100644
8394b4
--- a/ldap/servers/slapd/plugin.c
8394b4
+++ b/ldap/servers/slapd/plugin.c
8394b4
@@ -4383,14 +4383,14 @@ slapi_set_plugin_open_rootdn_bind(Slapi_PBlock *pb)
8394b4
 }
8394b4
 
8394b4
 PRBool
8394b4
-slapi_disordely_shutdown(PRBool set)
8394b4
+slapi_disorderly_shutdown(PRBool set)
8394b4
 {
8394b4
-    static PRBool is_disordely_shutdown = PR_FALSE;
8394b4
+    static PRBool is_disorderly_shutdown = PR_FALSE;
8394b4
 
8394b4
     if (set) {
8394b4
-        is_disordely_shutdown = PR_TRUE;
8394b4
+        is_disorderly_shutdown = PR_TRUE;
8394b4
     }
8394b4
-    return (is_disordely_shutdown);
8394b4
+    return (is_disorderly_shutdown);
8394b4
 }
8394b4
 
8394b4
 /*
8394b4
diff --git a/ldap/servers/slapd/slapi-plugin.h b/ldap/servers/slapd/slapi-plugin.h
8394b4
index 29a6238d9..50b8d12c8 100644
8394b4
--- a/ldap/servers/slapd/slapi-plugin.h
8394b4
+++ b/ldap/servers/slapd/slapi-plugin.h
8394b4
@@ -7900,7 +7900,7 @@ uint64_t slapi_str_to_u64(const char *s);
8394b4
 
8394b4
 void slapi_set_plugin_open_rootdn_bind(Slapi_PBlock *pb);
8394b4
 
8394b4
-PRBool slapi_disordely_shutdown(PRBool set);
8394b4
+PRBool slapi_disorderly_shutdown(PRBool set);
8394b4
 
8394b4
 /*
8394b4
  * Public entry extension getter/setter functions
8394b4
-- 
8394b4
2.21.1
8394b4