Blob Blame History Raw
From 9a2e5bf96df70883fc5978471f72d6bd697116e0 Mon Sep 17 00:00:00 2001
From: Damani <damani@rubrik.com>
Date: Thu, 26 Sep 2019 08:27:53 -0600
Subject: [PATCH] Support for Rubrik

---
 .gitignore                                    |   3 +
 doc/user-guide/16-Rubrik-CDM.adoc             | 106 ++++++++++++++++++
 usr/share/rear/conf/default.conf              |  11 ++
 .../rear/prep/CDM/default/400_prep_cdm.sh     |   7 ++
 .../prep/CDM/default/450_check_cdm_client.sh  |  13 +++
 .../CDM/default/400_restore_with_cdm.sh       |  19 ++++
 .../410_use_replica_cdm_cluster_cert.sh       |  43 +++++++
 .../CDM/default/430_gen_rbs_uuid_for_cdm.sh   |  29 +++++
 .../verify/CDM/default/450_start_cdm_rbs.sh   |  17 +++
 9 files changed, 248 insertions(+)
 create mode 100644 doc/user-guide/16-Rubrik-CDM.adoc
 create mode 100644 usr/share/rear/prep/CDM/default/400_prep_cdm.sh
 create mode 100644 usr/share/rear/prep/CDM/default/450_check_cdm_client.sh
 create mode 100644 usr/share/rear/restore/CDM/default/400_restore_with_cdm.sh
 create mode 100644 usr/share/rear/verify/CDM/default/410_use_replica_cdm_cluster_cert.sh
 create mode 100644 usr/share/rear/verify/CDM/default/430_gen_rbs_uuid_for_cdm.sh
 create mode 100644 usr/share/rear/verify/CDM/default/450_start_cdm_rbs.sh

diff --git a/.gitignore b/.gitignore
index 5e3dc940..a644c865 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,6 @@
 build-stamp
 /var
 /etc/rear/site.conf
+.DS_Store
+.vscode
+servers
diff --git a/doc/user-guide/16-Rubrik-CDM.adoc b/doc/user-guide/16-Rubrik-CDM.adoc
new file mode 100644
index 00000000..41f37d20
--- /dev/null
+++ b/doc/user-guide/16-Rubrik-CDM.adoc
@@ -0,0 +1,106 @@
+= Documentation for the Rubrik Cloud Data Management (CDM) Backup and Restore Method
+
+== Summary
+
+The Rubrik CDM backup and restore method for ReaR allows Rubrik CDM to perform bare metal recovery of Linux systems that are supported by ReaR. It does this by including the installed Rubrik CDM RBS agent files in the ISO that is created by `rear mkrescue` via a pre-script in the fileset. The ISO is left in place under `/var/lib/rear/output/rear-<hostname>.iso` by default. During the fileset backup Rubrik will back up the main operating system files as well as the ReaR ISO file.
+
+Bare Metal Recovery is performed by first restoring the ReaR ISO file from Rubrik CDM to an alternate host. Next the host being restored is booted from the ISO via CD/DVD, USB, vSphere Datastore ISO, etc. Once booted, running `rear recover` will prepare the host for restore and start the Rubrik CDM RBS agent. If the host has a new IP address the new RBS agent will need to be registered with the Rubrik cluster. Registration is not necessary if the recovery host is reusing the same IP address as the original. All of the files for the host are then recovered from Rubrik CDM to the recovery host's `/mnt/local` directory by the user. Once complete the user exits ReaR and reboots the host.
+
+== Configuration
+
+1. Install and configure ReaR in accordance with:
+- Red Hat 
+   * https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/deployment_guide/ch-relax-and-recover_rear
+- Ubuntu
+   * http://manpages.ubuntu.com/manpages/disco/en/man8/rear.8.html
+- SUSE
+   * https://en.opensuse.org/SDB:Disaster_Recovery
+   * https://documentation.suse.com/sle-ha/15-SP1/html/SLE-HA-all/cha-ha-rear.html
+- Generic
+   * https://github.com/rear/rear
+
+   NOTE: Ignore any instructions to configure external storage like NFS, CIFS/SMB or ftp. Also ignore any instructions to configure a specific backup method. This will be taken care of in the next steps. 
+
+   NOTE: Ignore any instructions to schedule ReaR to run via the host based scheduler (cron). Rubrik CDM will run ReaR via a pre-script in the fileset. If this is not preferred ReaR can be scheduled on the host, however, the ISOs created may not be in sync with the backups.
+
+   NOTE: If installing the pre-release or development version for which there is no installer, copy the repo to the host being protected. Then run `make install` from the root directory of the repo.
+
+1. Install the Rubrik CDM RBS agent as directed by the Rubrik documentation.
+1. Edit `/etc/rear/local.conf` and enter:
+
+   OUTPUT=ISO
+   BACKUP=CDM
+
+1. Test `ReaR` by running `rear -v mkrescue`
+1. Configure fileset backup of the host and add `/usr/sbin/rear mkrescue` as a prescript. 
+1. ISOs will be saved as `/var/lib/rear/output/*.iso`
+
+- Recovery 
+
+1. Recover `/var/lib/rear/output/rear-<hostname>.iso` from host to be restored. 
+1. Boot recovery machine using recovered ISO.
+   
+   NOTE: Recovered system will use the same networking as the original machine. Verify no IP conflicts will occur. 
+
+   NOTE: If the recovered system will use the same static IP address as the original machine, that address will need to be changed if the original machine is still running.
+
+1. Verify Firewall is down on recovery host.
+1. Run `rear recover`
+1. Answer inline questions until `rear>` prompt appears.
+1. Run `ps -eaf` and verify that `backup_agent_main` and `bootstrap_agent_main` are running.
+1. Get the IP address of the system using `ip addr`
+1. Register the new IP with the Rubrik appliance (if needed)
+1. Perform a re-directed export of `/` to `/mnt/local`
+1. Reboot
+1. Recover other file systems as needed.
+
+   NOTE: The Rubrik RBS agent will now connect as the original machine. The host may need to be reinstalled and re-registered if the original machine is still running.
+
+== Known Issues
+
+* Recovery via IPv6 is not yet supported.
+* Automatic recovery from replica CDM cluster is not supported
+* CDM may take some time to recognize that the IP address has moved from one system to another. When restoring using the same IP give CDM up to 10 minutes to recognize that the agent is running on another machine. This usually comes up during testing when the original machine is shutdown but not being restored to. 
+* Recovery from a replica CDM cluster is only supported with CDM v4.2.1 and higher.
+* Care must be taken with SUSE systems on DHCP. They tend to request the same IP as the original host. If this is not the desired behavior the system will have to be adjusted after booting from the ReaR ISO.  
+* If multiple restores are performed using the same temporary IP, the temporary IP must first be deleted from Servers & Apps -> Linux and Unix Servers and re-added upon each reuse.
+* ReaR's `ldd` check of other binaries or libraries may result in libraries not being found. This can generally be fixed by adding the path to those libraries to the `LD_LIBRARY_PATH` variable in `/etc/rear/local.conf`. Do this by adding the following line in `/etc/rear/local.conf`:
++
+  export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:<path>"
++
+To make CentOS v7.7 work the following line was needed:
++
+  export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib64/bind9-export"
++
+To make CentOS v8.0 work the following line was needed:
++
+  export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib64/bind9-export:/usr/lib64/eog:/usr/lib64/python3.6/site-packages:/usr/lib64/samba:/usr/lib64/firefox"
+
+== Troubleshooting
+
+* Verify that ReaR will recover your system without using the CDM backup and restore method. Most errors are due to configuration with ReaR itself and not Rubrik CDM. Use the default ReaR backup and restore method to test with. 
+* Follow the OS specific configuration guides as mentioned at the beginning of this document. 
+
+== Test Matrix
+
+.Test Matrix
+[%header,format=csv]
+|===
+Operating System,DHCP,Static IP,Virtual,Physical,LVM Root Disk,Plain Root Disk,EXT3,EXT4,XFS,BTRFS,Original Cluster,Replication Cluster
+CentOS 7.3,,Pass,Pass,,Pass,,,,Pass,,Pass,
+CentOS 7.6,Pass,,Pass,,Pass,,,,Pass,,Pass,
+CentOS 7.7,Pass,,Pass,Pass,Pass,,,,Pass,,Pass,
+CentOS 8.0,Pass,,Pass,,Pass,,,,Pass,,Pass,
+CentOS 5.11,,,,,,,,,,,,
+CentOS 6.10,,,,,,,,,,,,
+RHEL 7.6,Pass,,Pass,,Pass,,,,,,,
+RHEL 7.4,,,,,,,,,,,,
+RHEL 6.10,,,,,,,,,,,,
+SUSE 11 SP4,,,,,,,,,,,,
+SUSE 12 SP4,Pass (uses same IP as original),,Pass,,,,,,,Pass,Pass,
+Ubuntu 14.04 LTS,,,,,,,,,,,,
+Ubuntu 16.04 LTS,Pass,,,,Pass,,,Pass,,,Pass,
+Ubuntu 17.04 LTS,,,,,,,,,,,,
+|===
+
+* Empty cells indicate that no tests were run.
diff --git a/usr/share/rear/conf/default.conf b/usr/share/rear/conf/default.conf
index b9f10e89..692e1874 100644
--- a/usr/share/rear/conf/default.conf
+++ b/usr/share/rear/conf/default.conf
@@ -1334,6 +1334,17 @@ LANG_RECOVER=C
 # low-quality master encryption key. For details, see the cryptsetup(8) manual page.
 LUKS_CRYPTSETUP_OPTIONS="--iter-time 2000 --use-random"
 
+##
+# BACKUP=CDM (Rubrik CDM; Cloud Data Management)
+##
+# ReaR support for Rubrik Cloud Data Management (CDM).
+# ReaR will copy the Rubrik RBS agent and required OS binaries to its ISO for inclusion on boot.
+# ReaR will start the Rubrik RBS agent when 'rear recover' is run.
+COPY_AS_IS_CDM=( /etc/rubrik /usr/bin/rubrik /var/log/rubrik /etc/pki /usr/lib64 )
+COPY_AS_IS_EXCLUDE_CDM=( /var/log/rubrik/* )
+PROGS_CDM=( /usr/bin/rubrik/backup_agent_main /usr/bin/rubrik/bootstrap_agent_main openssl uuidgen )
+
+
 ##
 # BACKUP=FDRUPSTREAM stuff
 ##
diff --git a/usr/share/rear/prep/CDM/default/400_prep_cdm.sh b/usr/share/rear/prep/CDM/default/400_prep_cdm.sh
new file mode 100644
index 00000000..d3fd11b7
--- /dev/null
+++ b/usr/share/rear/prep/CDM/default/400_prep_cdm.sh
@@ -0,0 +1,7 @@
+#
+# Append the Rubrik CDM files, exclusions and programs to the rescue system lists
+#
+
+COPY_AS_IS+=( "${COPY_AS_IS_CDM[@]}" )
+COPY_AS_IS_EXCLUDE+=( "${COPY_AS_IS_EXCLUDE_CDM[@]}" )
+PROGS+=( "${PROGS_CDM[@]}" fmt )
diff --git a/usr/share/rear/prep/CDM/default/450_check_cdm_client.sh b/usr/share/rear/prep/CDM/default/450_check_cdm_client.sh
new file mode 100644
index 00000000..637fac5f
--- /dev/null
+++ b/usr/share/rear/prep/CDM/default/450_check_cdm_client.sh
@@ -0,0 +1,13 @@
+# 450_check_cdm_client.sh
+# Abort if the Rubrik CDM RBS client is not installed or backup_agent_main is not running.
+# The install check calls Error directly: StopIfError acts on the exit status of the previous
+# command, and that status is 0 inside the 'if' branch, so StopIfError would never fire there.
+
+Log "Backup method is Rubrik (CDM): check backup_agent_main"
+if [ ! -x /usr/bin/rubrik/backup_agent_main ]; then
+    Error "Please install Rubrik (CDM) RBS client software."
+fi
+
+ps ax | grep -v grep | grep backup_agent_main
+StopIfError "Rubrik (CDM) RBS backup_agent_main was not running on this client."
+
diff --git a/usr/share/rear/restore/CDM/default/400_restore_with_cdm.sh b/usr/share/rear/restore/CDM/default/400_restore_with_cdm.sh
new file mode 100644
index 00000000..bc4811c4
--- /dev/null
+++ b/usr/share/rear/restore/CDM/default/400_restore_with_cdm.sh
@@ -0,0 +1,19 @@
+# 400_restore_with_cdm.sh
+# Tell the user to run the restore on the Rubrik (CDM) cluster, then call rear_shell with a confirmation prompt.
+# The address list hides only loopback: 'grep -w' keeps IPv6 addresses that merely contain '::1'.
+
+LogPrint "Please start the restore process on the Rubrik (CDM) cluster."
+
+if is_true "$CDM_NEW_AGENT_UUID" ; then
+  LogPrint ""
+  LogPrint "Register the appropriate IP address from this list with Rubrik (CDM):"
+  LogPrint "$( ip addr | grep inet | cut -d / -f 1 | grep -vw -e '127\.0\.0\.1' -e '::1' )"
+  LogPrint ""
+fi
+LogPrint "Make sure all required data is restored to $TARGET_FS_ROOT ."
+LogPrint ""
+LogPrint "Next type 'exit' to continue the recovery."
+LogPrint "Info: You can check the recovery process i.e. with the command 'df'."
+LogPrint ""
+
+rear_shell "Has the restore been completed and are you ready to continue the recovery?"
diff --git a/usr/share/rear/verify/CDM/default/410_use_replica_cdm_cluster_cert.sh b/usr/share/rear/verify/CDM/default/410_use_replica_cdm_cluster_cert.sh
new file mode 100644
index 00000000..7f18f27a
--- /dev/null
+++ b/usr/share/rear/verify/CDM/default/410_use_replica_cdm_cluster_cert.sh
@@ -0,0 +1,43 @@
+# 410_use_replica_cdm_cluster_cert.sh
+# If restoring from a replica Rubrik (CDM) cluster use its cert for RBS.
+
+CDM_RBA_DIR=/etc/rubrik
+CDM_KEYS_DIR=${CDM_RBA_DIR}/keys
+
+# When USER_INPUT_CDM_REPLICA_CLUSTER has any 'true' value be liberal in what you accept and assume exactly 'y' was actually meant:
+LogPrint ""
+is_true "$USER_INPUT_CDM_REPLICA_CLUSTER" && USER_INPUT_CDM_REPLICA_CLUSTER="y"
+while true ; do
+    # Find out if the restore is being done from the original CDM cluster or a replica.
+    # The default answer passed via -D below is 'y' (original cluster), in which case this script returns early:
+    answer="$( UserInput -I CDM_REPLICA_CLUSTER -p "Is the data being restored from the original CDM Cluster? (y/n)" -D 'y' -t 300 )"
+    is_true "$answer" && return 0
+    if is_false "$answer" ; then
+        break
+    fi
+    UserOutput "Please answer 'y' or 'n'"
+done
+
+while true; do
+    CDM_CLUSTER_IP="$(UserInput -I USER_INPUT_CDM_CLUSTER_IP -r -t 0 -p "Enter one of the IP addresses for the replica CDM cluster: ")"
+    [[ -n "$CDM_CLUSTER_IP" ]] && break
+    PrintError "Please enter a non-empty CDM cluster IP."
+done
+
+CDM_SUNOS_TAR=rubrik-agent-sunos5.10.sparc.tar.gz
+cd /tmp
+/usr/bin/curl $v -kLOJ https://${CDM_CLUSTER_IP}/connector/${CDM_SUNOS_TAR} 
+StopIfError "Could not download https://${CDM_CLUSTER_IP}/connector/${CDM_SUNOS_TAR}"
+
+/usr/bin/tar $v -xzf  $CDM_SUNOS_TAR
+StopIfError "Could not extract $CDM_SUNOS_TAR"
+
+CDM_CERT_FILE=$(find ./ -name "rubrik.crt")
+mv $v ${CDM_KEYS_DIR}/rubrik.crt ${CDM_KEYS_DIR}/rubrik.crt.orig
+cp $v $CDM_CERT_FILE $CDM_KEYS_DIR
+StopIfError "Could not copy replica CDM cluster certificate"
+
+/usr/bin/chmod $v 600 ${CDM_KEYS_DIR}/rubrik.crt
+
+
+LogPrint "Replica Rubrik (CDM) cluster certificate installed."
diff --git a/usr/share/rear/verify/CDM/default/430_gen_rbs_uuid_for_cdm.sh b/usr/share/rear/verify/CDM/default/430_gen_rbs_uuid_for_cdm.sh
new file mode 100644
index 00000000..5e99b79c
--- /dev/null
+++ b/usr/share/rear/verify/CDM/default/430_gen_rbs_uuid_for_cdm.sh
@@ -0,0 +1,29 @@
+# 430_gen_rbs_uuid_for_cdm.sh
+# Reset the UUID used by RBS if the IP address has changed
+# (the address list below hides only loopback; 'grep -w' keeps IPv6 addresses that merely contain '::1').
+CDM_RBA_DIR=/etc/rubrik
+CDM_AGENT_UUID=${CDM_RBA_DIR}/conf/uuid
+
+# When USER_INPUT_CDM_SAME_AGENT_UUID has any 'true' value be liberal in what you accept and assume exactly 'y' was actually meant:
+LogPrint ""
+LogPrint "Found the following IP addresses on this system:"
+LogPrint "$( ip addr | grep inet | cut -d / -f 1 | grep -vw -e '127\.0\.0\.1' -e '::1' )"
+LogPrint ""
+is_true "$USER_INPUT_CDM_SAME_AGENT_UUID" && USER_INPUT_CDM_SAME_AGENT_UUID="y"
+while true ; do
+    # Find out if the IP address has changed from the original. If so generate a new UUID.
+    # The default answer passed via -D below is 'y' (same IP address), in which case the existing UUID is kept:
+    answer="$( UserInput -I CDM_SAME_AGENT_UUID -p "Does this client have the same IP address as the original? (y/n)" -D 'y' -t 300 )"
+    is_true "$answer" && return 0
+    if is_false "$answer" ; then
+        break
+    fi
+    UserOutput "Please answer 'y' or 'n'"
+done
+
+mv $v ${CDM_AGENT_UUID} ${CDM_AGENT_UUID}.old
+/usr/bin/uuidgen | tee -a ${CDM_AGENT_UUID} >&2
+StopIfError "Unable to generate new UUID"
+
+CDM_NEW_AGENT_UUID="true"
+LogPrint "Rubrik (CDM) RBS agent now has new UUID."
diff --git a/usr/share/rear/verify/CDM/default/450_start_cdm_rbs.sh b/usr/share/rear/verify/CDM/default/450_start_cdm_rbs.sh
new file mode 100644
index 00000000..571da1da
--- /dev/null
+++ b/usr/share/rear/verify/CDM/default/450_start_cdm_rbs.sh
@@ -0,0 +1,17 @@
+# 450_start_cdm_rbs.sh
+# Launch the Rubrik (CDM) RBS bootstrap and backup agents with the flags read from their conf files
+
+RBA_BIN_DIR=/usr/bin/rubrik
+RBA_DIR=/etc/rubrik
+
+BOOTSTRAP_DAEMON="${RBA_BIN_DIR}/bootstrap_agent_main"
+BOOTSTRAP_DAEMON_OPTS="$( < "${RBA_DIR}/conf/bootstrap_flags.conf" )"
+AGENT_DAEMON="${RBA_BIN_DIR}/backup_agent_main"
+AGENT_DAEMON_OPTS="$( < "${RBA_DIR}/conf/agent_flags.conf" )"
+
+"$BOOTSTRAP_DAEMON" $BOOTSTRAP_DAEMON_OPTS
+StopIfError "Unable to start RBS Bootstrap service"
+"$AGENT_DAEMON" $AGENT_DAEMON_OPTS
+StopIfError "Unable to start RBS Agent service"
+
+LogPrint "Rubrik (CDM) RBS agent started."
-- 
2.25.4