| #!/bin/bash -ue |
| |
| # Copyright (C) 2010-2014 Codership Oy |
| # Copyright (C) 2017-2020 Damien Ciabrini <damien.ciabrini@gmail.com> |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; version 2 of the License. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program; see the file COPYING. If not, write to the |
| # Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston |
| # MA 02110-1301 USA. |
| |
| # This is a reference script for rsync-based state snapshot transfer |
| # over an encrypted communication channel, managed by socat |
| |
# Script-wide state. The pid/conf variables start out empty and are filled in
# by the donor/joiner code further down once the helper processes are started.
RSYNC_PID=                  # rsync pid file
RSYNC_CONF=                 # rsync configuration file
RSYNC_REAL_PID=             # rsync process id

SOCAT_PID=                  # socat pid file
SOCAT_REAL_PID=             # socat process id

SOCAT_OPTS=                 # openssl connection args

# Name of the rsync module; also used as prefix for the pid/conf file names.
MODULE="rsync_tunnel_sst"

OS=$(uname)
# On Darwin, remove the export attribute from LD_LIBRARY_PATH so that it is
# not passed on to the programs started by this script.
[ "$OS" == "Darwin" ] && export -n LD_LIBRARY_PATH

# Setting the path for lsof on CentOS
export PATH="/usr/sbin:/sbin:$PATH"

# Load the shared SST helpers that live next to this script. The path is
# quoted so that an installation directory containing whitespace still works.
. "$(dirname "$0")/wsrep_sst_common"

wsrep_check_programs rsync socat
| |
# Terminate a process: SIGTERM first, then SIGKILL half a second later.
#
# Arguments: $1 - process id; an empty value or "0" means "nothing to kill"
# Returns:   always 0 (cleanup is best-effort and must not abort the caller)
cleanup_pid()
{
    local real_pid=${1:-}

    # Without this guard an empty pid would reach `kill` with no operand,
    # which only produces a usage error.
    if [ -z "$real_pid" ] || [ "0" = "$real_pid" ]; then
        return 0
    fi

    # Escalate to SIGKILL only when the SIGTERM could actually be delivered.
    if kill "$real_pid"; then
        sleep 0.5 || :
        kill -9 "$real_pid" >/dev/null 2>&1 || :
    fi

    return 0
}
| |
# Stop the socat tunnel (if this script started one and it is still alive)
# and remove the socat pid file.
#
# Globals: SOCAT_REAL_PID (read), SOCAT_PID (read)
cleanup_tunnel()
{
    if [ -n "$SOCAT_REAL_PID" ]; then
        # Only signal the process when ps still reports it.
        if ps -p "$SOCAT_REAL_PID" >/dev/null 2>&1; then
            wsrep_log_info "cleanup socat PID: $SOCAT_REAL_PID"
            cleanup_pid $SOCAT_REAL_PID
        fi
    fi

    rm -rf "$SOCAT_PID"
}
| |
# Joiner-side cleanup: stop the rsync daemon, delete its configuration, the
# magic file and the pid file, tear down the socat tunnel and, when running
# in the joiner role, clean up the SST progress file.
#
# Globals: RSYNC_REAL_PID, RSYNC_CONF, MAGIC_FILE, RSYNC_PID,
#          WSREP_SST_OPT_ROLE (all read)
cleanup_joiner()
{
    local leftover

    wsrep_log_info "Joiner cleanup. rsync PID: $RSYNC_REAL_PID"

    if [ -n "$RSYNC_REAL_PID" ]; then
        cleanup_pid $RSYNC_REAL_PID
    fi

    # Remove the files one at a time, in the same order as before.
    for leftover in "$RSYNC_CONF" "$MAGIC_FILE" "$RSYNC_PID"
    do
        rm -rf "$leftover"
    done

    cleanup_tunnel

    wsrep_log_info "Joiner cleanup done."

    case "${WSREP_SST_OPT_ROLE}" in
        joiner)
            wsrep_cleanup_progress_file
            ;;
    esac
}
| |
# Check whether the process recorded in a pid file is still running.
#
# Arguments: $1 - path of the pid file
# Returns:   0 when the file is readable, holds a pid and `ps -p` finds that
#            process; non-zero otherwise (missing, unreadable or empty file,
#            or process gone)
check_pid()
{
    local pid_file=$1
    local pid="" rest=""

    [ -r "$pid_file" ] || return 1

    # Take the first word of the first line. `read` reports failure on an
    # empty file or a missing trailing newline, hence the `|| :`.
    read -r pid rest < "$pid_file" || :
    [ -n "$pid" ] || return 1

    ps -p "$pid" >/dev/null 2>&1
}
| |
# Check that a freshly started service is alive and listening on its port.
#
# Arguments: $1 - pid file of the service
#            $2 - pid the service is expected to have
#            $3 - port the service should listen on
#            $4 - host/address the service should listen on
#            $5 - command name of the service as shown by lsof
# Returns:   0 once the expected process is the listener on host:port and
#            the pid file contains the expected pid; non-zero while that is
#            not (yet) the case.
# Exits:     2  (ENOENT) when lsof is not available
#            16 (EBUSY)  when the port is held by some other listener
#            10 (ECHILD) when the process behind the pid file is gone
check_pid_and_port()
{
    local pid_file=$1
    local service_pid=$2
    local service_port=$3
    local service_host=$4
    local service_name=$5
    local port_info first_listener is_service recorded_pid

    if ! command -v lsof >/dev/null 2>&1; then
        wsrep_log_error "lsof tool not found in PATH! Make sure you have it installed."
        exit 2 # ENOENT
    fi

    # "No listener yet" is a normal outcome, so the pipeline status is ignored.
    port_info=$(lsof -i "@$service_host:$service_port" -Pn 2>/dev/null | \
        grep "(LISTEN)") || :

    # Only the first listener line is compared against "<name> <pid>"; this is
    # what the former flattened `echo $port_info | grep '^...'` amounted to,
    # without exposing the lsof output to word splitting and globbing.
    first_listener=${port_info%%$'\n'*}
    is_service=$(printf '%s\n' "$first_listener" | \
        grep -w '^'"$service_name"'[[:space:]]\+'"$service_pid" 2>/dev/null) || :

    if [ -n "$port_info" ] && [ -z "$is_service" ]; then
        wsrep_log_error "$service_name daemon port '$service_port' has been taken"
        exit 16 # EBUSY
    fi

    if ! check_pid "$pid_file"; then
        wsrep_log_error "$service_name process terminated unexpectedly"
        exit 10 # ECHILD
    fi

    # Not listening yet: tell the caller to keep polling.
    [ -n "$port_info" ] && [ -n "$is_service" ] || return 1

    recorded_pid=$(cat "$pid_file" 2>/dev/null) || return 1
    [ "$recorded_pid" -eq "$service_pid" ] 2>/dev/null
}
| |
# Print the value of an option as reported by $MY_PRINT_DEFAULTS.
#
# Arguments: $1 - option group to query (e.g. "sst")
#            $2 - option name, without the leading "--"
# Outputs:   the value of the last matching "--<key>=<value>" line on stdout,
#            or an empty line when the option is not present
config_from_cnf()
{
    local group=$1
    local key=$2
    local value

    # $MY_PRINT_DEFAULTS is expanded unquoted on purpose.
    # NOTE(review): it is set outside this file and presumably may carry extra
    # arguments besides the program name - confirm against wsrep_sst_common.
    value=$($MY_PRINT_DEFAULTS "$group" | grep -- "^--$key=" | cut -d= -f2- | tail -n 1) || :

    # printf with a quoted argument keeps whitespace and glob characters of
    # the configured value intact.
    printf '%s\n' "$value"
}
| |
# Build the socat OpenSSL option string from the [sst] section of my.cnf.
#
# Globals:  SOCAT_OPTS (written); tca, tkey, tcert, sockopt (written - they
#           are deliberately left global, as they were before)
# Exits:    3 when no certificate (tcert) is configured
# Returns:  0 otherwise. The function used to end with
#           `[ -n "$sockopt" ] && ...`, which made it return 1 whenever
#           sockopt was not configured and thereby aborted the whole script
#           when it runs with errexit (-e).
setup_tunnel_args()
{
    tca=$(config_from_cnf sst tca)
    tkey=$(config_from_cnf sst tkey)
    tcert=$(config_from_cnf sst tcert)
    sockopt=$(config_from_cnf sst sockopt)

    if [ -z "$tcert" ]; then
        wsrep_log_error "Encryption certificate not found in my.cnf"
        exit 3
    fi

    SOCAT_OPTS="cert=$tcert"
    if [ -n "$tkey" ]; then
        SOCAT_OPTS="$SOCAT_OPTS,key=$tkey"
    fi
    if [ -n "$tca" ]; then
        SOCAT_OPTS="$SOCAT_OPTS,cafile=$tca"
    fi
    wsrep_log_info "Encryption setting to be used for socat tunnel: $SOCAT_OPTS"

    # Extra user-supplied socat options are appended after logging.
    if [ -n "$sockopt" ]; then
        SOCAT_OPTS="$SOCAT_OPTS,$sockopt"
    fi

    return 0
}
| |
# Marker file: the donor writes the state ID into it and transfers it last;
# the joiner polls for it. A stale copy from an earlier run is removed.
MAGIC_FILE="$WSREP_SST_OPT_DATA/rsync_tunnel_sst_complete"
rm -rf "$MAGIC_FILE"

# Tarball used to ship the most recent binlog file(s) with the data.
BINLOG_TAR_FILE="$WSREP_SST_OPT_DATA/wsrep_sst_binlog.tar"
BINLOG_N_FILES=1
rm -f "$BINLOG_TAR_FILE" || :

if [ -n "$WSREP_SST_OPT_BINLOG" ]
then
    BINLOG_DIRNAME=$(dirname "$WSREP_SST_OPT_BINLOG")
    BINLOG_FILENAME=$(basename "$WSREP_SST_OPT_BINLOG")
fi

WSREP_LOG_DIR=${WSREP_LOG_DIR:-""}
# if WSREP_LOG_DIR env. variable is not set, try to get it from my.cnf
if [ -z "$WSREP_LOG_DIR" ]; then
    # "--innodb-log-group-home-dir=" is 28 bytes long, so the value starts at
    # byte 29. When the option is given more than once, only the last
    # occurrence is kept so that the result is a single path.
    WSREP_LOG_DIR=$($MY_PRINT_DEFAULTS --mysqld \
                    | grep -- '--innodb[-_]log[-_]group[-_]home[-_]dir=' \
                    | cut -b 29- \
                    | tail -n 1)
fi

if [ -n "$WSREP_LOG_DIR" ]; then
    # handle both relative and absolute paths
    # The steps are chained with && so that a failing cd cannot lead to the
    # directory being created relative to the wrong place.
    WSREP_LOG_DIR=$(cd "$WSREP_SST_OPT_DATA" && mkdir -p "$WSREP_LOG_DIR" && \
                    cd "$WSREP_LOG_DIR" && pwd -P)
else
    # default to datadir
    WSREP_LOG_DIR=$(cd "$WSREP_SST_OPT_DATA" && pwd -P)
fi

# Old filter - include everything except selected
# FILTER=(--exclude '*.err' --exclude '*.pid' --exclude '*.sock' \
#         --exclude '*.conf' --exclude core --exclude 'galera.*' \
#         --exclude grastate.txt --exclude '*.pem' \
#         --exclude '*.[0-9][0-9][0-9][0-9][0-9][0-9]' --exclude '*.index')

# New filter - exclude everything except dirs (schemas) and innodb files
FILTER=(-f '- /lost+found' -f '- /.fseventsd' -f '- /.Trashes'
        -f '+ /wsrep_sst_binlog.tar' -f '+ /ib_lru_dump' -f '+ /ibdata*' -f '+ /*/' -f '- /*')

SOCAT_PID="$WSREP_SST_OPT_DATA/$MODULE-socat.pid"

# Refuse to start when a tunnel from an earlier run is still alive.
if check_pid "$SOCAT_PID"
then
    wsrep_log_error "socat tunnel already running."
    exit 114 # EALREADY
fi
rm -rf "$SOCAT_PID"

setup_tunnel_args
| |
# Role dispatch.
#   donor  - start a local socat listener that forwards over TLS to the
#            joiner, then push the data directory through it with rsync.
#   joiner - start a TLS socat listener that forwards to a local rsync
#            daemon, then wait for the donor to deliver the magic file.
# Lines written to stdout ("flush tables", "continue", "done ...",
# "ready ...") are left exactly as they were.
if [ "$WSREP_SST_OPT_ROLE" = "donor" ]
then

    # Everything before the first '/' of the SST address is the joiner address.
    SOCAT_JOINER_ADDR=$(echo "$WSREP_SST_OPT_ADDR" | awk -F'/' '{print $1}')
    # map to name in case we received an IP
    # NOTE(review): the port is parsed out of SOCAT_JOINER_ADDR below, so the
    # value handed to getent presumably still carries ":port" and the lookup
    # then yields nothing - confirm whether host and port should be split first.
    SOCAT_JOINER_HOST=$(getent hosts "$SOCAT_JOINER_ADDR" | awk '{ print $2 }')
    if [ -z "$SOCAT_JOINER_HOST" ]; then
        SOCAT_JOINER_HOST=$SOCAT_JOINER_ADDR
    fi
    SOCAT_PORT=$(echo "$SOCAT_JOINER_ADDR" | awk -F ':' '{ print $2 }')
    if [ -z "$SOCAT_PORT" ]
    then
        SOCAT_PORT=4444
    fi
    TARGET_ADDR=localhost:$SOCAT_PORT/$MODULE

    trap cleanup_tunnel EXIT

    # Socat forwards rsync connections to the joiner
    SOCAT_SRC=tcp-listen:$SOCAT_PORT,bind=localhost,reuseaddr,fork
    SOCAT_DST=openssl:$SOCAT_JOINER_HOST,$SOCAT_OPTS
    wsrep_log_info "Setting up tunnel for donor: socat $SOCAT_SRC $SOCAT_DST"
    # Each socat address is one argument; quoting keeps option values that
    # contain whitespace (e.g. certificate paths) in one piece.
    socat "$SOCAT_SRC" "$SOCAT_DST" &
    SOCAT_REAL_PID=$!
    # This is ok because a local galera node doesn't run SST concurrently
    echo "$SOCAT_REAL_PID" >"$SOCAT_PID"
    until check_pid_and_port "$SOCAT_PID" "$SOCAT_REAL_PID" "$SOCAT_PORT" localhost "socat"
    do
        sleep 0.2
    done

    if [ "$WSREP_SST_OPT_BYPASS" -eq 0 ]
    then

        FLUSHED="$WSREP_SST_OPT_DATA/tables_flushed"
        ERROR="$WSREP_SST_OPT_DATA/sst_error"

        rm -rf "$FLUSHED"
        rm -rf "$ERROR"

        # Use deltaxfer only for WAN
        inv=$(basename "$0")
        if [ "$inv" = "wsrep_sst_rsync_wan" ]; then
            WHOLE_FILE_OPT=""
        else
            WHOLE_FILE_OPT="--whole-file"
        fi

        echo "flush tables"

        # Wait for :
        # (a) Tables to be flushed, AND
        # (b) Cluster state ID & wsrep_gtid_domain_id to be written to the file, OR
        # (c) ERROR file, in case flush tables operation failed.
        #
        # Keep waiting while the file is missing OR does not yet contain the
        # ':' of the state ID. The former `&&` ended the loop as soon as the
        # file became readable, i.e. possibly before (b) had happened.
        while [ ! -r "$FLUSHED" ] || ! grep -q ':' "$FLUSHED" >/dev/null 2>&1
        do
            # Check whether ERROR file exists.
            if [ -f "$ERROR" ]
            then
                # Flush tables operation failed.
                rm -rf "$ERROR"
                exit 255
            fi

            sleep 0.2
        done

        STATE="$(cat "$FLUSHED")"
        rm -rf "$FLUSHED"

        sync

        if [ -n "$WSREP_SST_OPT_BINLOG" ]
        then
            # Prepare binlog files
            pushd "$BINLOG_DIRNAME" &> /dev/null
            binlog_files_full=$(tail -n "$BINLOG_N_FILES" "${BINLOG_FILENAME}.index")
            # Collect the base names in an array, one index line per file.
            binlog_files=()
            while IFS= read -r ii
            do
                [ -n "$ii" ] || continue
                binlog_files+=("$(basename "$ii")")
            done <<< "$binlog_files_full"
            if [ "${#binlog_files[@]}" -gt 0 ]
            then
                wsrep_log_info "Preparing binlog files for transfer:"
                tar -cvf "$BINLOG_TAR_FILE" "${binlog_files[@]}" >&2
            fi
            popd &> /dev/null
        fi

        # first, the normal directories, so that we can detect incompatible protocol
        # ($WHOLE_FILE_OPT stays unquoted: when empty it must vanish entirely)
        RC=0
        rsync --owner --group --perms --links --specials \
              --ignore-times --inplace --dirs --delete --quiet \
              $WHOLE_FILE_OPT "${FILTER[@]}" "$WSREP_SST_OPT_DATA/" \
              "rsync://$TARGET_ADDR" >&2 || RC=$?

        if [ "$RC" -ne 0 ]; then
            wsrep_log_error "rsync returned code $RC:"

            case $RC in
            12) RC=71  # EPROTO
                wsrep_log_error \
                "rsync server on the other end has incompatible protocol. " \
                "Make sure you have the same version of rsync on all nodes."
                ;;
            22) RC=12  # ENOMEM
                ;;
            *)  RC=255 # unknown error
                ;;
            esac
            exit $RC
        fi

        # second, we transfer InnoDB log files
        rsync --owner --group --perms --links --specials \
              --ignore-times --inplace --dirs --delete --quiet \
              $WHOLE_FILE_OPT -f '+ /ib_logfile[0-9]*' -f '- **' "$WSREP_LOG_DIR/" \
              "rsync://$TARGET_ADDR-log_dir" >&2 || RC=$?

        if [ "$RC" -ne 0 ]; then
            wsrep_log_error "rsync innodb_log_group_home_dir returned code $RC:"
            exit 255 # unknown error
        fi

        # then, we parallelize the transfer of database directories, use . so that pathconcatenation works
        pushd "$WSREP_SST_OPT_DATA" >/dev/null

        # One rsync per CPU; fall back to a single worker when counting finds
        # nothing (grep -c exits non-zero on a count of 0).
        count=1
        if [ "$OS" == "Linux" ]; then
            count=$(grep -c processor /proc/cpuinfo) || count=1
        fi
        if [ "$OS" == "Darwin" ] || [ "$OS" == "FreeBSD" ]; then
            count=$(sysctl -n hw.ncpu)
        fi

        find . -maxdepth 1 -mindepth 1 -type d -not -name "lost+found" -print0 | \
             xargs -I{} -0 -P "$count" \
             rsync --owner --group --perms --links --specials \
                   --ignore-times --inplace --recursive --delete --quiet \
                   $WHOLE_FILE_OPT --exclude '*/ib_logfile*' "$WSREP_SST_OPT_DATA"/{}/ \
                   "rsync://$TARGET_ADDR"/{} >&2 || RC=$?

        popd >/dev/null

        if [ "$RC" -ne 0 ]; then
            wsrep_log_error "find/rsync returned code $RC:"
            exit 255 # unknown error
        fi

    else # BYPASS
        wsrep_log_info "Bypassing state dump."

        # Store donor's wsrep GTID (state ID) and wsrep_gtid_domain_id
        # (separated by a space).
        STATE="$WSREP_SST_OPT_GTID $WSREP_SST_OPT_GTID_DOMAIN_ID"
    fi

    echo "continue" # now server can resume updating data

    # The magic file goes last: its arrival is what the joiner waits for.
    echo "$STATE" > "$MAGIC_FILE"
    rsync --archive --quiet --checksum "$MAGIC_FILE" "rsync://$TARGET_ADDR"

    # to avoid cleanup race, stop tunnel before declaring the SST finished.
    # This ensures galera won't start a new SST locally before we exit.
    cleanup_tunnel

    echo "done $STATE"

elif [ "$WSREP_SST_OPT_ROLE" = "joiner" ]
then
    wsrep_check_programs lsof socat

    touch "$SST_PROGRESS_FILE"
    MYSQLD_PID=$WSREP_SST_OPT_PARENT

    RSYNC_PID="$WSREP_SST_OPT_DATA/$MODULE.pid"

    if check_pid "$RSYNC_PID"
    then
        wsrep_log_error "rsync daemon already running."
        exit 114 # EALREADY
    fi
    rm -rf "$RSYNC_PID"

    # Default the port to 4444 when the address does not carry one.
    ADDR=$WSREP_SST_OPT_ADDR
    RSYNC_PORT=$(echo "$ADDR" | awk -F ':' '{ print $2 }')
    if [ -z "$RSYNC_PORT" ]
    then
        RSYNC_PORT=4444
        ADDR="$(echo "$ADDR" | awk -F ':' '{ print $1 }'):$RSYNC_PORT"
    fi

    SOCAT_ADDR=$(echo "$ADDR" | awk -F ':' '{ print $1 }')
    # map to name in case we received an IP
    SOCAT_HOST=$(getent hosts "$SOCAT_ADDR" | awk '{ print $2 }')
    if [ -z "$SOCAT_HOST" ]; then
        SOCAT_HOST=$SOCAT_ADDR
    fi
    SOCAT_PORT=$RSYNC_PORT

    # Turn signals into exits so that the EXIT trap performs the cleanup.
    trap "exit 32" HUP PIPE
    trap "exit 3"  INT TERM ABRT
    trap cleanup_joiner EXIT

    RSYNC_CONF="$WSREP_SST_OPT_DATA/$MODULE.conf"

    # NOTE(review): "rsynd.log" looks like a typo for "rsyncd.log"; it is a
    # runtime path and therefore left unchanged.
    if [ -n "${MYSQL_TMP_DIR:-}" ] ; then
        SILENT="log file = $MYSQL_TMP_DIR/rsynd.log"
    else
        SILENT=""
    fi

    # rsync daemon configuration: one module for the data directory and one
    # for the InnoDB log directory.
cat << EOF > "$RSYNC_CONF"
pid file = $RSYNC_PID
use chroot = no
read only = no
timeout = 300
$SILENT
[$MODULE]
    path = $WSREP_SST_OPT_DATA
[$MODULE-log_dir]
    path = $WSREP_LOG_DIR
EOF

#    rm -rf "$DATA"/ib_logfile* # we don't want old logs around

    # Socat receives rsync connections from the donor
    SOCAT_SRC=openssl-listen:$SOCAT_PORT,bind=$SOCAT_HOST,reuseaddr,fork,$SOCAT_OPTS
    SOCAT_DST=tcp:localhost:$RSYNC_PORT
    wsrep_log_info "Setting up tunnel for joiner: socat $SOCAT_SRC $SOCAT_DST"
    socat "$SOCAT_SRC" "$SOCAT_DST" &
    SOCAT_REAL_PID=$!
    # This is ok because a local galera node doesn't run SST concurrently
    echo "$SOCAT_REAL_PID" >"$SOCAT_PID"
    until check_pid_and_port "$SOCAT_PID" "$SOCAT_REAL_PID" "$SOCAT_PORT" "$SOCAT_HOST" "socat"
    do
        sleep 0.2
    done

    wsrep_log_info "rsync --daemon --no-detach --address localhost --port $RSYNC_PORT --config \"$RSYNC_CONF\""
    rsync --daemon --no-detach --address localhost --port "$RSYNC_PORT" --config "$RSYNC_CONF" &
    RSYNC_REAL_PID=$!

    until check_pid_and_port "$RSYNC_PID" "$RSYNC_REAL_PID" "$RSYNC_PORT" localhost "rsync"
    do
        sleep 0.2
    done

    echo "ready $ADDR/$MODULE"

    # wait for SST to complete by monitoring magic file
    while [ ! -r "$MAGIC_FILE" ] && check_pid "$RSYNC_PID" && \
          check_pid "$SOCAT_PID" && ps -p "$MYSQLD_PID" >/dev/null
    do
        sleep 1
    done

    # to avoid cleanup race, we can tear down the socat tunnel now
    # before signaling the end of the SST to galera.
    cleanup_tunnel

    if ! ps -p "$MYSQLD_PID" >/dev/null
    then
        wsrep_log_error \
        "Parent mysqld process (PID:$MYSQLD_PID) terminated unexpectedly."
        exit 32
    fi

    if [ -n "$WSREP_SST_OPT_BINLOG" ]
    then

        pushd "$BINLOG_DIRNAME" &> /dev/null
        if [ -f "$BINLOG_TAR_FILE" ]
        then
            # Clean up old binlog files first
            rm -f -- "${BINLOG_FILENAME}".*
            wsrep_log_info "Extracting binlog files:"
            tar -xvf "$BINLOG_TAR_FILE" >&2
            # Rebuild the index from the extracted files. The glob is expanded
            # once, before the index file is created by the first append.
            for ii in "${BINLOG_FILENAME}".*
            do
                # skip the literal pattern when nothing matched
                [ -e "$ii" ] || continue
                echo "${BINLOG_DIRNAME}/${ii}" >> "${BINLOG_FILENAME}.index"
            done
        fi
        popd &> /dev/null
    fi
    if [ -r "$MAGIC_FILE" ]
    then
        # UUID:seqno & wsrep_gtid_domain_id is received here.
        cat "$MAGIC_FILE" # Output : UUID:seqno wsrep_gtid_domain_id
    else
        # this message should cause joiner to abort
        echo "rsync process ended without creating '$MAGIC_FILE'"
    fi
    wsrep_cleanup_progress_file
#    cleanup_joiner
else
    wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'"
    exit 22 # EINVAL
fi
| |
# Best-effort removal of the binlog tarball; a failure here must not change
# the exit status. The path is quoted so a data directory containing
# whitespace is handled correctly.
rm -f -- "$BINLOG_TAR_FILE" || :

exit 0