ganapathi / rpms / mariadb

Forked from rpms/mariadb 4 years ago
Clone

Blame SOURCES/wsrep_sst_rsync_tunnel

a555f2
#!/bin/bash -ue
a555f2
a555f2
# Copyright (C) 2010-2014 Codership Oy
a555f2
# Copyright (C) 2017 Damien Ciabrini <damien.ciabrini@gmail.com>
a555f2
#
a555f2
# This program is free software; you can redistribute it and/or modify
a555f2
# it under the terms of the GNU General Public License as published by
a555f2
# the Free Software Foundation; version 2 of the License.
a555f2
#
a555f2
# This program is distributed in the hope that it will be useful,
a555f2
# but WITHOUT ANY WARRANTY; without even the implied warranty of
a555f2
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
a555f2
# GNU General Public License for more details.
a555f2
#
a555f2
# You should have received a copy of the GNU General Public License
a555f2
# along with this program; see the file COPYING. If not, write to the
a555f2
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston
a555f2
# MA  02110-1301  USA.
a555f2
a555f2
# This is a reference script for rsync-based state snapshot transfer
a555f2
# over an encrypted communication channel, managed by socat
a555f2
a555f2
# Bookkeeping for the helper processes; all of these are filled in later by
# the donor/joiner code paths and read by the cleanup_* functions.
RSYNC_PID=                                      # rsync pid file
RSYNC_CONF=                                     # rsync configuration file
RSYNC_REAL_PID=                                 # rsync process id

SOCAT_PID=                                      # socat pid file
SOCAT_REAL_PID=                                 # socat process id

SOCAT_OPTS=                                     # openssl connection args

# rsync module name; also used as the prefix of the pid/conf file names
MODULE="rsync_tunnel_sst"

OS=$(uname)
# On macOS keep LD_LIBRARY_PATH set in this shell but stop exporting it
# to child processes.
[ "$OS" == "Darwin" ] && export -n LD_LIBRARY_PATH

# Setting the path for lsof on CentOS
export PATH="/usr/sbin:/sbin:$PATH"

# Shared SST helpers live next to this script. The path is quoted so that an
# installation directory containing spaces does not get word-split.
# NOTE(review): wsrep_sst_common presumably provides the wsrep_log_* helpers,
# wsrep_check_programs, MY_PRINT_DEFAULTS and the WSREP_SST_OPT_* variables
# used below -- none of them are defined in this file.
. "$(dirname "$0")/wsrep_sst_common"

wsrep_check_programs rsync socat
a555f2
a555f2
# Terminate a helper process: SIGTERM first, SIGKILL half a second later.
#
# Arguments:
#   $1 - process id to terminate; an empty, missing or "0" value is a no-op
# Returns:
#   always 0 (cleanup is best effort and must never abort an EXIT trap)
cleanup_pid()
{
    # ${1:-} keeps the function usable under `set -u` when called without
    # an argument.
    local real_pid="${1:-}"

    if [ -z "$real_pid" ] || [ "0" = "$real_pid" ]; then
        return 0
    fi

    # Only escalate to SIGKILL when the SIGTERM could actually be delivered.
    if kill "$real_pid"; then
        sleep 0.5
        kill -9 "$real_pid" >/dev/null 2>&1 || :
    fi

    return 0
}
a555f2
a555f2
# Stop the socat tunnel started by this script and remove its pid file.
#
# Globals:
#   SOCAT_REAL_PID (read) - pid of the background socat, may be empty
#   SOCAT_PID      (read) - path of the socat pid file
cleanup_tunnel()
{
    wsrep_log_info "cleanup socat PID: $SOCAT_REAL_PID"
    if [ -n "$SOCAT_REAL_PID" ]; then
        cleanup_pid "$SOCAT_REAL_PID"
    fi
    rm -rf "$SOCAT_PID"
}
a555f2
a555f2
# Joiner-side cleanup, installed as the EXIT trap of the joiner branch.
# Stops the rsync daemon, removes its config/pid files and the magic file,
# tears down the socat tunnel and, for the joiner role, clears the SST
# progress file.
#
# Globals (read):
#   RSYNC_REAL_PID, RSYNC_CONF, RSYNC_PID, MAGIC_FILE, WSREP_SST_OPT_ROLE
cleanup_joiner()
{
    wsrep_log_info "Joiner cleanup. rsync PID: $RSYNC_REAL_PID"
    if [ -n "$RSYNC_REAL_PID" ]; then
        cleanup_pid "$RSYNC_REAL_PID"
    fi
    rm -rf "$RSYNC_CONF"
    rm -rf "$MAGIC_FILE"
    rm -rf "$RSYNC_PID"

    cleanup_tunnel

    wsrep_log_info "Joiner cleanup done."
    if [ "${WSREP_SST_OPT_ROLE}" = "joiner" ]; then
        wsrep_cleanup_progress_file
    fi
}
a555f2
a555f2
# Check whether process is still running.
#
# Arguments:
#   $1 - path of a pid file
# Returns:
#   0 when the file is readable and the pid on its first line belongs to a
#   process known to `ps`; non-zero otherwise (missing, unreadable or empty
#   file, dead process).
check_pid()
{
    local pid_file="$1"
    local pid=""

    [ -r "$pid_file" ] || return 1

    # `read` returns non-zero for a file without a trailing newline but
    # still fills $pid, hence the `|| :`.
    read -r pid _ <"$pid_file" || :

    [ -n "$pid" ] && ps -p "$pid" >/dev/null 2>&1
}
a555f2
a555f2
# Check that a helper daemon started by this script is alive and is the
# process listening on its port. Meant to be polled (`until ...; do sleep`).
#
# Arguments:
#   $1 - pid file of the service
#   $2 - pid of the process this script started
#   $3 - TCP port the service must listen on
#   $4 - host/address the service is bound to
#   $5 - command name as printed in the first column of lsof output
# Returns:
#   0 once lsof reports "<name> <pid>" listening on host:port and the pid
#   file holds the same pid; non-zero while the service is not ready yet.
# Exits the script with:
#   2  (ENOENT) when lsof is not available
#   16 (EBUSY)  when something else is listening on the port
#   10 (ECHILD) when the started process is no longer running
check_pid_and_port()
{
    local pid_file="$1"
    local service_pid="$2"
    local service_port="$3"
    local service_host="$4"
    local service_name="$5"
    local port_info=""
    local is_service=""

    if ! command -v lsof >/dev/null 2>&1; then
      wsrep_log_error "lsof tool not found in PATH! Make sure you have it installed."
      exit 2 # ENOENT
    fi

    # grep finding nothing is the normal "not listening yet" case.
    port_info=$(lsof -i "@$service_host:$service_port" -Pn 2>/dev/null | \
        grep "(LISTEN)") || :
    # $port_info is left unquoted as in the original: echo folds all lsof
    # lines onto one, so the '^' anchor only accepts our service as the
    # first listener. NOTE(review): confirm this is the intended strictness.
    is_service=$(echo $port_info | \
        grep -w '^'"$service_name"'[[:space:]]\+'"$service_pid" 2>/dev/null) || :

    if [ -n "$port_info" ] && [ -z "$is_service" ]; then
        wsrep_log_error "$service_name daemon port '$service_port' has been taken"
        exit 16 # EBUSY
    fi

    # Judge liveness by the process we started, not by the pid file: a
    # daemon that writes its own pid file (rsync) may simply not have
    # written it yet, which must mean "retry", not "terminated".
    if ! ps -p "$service_pid" >/dev/null 2>&1; then
        wsrep_log_error "$service_name process terminated unexpectedly"
        exit 10 # ECHILD
    fi

    [ -n "$port_info" ] && [ -n "$is_service" ] && \
        [ -r "$pid_file" ] && \
        [ "$(cat "$pid_file")" -eq "$service_pid" ]
}
a555f2
a555f2
# Print the value of an option from a my.cnf group.
#
# Arguments:
#   $1 - option group passed to $MY_PRINT_DEFAULTS (e.g. "sst")
#   $2 - option name without the leading "--"
# Outputs:
#   the value of the last "--<key>=<value>" line, verbatim; nothing when
#   the option is not set
config_from_cnf()
{
    local group="$1"
    local key="$2"

    # $MY_PRINT_DEFAULTS is expanded unquoted on purpose.
    # NOTE(review): it is presumably a command plus extra arguments set up
    # by wsrep_sst_common -- confirm.
    # The pattern is anchored so that "--<key>=" appearing inside another
    # option's value is not mistaken for the option itself.
    $MY_PRINT_DEFAULTS "$group" | grep -- "^--$key=" | cut -d= -f2- | tail -1
}
a555f2
a555f2
# Build the socat OpenSSL option string from the [sst] section of my.cnf.
#
# Globals:
#   tca, tkey, tcert, sockopt (written) - raw values read from my.cnf
#   SOCAT_OPTS (written) - "cert=...[,key=...][,cafile=...][,<sockopt>]"
# Returns:
#   0 on success. Exits the script with status 3 when no certificate
#   (tcert) is configured.
setup_tunnel_args()
{
    tca=$(config_from_cnf sst tca)
    tkey=$(config_from_cnf sst tkey)
    tcert=$(config_from_cnf sst tcert)
    sockopt=$(config_from_cnf sst sockopt)

    if [ -z "$tcert" ]; then
        wsrep_log_error "Encryption certificate not found in my.cnf"
        exit 3
    fi

    SOCAT_OPTS="cert=$tcert"
    if [ -n "$tkey" ]; then
        SOCAT_OPTS="$SOCAT_OPTS,key=$tkey"
    fi
    if [ -n "$tca" ]; then
        SOCAT_OPTS="$SOCAT_OPTS,cafile=$tca"
    fi
    wsrep_log_info "Encryption setting to be used for socat tunnel: $SOCAT_OPTS"

    # Free-form extra socat options are appended after the log line above.
    # This must be an `if`, not `[ ... ] && ...`: as the last statement the
    # short-circuit form makes the function return 1 when sockopt is unset,
    # and the caller runs under `set -e`.
    if [ -n "$sockopt" ]; then
        SOCAT_OPTS="$SOCAT_OPTS,$sockopt"
    fi

    return 0
}
a555f2
a555f2
# Marker file: the donor writes the state ID into it and sends it last; the
# joiner polls for it to learn that the transfer is complete. Remove any
# stale copy from a previous run.
MAGIC_FILE="$WSREP_SST_OPT_DATA/rsync_tunnel_sst_complete"
rm -rf "$MAGIC_FILE"

# Binlog files travel inside a tar archive placed in the data directory.
BINLOG_TAR_FILE="$WSREP_SST_OPT_DATA/wsrep_sst_binlog.tar"
BINLOG_N_FILES=1    # number of most recent binlog files to transfer
rm -f "$BINLOG_TAR_FILE" || :

# Split the binlog base path into directory and file name (only when
# binary logging is configured).
if [ -n "$WSREP_SST_OPT_BINLOG" ]
then
    BINLOG_DIRNAME=$(dirname "$WSREP_SST_OPT_BINLOG")
    BINLOG_FILENAME=$(basename "$WSREP_SST_OPT_BINLOG")
fi
a555f2
a555f2
WSREP_LOG_DIR=${WSREP_LOG_DIR:-""}
# if WSREP_LOG_DIR env. variable is not set, try to get it from my.cnf
if [ -z "$WSREP_LOG_DIR" ]; then
    # Take everything after the first '=' of the last matching option line.
    WSREP_LOG_DIR=$($MY_PRINT_DEFAULTS --mysqld \
                    | grep -- '^--innodb[-_]log[-_]group[-_]home[-_]dir=' \
                    | cut -d= -f2- \
                    | tail -1)
fi

# Resolve WSREP_LOG_DIR to an absolute, symlink-free path. Each step is
# chained with && so that a failed `cd` cannot make the following commands
# run in the wrong directory.
if [ -n "$WSREP_LOG_DIR" ]; then
    # handle both relative and absolute paths
    WSREP_LOG_DIR=$(cd "$WSREP_SST_OPT_DATA" && \
                    mkdir -p "$WSREP_LOG_DIR" && \
                    cd "$WSREP_LOG_DIR" && \
                    pwd -P) || {
        wsrep_log_error "Cannot access InnoDB log directory relative to '$WSREP_SST_OPT_DATA'"
        exit 2 # ENOENT
    }
else
    # default to datadir
    WSREP_LOG_DIR=$(cd "$WSREP_SST_OPT_DATA" && pwd -P) || {
        wsrep_log_error "Cannot access data directory '$WSREP_SST_OPT_DATA'"
        exit 2 # ENOENT
    }
fi
a555f2
a555f2
# Old filter - include everything except selected
a555f2
# FILTER=(--exclude '*.err' --exclude '*.pid' --exclude '*.sock' \
a555f2
#         --exclude '*.conf' --exclude core --exclude 'galera.*' \
a555f2
#         --exclude grastate.txt --exclude '*.pem' \
a555f2
#         --exclude '*.[0-9][0-9][0-9][0-9][0-9][0-9]' --exclude '*.index')
a555f2
a555f2
# New filter - exclude everything except dirs (schemas) and innodb files
# Each rule is passed to rsync as its own "-f <rule>" pair; order matters
# because the final '- /*' excludes every top-level entry not included
# before it.
FILTER=(
    -f '- /lost+found'
    -f '- /.fseventsd'
    -f '- /.Trashes'
    -f '+ /wsrep_sst_binlog.tar'
    -f '+ /ib_lru_dump'
    -f '+ /ibdata*'
    -f '+ /*/'
    -f '- /*'
)
a555f2
a555f2
SOCAT_PID="$WSREP_SST_OPT_DATA/$MODULE-socat.pid"

# Refuse to start while a tunnel recorded in the pid file is still alive;
# otherwise discard the stale pid file.
if check_pid "$SOCAT_PID"
then
    wsrep_log_error "socat tunnel already running."
    exit 114 # EALREADY
fi
rm -rf "$SOCAT_PID"

# Fills SOCAT_OPTS from my.cnf (exits when no certificate is configured)
setup_tunnel_args
a555f2
a555f2
if [ "$WSREP_SST_OPT_ROLE" = "donor" ]
a555f2
then
a555f2
a555f2
    # The joiner address is split on the first '/'; the part before it is
    # the joiner's host[:port].
    SOCAT_JOINER_ADDR=${WSREP_SST_OPT_ADDR%%/*}

    SOCAT_PORT=$(echo "$SOCAT_JOINER_ADDR" | awk -F ':' '{ print $2 }')
    if [ -z "$SOCAT_PORT" ]
    then
        SOCAT_PORT=4444
    fi

    # map to name in case we received an IP
    # The lookup must be done on the bare host: `getent hosts host:port`
    # never matches. Only the first answer is used.
    joiner_name=${SOCAT_JOINER_ADDR%%:*}
    resolved_name=$(getent hosts "$joiner_name" | awk '{ print $2; exit }')
    if [ -z "$resolved_name" ]; then
        resolved_name=$joiner_name
    fi
    # SOCAT_JOINER_HOST is used verbatim as the socat openssl: destination,
    # so it always carries the port, including the 4444 default.
    SOCAT_JOINER_HOST="$resolved_name:$SOCAT_PORT"

    # rsync itself only ever talks to the local end of the tunnel
    TARGET_ADDR=localhost:$SOCAT_PORT/$MODULE
a555f2
a555f2
    # Make sure the tunnel is torn down on every exit path of the donor
    trap cleanup_tunnel EXIT

    # Socat forwards rsync connections to the joiner
    SOCAT_SRC=tcp-listen:$SOCAT_PORT,bind=localhost,reuseaddr,fork
    SOCAT_DST=openssl:$SOCAT_JOINER_HOST,$SOCAT_OPTS
    wsrep_log_info "Setting up tunnel for donor: socat $SOCAT_SRC $SOCAT_DST"
    # Each socat address is a single argument; quote so that certificate
    # paths with spaces are not split.
    socat "$SOCAT_SRC" "$SOCAT_DST" &
    SOCAT_REAL_PID=$!
    # This is ok because a local galera node doesn't run SST concurrently
    echo "$SOCAT_REAL_PID" >"$SOCAT_PID"
    # Poll until socat is listening on the local port (the check itself
    # exits the script if socat died or the port is taken).
    until check_pid_and_port "$SOCAT_PID" "$SOCAT_REAL_PID" "$SOCAT_PORT" localhost "socat"
    do
        sleep 0.2
    done
a555f2
a555f2
    # Full state transfer unless the server asked for a bypass, in which
    # case only the state ID is handed over.
    if [ "$WSREP_SST_OPT_BYPASS" -eq 0 ]
    then

        FLUSHED="$WSREP_SST_OPT_DATA/tables_flushed"
        ERROR="$WSREP_SST_OPT_DATA/sst_error"

        rm -rf "$FLUSHED"
        rm -rf "$ERROR"

        # Use deltaxfer only for WAN
        inv=$(basename "$0")
        if [ "$inv" = "wsrep_sst_rsync_wan" ]; then
            WHOLE_FILE_OPT=""
        else
            WHOLE_FILE_OPT="--whole-file"
        fi

        # Request written to stdout for the server
        echo "flush tables"

        # Wait for :
        # (a) Tables to be flushed, AND
        # (b) Cluster state ID & wsrep_gtid_domain_id to be written to the file, OR
        # (c) ERROR file, in case flush tables operation failed.
        #
        # Keep waiting while (a) OR (b) is still missing; the state ID is
        # recognised by the ':' it contains.
        while [ ! -r "$FLUSHED" ] || ! grep -q ':' "$FLUSHED" >/dev/null 2>&1
        do
            # Check whether ERROR file exists.
            if [ -f "$ERROR" ]
            then
                # Flush tables operation failed.
                rm -rf "$ERROR"
                exit 255
            fi

            sleep 0.2
        done

        STATE="$(cat "$FLUSHED")"
        rm -rf "$FLUSHED"

        sync

        if [ -n "$WSREP_SST_OPT_BINLOG" ]
        then
            # Prepare binlog files
            pushd "$BINLOG_DIRNAME" &> /dev/null
            # The index lists one binlog path per line; take the newest
            # $BINLOG_N_FILES entries and keep only their base names.
            binlog_files_full=$(tail -n "$BINLOG_N_FILES" "${BINLOG_FILENAME}.index")
            binlog_files=()
            while IFS= read -r ii
            do
                if [ -n "$ii" ]; then
                    binlog_files+=("$(basename "$ii")")
                fi
            done <<<"$binlog_files_full"
            if [ "${#binlog_files[@]}" -gt 0 ]
            then
                wsrep_log_info "Preparing binlog files for transfer:"
                tar -cvf "$BINLOG_TAR_FILE" "${binlog_files[@]}" >&2
            fi
            popd &> /dev/null
        fi

        # first, the normal directories, so that we can detect incompatible protocol
        RC=0
        rsync --owner --group --perms --links --specials \
              --ignore-times --inplace --dirs --delete --quiet \
              ${WHOLE_FILE_OPT:+"$WHOLE_FILE_OPT"} "${FILTER[@]}" "$WSREP_SST_OPT_DATA/" \
              "rsync://$TARGET_ADDR" >&2 || RC=$?

        if [ "$RC" -ne 0 ]; then
            wsrep_log_error "rsync returned code $RC:"

            # Translate rsync exit codes into errno-style exit statuses
            case $RC in
            12) RC=71  # EPROTO
                wsrep_log_error \
                "rsync server on the other end has incompatible protocol. " \
                "Make sure you have the same version of rsync on all nodes."
                ;;
            22) RC=12  # ENOMEM
                ;;
            *)  RC=255 # unknown error
                ;;
            esac
            exit $RC
        fi

        # second, we transfer InnoDB log files
        rsync --owner --group --perms --links --specials \
              --ignore-times --inplace --dirs --delete --quiet \
              ${WHOLE_FILE_OPT:+"$WHOLE_FILE_OPT"} -f '+ /ib_logfile[0-9]*' -f '- **' "$WSREP_LOG_DIR/" \
              "rsync://$TARGET_ADDR-log_dir" >&2 || RC=$?

        if [ "$RC" -ne 0 ]; then
            wsrep_log_error "rsync innodb_log_group_home_dir returned code $RC:"
            exit 255 # unknown error
        fi

        # then, we parallelize the transfer of database directories, use . so that pathconcatenation works
        pushd "$WSREP_SST_OPT_DATA" >/dev/null

        # One rsync per CPU; fall back to a single job when the CPU count
        # cannot be determined (grep -c exits non-zero on a zero count).
        count=1
        case "$OS" in
        Linux)
            count=$(grep -c processor /proc/cpuinfo) || count=1
            ;;
        Darwin|FreeBSD)
            count=$(sysctl -n hw.ncpu)
            ;;
        esac

        find . -maxdepth 1 -mindepth 1 -type d -not -name "lost+found" -print0 | \
             xargs -I{} -0 -P "$count" \
             rsync --owner --group --perms --links --specials \
             --ignore-times --inplace --recursive --delete --quiet \
             ${WHOLE_FILE_OPT:+"$WHOLE_FILE_OPT"} --exclude '*/ib_logfile*' "$WSREP_SST_OPT_DATA"/{}/ \
             "rsync://$TARGET_ADDR"/{} >&2 || RC=$?

        popd >/dev/null

        if [ "$RC" -ne 0 ]; then
            wsrep_log_error "find/rsync returned code $RC:"
            exit 255 # unknown error
        fi

    else # BYPASS
        wsrep_log_info "Bypassing state dump."

        # Store donor's wsrep GTID (state ID) and wsrep_gtid_domain_id
        # (separated by a space).
        STATE="$WSREP_SST_OPT_GTID $WSREP_SST_OPT_GTID_DOMAIN_ID"
    fi
a555f2
a555f2
    echo "continue" # now server can resume updating data

    # The magic file carries the state ID and is sent last: its arrival is
    # what the joiner waits for.
    echo "$STATE" > "$MAGIC_FILE"
    rsync --archive --quiet --checksum "$MAGIC_FILE" "rsync://$TARGET_ADDR"

    echo "done $STATE"
a555f2
a555f2
elif [ "$WSREP_SST_OPT_ROLE" = "joiner" ]
a555f2
then
a555f2
    wsrep_check_programs lsof socat

    touch "$SST_PROGRESS_FILE"
    MYSQLD_PID=$WSREP_SST_OPT_PARENT

    RSYNC_PID="$WSREP_SST_OPT_DATA/$MODULE.pid"

    # Refuse to start while an rsync daemon recorded in the pid file is
    # still alive; otherwise discard the stale pid file.
    if check_pid "$RSYNC_PID"
    then
        wsrep_log_error "rsync daemon already running."
        exit 114 # EALREADY
    fi
    rm -rf "$RSYNC_PID"

    # Split host[:port]; default the port to 4444 and make ADDR carry it,
    # since ADDR is what gets advertised to the donor.
    ADDR=$WSREP_SST_OPT_ADDR
    RSYNC_PORT=$(echo "$ADDR" | awk -F ':' '{ print $2 }')
    if [ -z "$RSYNC_PORT" ]
    then
        RSYNC_PORT=4444
        ADDR="$(echo "$ADDR" | awk -F ':' '{ print $1 }'):$RSYNC_PORT"
    fi

    SOCAT_ADDR=$(echo "$ADDR" | awk -F ':' '{ print $1 }')
    # map to name in case we received an IP
    # (only the first getent answer is used)
    SOCAT_HOST=$(getent hosts "$SOCAT_ADDR" | awk '{ print $2; exit }')
    if [ -z "$SOCAT_HOST" ]; then
        SOCAT_HOST=$SOCAT_ADDR
    fi
    # socat (bound to SOCAT_HOST) and rsync (bound to localhost) share the
    # same port number on different addresses.
    SOCAT_PORT=$RSYNC_PORT

    # Turn signals into exits so that the EXIT trap always runs the cleanup
    trap "exit 32" HUP PIPE
    trap "exit 3"  INT TERM ABRT
    trap cleanup_joiner EXIT
a555f2
a555f2
    RSYNC_CONF="$WSREP_SST_OPT_DATA/$MODULE.conf"

    # Optional rsync daemon log file, only when MYSQL_TMP_DIR is set
    if [ -n "${MYSQL_TMP_DIR:-}" ] ; then
      SILENT="log file = $MYSQL_TMP_DIR/rsynd.log"
    else
      SILENT=""
    fi

    # Generate the rsync daemon configuration: one writable module for the
    # data directory and one for the InnoDB log directory. The heredoc body
    # must contain nothing but configuration lines.
    cat << EOF > "$RSYNC_CONF"
pid file = $RSYNC_PID
use chroot = no
read only = no
timeout = 300
$SILENT
[$MODULE]
    path = $WSREP_SST_OPT_DATA
[$MODULE-log_dir]
    path = $WSREP_LOG_DIR
EOF
a555f2
a555f2
#    rm -rf "$DATA"/ib_logfile* # we don't want old logs around
a555f2
a555f2
    # Socat receives rsync connections from the donor
    SOCAT_SRC=openssl-listen:$SOCAT_PORT,bind=$SOCAT_HOST,reuseaddr,fork,$SOCAT_OPTS
    SOCAT_DST=tcp:localhost:$RSYNC_PORT
    wsrep_log_info "Setting up tunnel for joiner: socat $SOCAT_SRC $SOCAT_DST"
    # Each socat address is a single argument; quote so that certificate
    # paths with spaces are not split.
    socat "$SOCAT_SRC" "$SOCAT_DST" &
    SOCAT_REAL_PID=$!
    # This is ok because a local galera node doesn't run SST concurrently
    echo "$SOCAT_REAL_PID" >"$SOCAT_PID"
    until check_pid_and_port "$SOCAT_PID" "$SOCAT_REAL_PID" "$SOCAT_PORT" "$SOCAT_HOST" "socat"
    do
        sleep 0.2
    done

    # The rsync daemon only listens on localhost; remote access goes
    # through the socat tunnel above.
    wsrep_log_info "rsync --daemon --no-detach --address localhost --port $RSYNC_PORT --config \"$RSYNC_CONF\""
    rsync --daemon --no-detach --address localhost --port "$RSYNC_PORT" --config "$RSYNC_CONF" &
    RSYNC_REAL_PID=$!

    until check_pid_and_port "$RSYNC_PID" "$RSYNC_REAL_PID" "$RSYNC_PORT" localhost "rsync"
    do
        sleep 0.2
    done

    # Advertise the address the donor should send to
    echo "ready $ADDR/$MODULE"
a555f2
a555f2
    # wait for SST to complete by monitoring magic file
    # Stop waiting early if rsync, socat or the parent mysqld goes away.
    while [ ! -r "$MAGIC_FILE" ] && check_pid "$RSYNC_PID" && \
          check_pid "$SOCAT_PID" && ps -p "$MYSQLD_PID" >/dev/null
    do
        sleep 1
    done

    if ! ps -p "$MYSQLD_PID" >/dev/null
    then
        wsrep_log_error \
        "Parent mysqld process (PID:$MYSQLD_PID) terminated unexpectedly."
        exit 32
    fi
a555f2
a555f2
    # Unpack the binlog files sent by the donor and rebuild the binlog index
    if [ -n "$WSREP_SST_OPT_BINLOG" ]
    then

        pushd "$BINLOG_DIRNAME" &> /dev/null
        if [ -f "$BINLOG_TAR_FILE" ]
        then
            # Clean up old binlog files first
            rm -f "${BINLOG_FILENAME}".*
            wsrep_log_info "Extracting binlog files:"
            tar -xvf "$BINLOG_TAR_FILE" >&2
            # The glob is expanded once, before the index file is created
            # by the first append below.
            for ii in "${BINLOG_FILENAME}".*
            do
                # an unmatched glob stays literal; skip it
                [ -e "$ii" ] || continue
                echo "${BINLOG_DIRNAME}/${ii}" >> "${BINLOG_FILENAME}.index"
            done
        fi
        popd &> /dev/null
    fi
a555f2
    # Report the outcome on stdout
    if [ -r "$MAGIC_FILE" ]
    then
        # UUID:seqno & wsrep_gtid_domain_id is received here.
        cat "$MAGIC_FILE" # Output : UUID:seqno wsrep_gtid_domain_id
    else
        # this message should cause joiner to abort
        echo "rsync process ended without creating '$MAGIC_FILE'"
    fi
    wsrep_cleanup_progress_file
a555f2
#    cleanup_joiner
a555f2
else
a555f2
    # Neither "donor" nor "joiner"
    wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'"
    exit 22 # EINVAL
a555f2
fi
a555f2
a555f2
# The binlog archive is only needed during the transfer; removal is best
# effort.
rm -f "$BINLOG_TAR_FILE" || :

exit 0