#!/bin/bash
# Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  Redistributions of source code must retain the above copyright
#  notice, this list of conditions and the following disclaimer.
#  Redistributions in binary form must reproduce the above copyright
#  notice, this list of conditions and the following disclaimer in
#  the documentation and/or other materials provided with the
#  distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
#  Script to check that environment is set up correctly for usnic_verbs
# programs to be able to run successfully.

# Minimum enic driver version (X.Y.Z.K) accepted without a warning.
need_enic_verrel=2.1.1.46

# Name of the Cisco Open MPI RPM that is queried with "rpm -q".
cisco_ompi_name=openmpi-cisco

# sysfs directory under which <device>/max_vf is looked up.
sys_infin_root=/sys/class/infiniband

# Architecture suffixes stripped from the "rpm -q" output.
enic_arch='.x86_64'
usnic_verbs_arch='.x86_64'

# External programs that check_utils insists on before anything else runs.
prog_req=(rpm modinfo ibv_devinfo)

# Entry point: runs every check, then exits 0 when no warning was issued
# or prints the number of warnings and exits 1.
main()
{
    # Counter incremented by warn(); it decides the exit status below.
    warns=0

    detect_distro
    check_utils
    print_versions        # report which components are installed
    check_config          # verify how they are configured

    # Clean run: nothing to summarize.
    if (( warns <= 0 )); then
        exit 0
    fi

    # Only two or more warnings get the plural "s".
    if (( warns > 1 )); then
        plural=s
    fi
    echo "${warns} warning${plural}"
    exit 1
}

# Verifies that every program listed in prog_req can be found.
# Prints an error on stderr and exits with status 1 at the first one
# that is missing.
check_utils()
{
    for prog in "${prog_req[@]}"; do
        if ! command -v "${prog}" >/dev/null 2>&1; then
            echo -e >&2 "\nERROR: '${prog}' is required to run this script."
            exit 1
        fi
    done
}

# Identifies the distribution from its release file and records, as globals:
#   distro              - "rhel", "sles" or "Unknown"
#   enic_module_name    - name of the enic driver RPM on that distribution
#   enic_ver_separator  - character that ends the version part of the
#                         RPM's version-release string
detect_distro()
{
    if [[ -e /etc/redhat-release ]]; then
        distro="rhel"
        enic_module_name="kmod-enic"
        enic_ver_separator='-'
    elif [[ -e /etc/SuSE-release ]]; then
        distro="sles"
        enic_module_name="cisco-enic-kmp-default"
        enic_ver_separator='_'
    else
        distro="Unknown"
        enic_module_name="Unknown"
        # Define the separator here too so that every branch leaves the
        # same set of globals behind; an empty separator would make the
        # "%%<separator>*" expansion in print_enic_version strip the
        # whole string.
        enic_ver_separator='-'
    fi
}

# Prints "WARNING: <message>" on stdout and bumps the global warning
# counter "warns".
# Arguments: the message words; they are joined with spaces.
warn()
{
    # The message is passed as data, never as the format string, so a
    # literal '%' in it is printed as-is.  %b still expands backslash
    # escapes such as the "\n" that check_ucs_config embeds in its text.
    printf 'WARNING: %b\n' "$*"
    warns=$(( warns + 1 ))
}

# Converts a dotted version string of the form X.Y.Z.K into one integer
# (X*256^3 + Y*256^2 + Z*256 + K) so that two versions can be compared
# numerically.
# Arguments: $1 - version string
# Outputs:   the integer on stdout
# Missing components count as 0, components beyond the fourth are
# ignored, and anything after the leading digits of a component is
# dropped.
verrel_to_int()
{
    local -a parts
    local part i value=0

    # IFS is changed for this read only; the caller's IFS and positional
    # parameters are left alone.
    IFS=. read -r -a parts <<< "$1"

    for (( i = 0; i < 4; i++ )); do
        part=${parts[i]:-0}
        part=${part%%[!0-9]*}
        # 10# forces base 10 so that a component such as "08" is not
        # rejected as an invalid octal number.
        value=$(( value * 256 + 10#${part:-0} ))
    done

    echo "${value}"
}


# Runs the four version reporters, in this fixed order.
print_versions()
{
    local reporter

    for reporter in print_enic_version \
                    print_usnic_verbs_version \
                    print_libusnic_verbs_version \
                    print_ompi_version; do
        "${reporter}"
    done
}

# Runs the three configuration checks, in this fixed order.
check_config()
{
    local checker

    for checker in check_verbs check_ucs_config check_ompi_setup; do
        "${checker}"
    done
}

# Reports the installed enic RPM and warns when
#   - the RPM is missing,
#   - its version is lower than need_enic_verrel,
#   - modinfo reports a different version than the RPM,
#   - the module is not loaded, or the loaded version differs from the RPM.
print_enic_version()
{
    # An empty ${rpm} means the package query failed.
    if ! rpm=$(rpm -q "${enic_module_name}" 2> /dev/null); then
        rpm=""
    fi

    # lsmod line of the loaded module, and the version modinfo reports.
    enic=$(lsmod | grep ^enic)
    enic_vers=$(modinfo enic 2> /dev/null | awk '/^version:/ { print $2 }')

    if [[ -z "${rpm}" ]]; then
        warn "${enic_module_name} RPM is not installed"
        return
    fi

    # Drop the package-name prefix and the architecture suffix, then keep
    # what precedes the first version separator as the bare version.
    rpmverrel=${rpm#"${enic_module_name}-"}
    rpmverrel=${rpmverrel%"${enic_arch}"}
    rpmver=${rpmverrel%%"${enic_ver_separator}"*}

    need_ver_int=$(verrel_to_int "${need_enic_verrel}")
    rpm_ver_int=$(verrel_to_int "${rpmver}")

    # Installed package older than the required version?
    if (( rpm_ver_int < need_ver_int )); then
        warn "enic RPM is ${rpmver}, need ${need_enic_verrel}"
    fi

    echo "enic RPM version ${rpmverrel} installed"

    if [[ "${enic_vers}" != "${rpmver}" ]]; then
        warn "modinfo enic version (${enic_vers}) does not match RPM version (${rpmver})"
    fi

    if [[ -z "${enic}" ]]; then
        warn "enic module not loaded"
        return
    fi

    loadver=$(cat /sys/module/enic/version)
    if [[ "${loadver}" != "${rpmver}" ]]; then
        warn "Loaded enic version (${loadver}) does not match RPM version (${rpmver})"
    fi
}

# Reports the installed kmod-usnic_verbs RPM and warns when
#   - the RPM is missing,
#   - modinfo reports a different version than the RPM,
#   - the module is not loaded (pointing at a missing intel_iommu=on when
#     the kernel log says an IOMMU is required),
#   - the loaded version differs from the RPM.
print_usnic_verbs_version()
{
    local rpm usnic_verbs usnic_verbs_vers rpmverrel rpmver loadver

    # An empty ${rpm} means the package query failed.
    if ! rpm=$(rpm -q kmod-usnic_verbs 2> /dev/null); then
        rpm=""
    fi

    # lsmod line of the loaded module, and the version modinfo reports.
    usnic_verbs=$(lsmod | grep ^usnic_verbs)
    usnic_verbs_vers=$(modinfo usnic_verbs 2> /dev/null | awk '/^version:/ { print $2 }')

    if [[ -z "${rpm}" ]]; then
        warn "kmod-usnic_verbs RPM is not installed"
        return
    fi

    # Drop the package-name prefix and the architecture suffix, then cut
    # the trailing "-<release>" to obtain the bare version.
    rpmverrel=${rpm#kmod-usnic_verbs-}
    rpmverrel=${rpmverrel%"${usnic_verbs_arch}"}
    rpmver=${rpmverrel%-*}

    echo "usnic_verbs RPM version ${rpmverrel} installed"

    if [[ "${usnic_verbs_vers}" != "${rpmver}" ]]; then
        # Report the modinfo version, not the lsmod line.
        warn "modinfo usnic_verbs version (${usnic_verbs_vers}) does not match RPM version (${rpmver})"
    fi

    if [[ -z "${usnic_verbs}" ]]; then
        # Look in the kernel log for the IOMMU complaint before falling
        # back to the generic "not loaded" message.
        if dmesg | grep -q "IOMMU required but not present"; then
            warn "kernel parameter intel_iommu=on missing, usnic_verbs cannot function"
        else
            warn "usnic_verbs module not loaded"
        fi
        return
    fi

    loadver=$(cat /sys/module/usnic_verbs/version)
    if [[ "${loadver}" != "${rpmver}" ]]; then
        warn "Loaded usnic_verbs version (${loadver}) does not match RPM version (${rpmver})"
    fi
}

# Reports the installed libusnic_verbs RPM, or warns when it is missing.
print_libusnic_verbs_version()
{
    # An empty ${rpm} means the package query failed.
    if ! rpm=$(rpm -q libusnic_verbs 2> /dev/null); then
        rpm=""
    fi

    if [[ -n "${rpm}" ]]; then
        # Drop the package-name prefix and the architecture suffix, then
        # cut the trailing "-<release>" to obtain the bare version.
        rpmverrel=${rpm#libusnic_verbs-}
        rpmverrel=${rpmverrel%.x86_64}
        rpmver=${rpmverrel%-*}

        echo "libusnic_verbs RPM version ${rpmverrel} installed"
    else
        warn "libusnic_verbs RPM is not installed"
    fi
}

# Queries the Cisco Open MPI RPM and leaves the result in globals:
#   rpm           - "rpm -q" output, empty when the query fails
#   mpirpmverrel  - version-release of the package
#   mpirpmver     - version of the package
#   env_sh        - packaged file(s) whose name matches "openmpi-vars.sh"
# Only rpm is touched when the package is not installed.
check_cisco_ompi_pkg()
{
    if ! rpm=$(rpm -q "${cisco_ompi_name}" 2> /dev/null); then
        rpm=""
    fi

    # Nothing to parse without an installed package.
    if [[ -z "$rpm" ]]; then
        return 0
    fi

    # Peel the package name, the architecture suffix and the leading dash
    # off in turn; the version is what precedes the last "-".
    mpirpmfullver=${rpm#"${cisco_ompi_name}"}
    mpirpmfullver=${mpirpmfullver%.x86_64}
    mpirpmverrel=${mpirpmfullver#-}
    mpirpmver=${mpirpmverrel%-*}

    env_sh=$(rpm -ql "${cisco_ompi_name}" | grep "openmpi-vars.sh" 2> /dev/null)
}

# Locates ompi_info and reports the Open MPI version it describes.
# Globals written:
#   ompi_info_path - what "command -v ompi_info" printed
#   ompi_home      - installation prefix reported by ompi_info
#   ompi_ver       - full version reported by ompi_info
#   ompi_path      - ${ompi_home}/bin, or empty whenever a warning about a
#                    missing or unusable installation was issued, so that
#                    check_ompi_setup skips its checks
# Warns when ompi_info cannot be run, when its output lacks the prefix or
# version, or when the version does not match the installed Cisco RPM.
print_ompi_version()
{
    local warn_msg ompi_info_out

    check_cisco_ompi_pkg

    # Stays empty unless ompi_info yields both a prefix and a version.
    ompi_path=""

    # command -v is a shell builtin, so this works even where the external
    # "which" program (which check_utils does not verify) is missing.
    if ! ompi_info_path=$(command -v ompi_info 2> /dev/null); then
        warn_msg="Cannot run ompi_info."
        if [[ -n "$rpm" ]]; then
            warn_msg+=" RPM $rpm is installed, proper environment settings are required."
            if [[ -n "$env_sh" ]]; then
                warn_msg+=" Please source $env_sh"
            fi
        else
            warn_msg+=" Check if Open MPI is installed or environments are set properly."
        fi
        warn "${warn_msg}"
        return
    fi

    echo "Using ${ompi_info_path} to check Open MPI info..."

    # Run ompi_info once and pick both values out of the same output.
    ompi_info_out=$(ompi_info --parsable 2> /dev/null)
    ompi_home=$(grep path:prefix: <<< "${ompi_info_out}" | cut -d: -f3-)
    ompi_ver=$(grep ompi:version:full <<< "${ompi_info_out}" | cut -d: -f4-)

    if [[ -z "$ompi_home" || -z "$ompi_ver" ]]; then
        # Leave ompi_path empty and do not claim that a version is
        # installed when it could not be determined.
        warn "Cannot get Open MPI path or version from ompi_info"
        return
    fi
    ompi_path=${ompi_home}/bin

    # The RPM version must start with the version the library reports.
    if [[ -n "$rpm" && "${mpirpmver}" != "${ompi_ver}"* ]]; then
        warn "Open MPI lib version \"${ompi_ver}\" != rpm ${rpm} version \"${mpirpmver}\""
    fi

    echo "Open MPI version ${ompi_ver} installed"
}

# Checks that the Open MPI installation found by print_ompi_version is the
# one that will be used: usNIC support is built in, mpirun comes from
# ${ompi_path}, and a trivial MPI program links against
# ${ompi_home}/lib/libmpi.so.1.
# Does nothing when ompi_path is empty.
check_ompi_setup()
{
    local mpirun mpicc tmpdir wantlib lib

    # if not ompi_path, we already complained
    [[ -z "${ompi_path}" ]] && return

    if ! ompi_info --parsable | grep -q mca:btl:usnic:; then
        warn "Open MPI lib installed in ${ompi_home} does not support usNIC."
        return
    fi

    # Make sure we get the right "mpirun"
    mpirun=$(command -v mpirun 2> /dev/null)
    if [[ -z "${mpirun}" ]]; then
        warn "PATH not set to find mpirun"
    elif [[ "${mpirun}" != "${ompi_path}/mpirun" ]]; then
        warn "Wrong mpirun in PATH, should be ${ompi_path}/mpirun"
    fi

    mpicc=$(command -v mpicc 2> /dev/null)
    if [[ -z "${mpicc}" ]]; then
        warn "mpicc is not installed or PATH is not set properly to run mpicc"
        return
    fi

    # Make sure correct libmpi is picked up.  Everything is addressed by
    # absolute path inside a private directory, so the current directory
    # is never changed and nothing is written outside ${tmpdir}.
    if ! tmpdir=$(mktemp -d); then
        warn "Cannot create a temporary directory for the MPI compile test"
        return
    fi

    cat > "${tmpdir}/test.c" << 'E'
#include <stdlib.h>
#include "mpi.h"
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    exit(0);
}
E
    if ! mpicc -o "${tmpdir}/test" "${tmpdir}/test.c"; then
        warn "Error compiling trivial MPI program"
    else
        wantlib=${ompi_home}/lib/libmpi.so.1
        lib=$(ldd "${tmpdir}/test" | grep libmpi | awk '{ print $3 }')
        if [[ "${lib}" != "${wantlib}" ]]; then
            warn "First libmpi in path is ${lib}"
            echo "    should be ${wantlib}"
        fi
    fi

    rm -rf -- "${tmpdir}"
}

# Checks that every verbs device listed by ibv_devinfo has at least as
# many VFs (read from ${sys_infin_root}/<device>/max_vf) as the machine
# has CPU cores.
# Warns when ibv_devinfo fails, lists no device, a max_vf file is missing
# (which also ends the check), or a device has too few VFs.
check_ucs_config()
{
    local devinfo devs dev sys_max_vf numcores num_vfs errmsg

    #
    # Check that an adequate number of VFs have been configured
    #
    # ibv_devinfo is run on its own so that its exit status is the one
    # being tested; at the end of a pipeline only the status of the last
    # command would be visible.
    if ! devinfo=$(ibv_devinfo 2> /dev/null); then
        warn "Error running ibv_devinfo"
        return
    fi
    devs=$(awk '/hca_id/ { print $2 }' <<< "${devinfo}")
    if [[ -z "${devs}" ]]; then
        warn "No usnic verbs devices found"
        return
    fi

    # Count the distinct "physical id"/"core id" pairs in /proc/cpuinfo:
    # join the matching lines, start a new line at every "physical", and
    # count the unique results.
    numcores=$(grep -E 'core id|physical id' /proc/cpuinfo 2> /dev/null | \
                tr -d '\n' | sed 's/physical/\nphysical/g' | grep -v '^$' | \
                sort -u | wc -l)

    # ${devs} is deliberately unquoted: it holds one device name per line.
    for dev in ${devs}; do
        sys_max_vf=${sys_infin_root}/${dev}/max_vf
        if [[ ! -e "${sys_max_vf}" ]]; then
            warn "cannot find ${sys_max_vf}"
            return
        fi

        num_vfs=$(cat "${sys_max_vf}" 2> /dev/null)
        if [[ "${num_vfs}" -lt "${numcores}" ]]; then
            errmsg="${dev}:the number of usNICs provisioned (${num_vfs})"
            errmsg+=" is less than the number of CPU cores (${numcores}).\n"
            errmsg+="         Check the usNIC UCS configuration guide documents"
            errmsg+=" for more details about how to provision usNICs."
            warn "$errmsg"
        fi
    done
}

# Checks that ib_uverbs is loaded and runs check_usnic_dev on every device
# listed by ibv_devinfo.
# Warns when the module is not loaded, when ibv_devinfo fails, or when it
# lists no device.
check_verbs()
{
    local devinfo devs dev

    if ! lsmod | grep -q '^ib_uverbs'; then
        warn "uverbs not loaded (service rdma start)"
        return
    fi

    # ibv_devinfo is run on its own so that its exit status is the one
    # being tested; at the end of a pipeline only the status of the last
    # command would be visible.
    if ! devinfo=$(ibv_devinfo 2> /dev/null); then
        warn "Error running ibv_devinfo"
        return
    fi
    devs=$(awk '/hca_id/ { print $2 }' <<< "${devinfo}")
    if [[ -z "${devs}" ]]; then
        warn "No usnic verbs devices found"
        return
    fi

    # ${devs} is deliberately unquoted: it holds one device name per line.
    for dev in ${devs}; do
        check_usnic_dev "${dev}"
    done
}

# Checks one verbs device: its port must be PORT_ACTIVE and an interface
# whose hardware address matches the device GUID must show up in
# "ifconfig -a".
# Arguments: $1 - device name as listed by ibv_devinfo
check_usnic_dev()
{
    local dev=$1
    local devinfo state guid mac iface

    # One query serves both the port state and the GUID.
    devinfo=$(ibv_devinfo -d "${dev}" 2> /dev/null)

    # The tab in front of "state:" is part of the pattern.
    state=$(awk '/\tstate:/ { print $2 }' <<< "${devinfo}")
    if [[ "${state}" == PORT_DOWN ]]; then
        warn "port for $dev is down, check ifconfig"
    elif [[ "${state}" != PORT_ACTIVE ]]; then
        warn "Bad state for $dev: \"${state}\""
    fi

    # Deduce the ethernet interface from GUID - better way?
    # Characters 3-4, 6-7, 13-14 and 16-19 of the GUID are searched for in
    # the ifconfig output once its colons have been removed.
    guid=$(awk '/sys_image_guid:/ { print $2 }' <<< "${devinfo}")
    mac=${guid:2:2}${guid:5:2}${guid:12:2}${guid:15:4}

    # Search only with a complete 10-character key: an empty one would
    # leave grep without a pattern, a short one could match any interface.
    iface=""
    if [[ ${#mac} -eq 10 ]]; then
        iface=$(ifconfig -a | sed 's/://g' | grep -iF -- "${mac}" | awk '{ print $1 }')
    fi
    if [[ -z "${iface}" ]]; then
        warn "Cannot find matching interface for ${dev}"
    fi
}



# Hand the command-line arguments over unchanged, one word per argument.
main "$@"


