Created on 04-05-2018 12:10 AM - edited 09-16-2022 01:42 AM
The script provided here performs a health check of a node before DSX is installed. IBM DSX has certain prerequisites without which the installation cannot go through; this script validates the health of a node and checks whether it is fit for a DSX installation.
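Note: Adjust the two `dd` commands of the disk latency and disk throughput tests (lines 166 and 182 of the original script) to match your installation. The source device used there (`/dev/xvdb`) may differ depending on where you are doing the installation.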
#!/bin/bash
# Warn when the detected RAM size is below the required minimum.
# Globals:   OUTPUT (read) - report file the warning is appended to
# Arguments: $1 - detected RAM size in GB
#            $2 - required RAM size in GB
# Outputs:   the WARNING line on stdout, also appended to ${OUTPUT}
# Returns:   1 when the size is below the requirement, 0 otherwise
function checkRAM(){
  local size="$1"
  local limit="$2"
  if [[ ${size} -lt ${limit} ]]; then
    # The report path is quoted so a path containing whitespace stays a single
    # file; with OUTPUT unset the message still only goes to stdout.
    echo "WARNING: RAM size is ${size}GB, while requirement is ${limit}GB" | tee -a "${OUTPUT:-/dev/null}"
    return 1
  fi
  return 0
}
# Warn when the detected number of CPU cores is below the required minimum.
# Globals:   OUTPUT (read) - report file the warning is appended to
# Arguments: $1 - detected number of CPU cores
#            $2 - required number of CPU cores
# Outputs:   the WARNING line on stdout, also appended to ${OUTPUT}
# Returns:   1 when the count is below the requirement, 0 otherwise
function checkCPU(){
  local size="$1"
  local limit="$2"
  if [[ ${size} -lt ${limit} ]]; then
    # The report path is quoted so a path containing whitespace stays a single
    # file; with OUTPUT unset the message still only goes to stdout.
    echo "WARNING: CPU cores are ${size}, while requirement are ${limit}" | tee -a "${OUTPUT:-/dev/null}"
    return 1
  fi
  return 0
}
# Print the short command-line synopsis of this script on stdout,
# one line per printf argument.
function usage(){
  printf '%s\n' \
    "This script checks if this node meets requirements to install DSX-Local. " \
    "Arguments: " \
    "--type=[9nodes_master|9nodes_storage|9nodes_compute|3nodes] To specify a node type" \
    "--help To see help "
}
# Print the detailed help text on stdout: the invocation syntax followed by
# the list of validations and the severity the text associates with each.
# Globals: INSTALLPATH_SIZE, DATAPATH_SIZE (read) - sizes in GB quoted in the
#          "Disk" section of the text
# NOTE(review): the text describes the 3-node requirement as 8 cores / 16GB and
# labels some checks ERROR, while the checks in this script use other values
# and severities for some of them - confirm which side is authoritative.
function helper(){
  local script_name
  # "$0" is quoted so a script path containing whitespace still yields one
  # base name instead of several arguments to basename.
  script_name="$(basename -- "$0")"
  # Unquoted here-document: ${...} is expanded, everything else is literal.
  cat <<HELP_TEXT
##########################################################################################
Help:
./${script_name} --type=[9nodes_master|9nodes_storage|9nodes_compute|3nodes]
Specify a node type and start the validation
Checking preReq before DSX-local installation
Please run this script in all the nodes of your cluster
Differnt node types have different RAM/CPU requirement
List of validation:
CPU
WARNING for 9node master cpu core < 8, 9node storage cpu core < 16, 9node compute cpu core < 32; for 3node cpu core < 8
WARNING for 3node cpu core < 8
RAM
WARNING for 9node master RAM < 16GB, 9node storage RAM < 32GB, 9node compute RAM size < 64GB; for 3node RAM size < 16GB
WARNING for 3node RAM < 16GB
Disk latency test:
WARNING dd if=/dev/zero of=/root/testfile bs=512 count=1000 oflag=dsync The value should be less than 10s for copying 512 kB
ERROR: must be less than 60s for copying 512 kB,
Disk throughput test:
WARNING dd if=/dev/zero of=/root/testfile bs=1G count=1 oflag=dsync The value should be less than 5s for copying 1.1 GB
ERROR: must be less than 35s for copying 1.1 GB
Chrony/NTP
WARNING check is ntp/chrony is setup
Firewall disabled
ERROR firewalled and iptable is disabled
Disk
ERROR root directory should have at least 10 GB
WARNING partition for installer files should have one xfs disk formartted and mounted > ${INSTALLPATH_SIZE}GB
WARNING partition for data storage should have one xfs disk formartted and mounted > ${DATAPATH_SIZE}GB
Cron job check
ERROR check whether this node has a cronjob changes ip route, hosts file or firewall setting during installation
DSX Local 443 port check
ERROR check port 443 is open
SELinux check
ERROR check SElinux is either in enforcing or permissive mode
Gateway check
ERROR check is gateway is setup
DNS check
ERROR check is DNS service is setup which allow hostname map to ip
Docker check
ERROR Check to confirm Docker is not installed
Kubernetes check
ERROR Check to confirm Kubernetes is not installed
##########################################################################################
HELP_TEXT
}
# Validate a directory given as install/data path; terminates the script when
# the path is unusable.
# Globals:   OUTPUT (read) - report file error lines are appended to
# Arguments: $1 - path to validate
# Outputs:   the ERROR line on stdout (also appended to ${OUTPUT}) followed by
#            the usage text
# Returns:   0 when the path is an existing directory other than "/";
#            otherwise exits the script with status 1
function checkpath(){
  local mypath="$1"
  if [[ "$mypath" = "/" ]]; then
    # The report path is quoted so whitespace in it cannot split it; with
    # OUTPUT unset the message still only goes to stdout.
    echo "ERROR: Can not use root path / as path" | tee -a "${OUTPUT:-/dev/null}"
    usage
    exit 1
  fi
  if [[ ! -d "$mypath" ]]; then
    echo "ERROR: $mypath not found in node." | tee -a "${OUTPUT:-/dev/null}"
    usage
    exit 1
  fi
  return 0
}
# Settings for internal usage. The *_PLACEHOLDER strings and the CPU/RAM zeros
# are defaults; a comment in the input check below states that an internal
# caller rewrites INSTALLPATH, DATAPATH, CPU and RAM with sed before running
# the script, so the shape of these assignment lines should be kept stable.
MASTERONE="MASTERONE_PLACEHOLDER" # the docker check only runs when this is "NO" or the script got exactly one argument (docker is already installed on an internally-run first master)
INSTALLPATH="INSTALLPATH_PLACEHOLDER"
DATAPATH="DATAPATH_PLACEHOLDER"
# Minimum CPU core count and RAM size (GB) handed to checkCPU / checkRAM;
# the input check overwrites them according to --type.
CPU=0
RAM=0
# Global parameters: minimum partition sizes in GB for the install path and
# the data path.
INSTALLPATH_SIZE=150
DATAPATH_SIZE=350
# Set up the report file; any report from a previous run is removed first.
OUTPUT="/tmp/preInstallCheckResult"
rm -f ${OUTPUT}
# WARNING / ERROR are set to 1 by the checks and decide the final exit status.
WARNING=0
ERROR=0
# LOCALTEST is the per-check flag: reset to 0 before a check, set to 1 when
# the check reports something, and "PASS" is printed while it is still 0.
LOCALTEST=0
# Command prefix used by the checks: empty for root, "sudo" for everyone else.
USE_SUDO=""
[[ "$(whoami)" != "root" ]] && USE_SUDO="sudo"
# Input check. Two ways of running the script are handled:
#   * Argument count other than one: internal call. The caller is expected to
#     have rewritten INSTALLPATH / DATAPATH / CPU / RAM with sed beforehand.
#   * Exactly one argument: --help, or --type=<node type>, in which case the
#     user is prompted for the install path (and the data path for node types
#     that store data).
# The placeholder names are written as two adjacent string pieces so a
# text substitution of the placeholder cannot rewrite these comparisons.
if [[ $# -ne 1 ]]; then
  if [[ -n "$INSTALLPATH" && "$INSTALLPATH" != "INSTALLPATH_""PLACEHOLDER" ]]; then
    # Internal call with a substituted install path.
    checkpath "$INSTALLPATH"
    # An unsubstituted placeholder means "no data path", exactly as the data
    # path size check further down treats it.
    if [[ -n "$DATAPATH" && "$DATAPATH" != "DATAPATH_""PLACEHOLDER" ]]; then
      checkpath "$DATAPATH"
    fi
  else
    # Neither a single argument nor a substituted install path: show usage
    # instead of reporting the placeholder as a missing directory.
    usage
    exit 1
  fi
else
  # The user runs the script: map the node type to its CPU/RAM requirement and
  # note whether a data path has to be asked for.
  ask_datapath=0
  case "$1" in
    --help)
      helper
      # NOTE(review): --help exits with status 1; kept as is because callers
      # may rely on it.
      exit 1
      ;;
    --type=9nodes_storage)
      CPU=16
      RAM=32
      ask_datapath=1
      ;;
    --type=9nodes_master)
      CPU=8
      RAM=16
      ;;
    --type=9nodes_compute)
      CPU=32
      RAM=64
      ;;
    --type=3nodes)
      # NOTE(review): the help text mentions 8 cores / 16GB for 3 nodes while
      # 32 / 64 is enforced here - confirm which one is intended.
      CPU=32
      RAM=64
      ask_datapath=1
      ;;
    *)
      echo "Sorry the argument is invalid"
      usage
      exit 1
      ;;
  esac
  echo "Please enter the path of partition for installer files"
  # -r keeps backslashes in the typed path literal.
  read -r INSTALLPATH
  checkpath "$INSTALLPATH"
  if [[ ${ask_datapath} -eq 1 ]]; then
    echo "Please enter the path of partition for data storage"
    read -r DATAPATH
    checkpath "$DATAPATH"
  fi
fi
# Print the elapsed whole seconds found in the last line of a captured dd log.
# Arguments: $1 - file holding dd's combined output
# Outputs:   the integer part of the elapsed time; nothing when the last line
#            carries no number in the expected place
function ddElapsedSeconds(){
  local log_file="$1"
  local elapsed
  # The time is the field after "copied," in both the short
  # "(512 kB) copied, 1.2 s" and the long "(512 kB, 500 KiB) copied, 1.2 s"
  # summary layouts; column 6 is the fallback when that word is absent.
  elapsed=$(tail -n 1 "${log_file}" 2> /dev/null | awk '{
    for (i = 1; i < NF; i++) {
      if ($i == "copied,") { print $(i + 1); exit }
    }
    print $6
  }')
  # Integer part only, since bc may not be available in customer environments.
  echo "${elapsed}" | grep -E -o "[0-9]+" | head -n 1
}

# Disk latency and throughput checks: time two synchronous dd writes into the
# install path and compare the elapsed seconds with the thresholds.
echo "##########################################################################################" > "${OUTPUT}" 2>&1
echo "Checking Disk latency and Disk throughput" | tee -a "${OUTPUT}"
# Device the test data is read from. The default is the device the original
# author had mounted for /install; check your storage before running the test
# and override it by exporting DD_SOURCE when your layout differs.
DD_SOURCE="${DD_SOURCE:-/dev/xvdb}"
DD_TESTFILE="${INSTALLPATH}/testfile"
# dd's statistics are captured in a private scratch file rather than a fixed
# file name in the current directory.
DD_LOG=$(mktemp 2> /dev/null) || DD_LOG="output"

${USE_SUDO} dd if="${DD_SOURCE}" of="${DD_TESTFILE}" bs=512 count=1000 oflag=dsync &> "${DD_LOG}"
dd_rc=$?
res_int=$(ddElapsedSeconds "${DD_LOG}")
if [[ ${dd_rc} -ne 0 || -z "${res_int}" ]]; then
  # A test that could not be measured must not be reported as PASS.
  echo "ERROR: Disk latency test could not be run, dd from ${DD_SOURCE} to ${DD_TESTFILE} failed" | tee -a "${OUTPUT}"
  ERROR=1
  LOCALTEST=1
elif [[ ${res_int} -gt 60 ]]; then
  echo "ERROR: Disk latency test failed. By copying 512 kB, the time must be shorter than 60s, recommended to be shorter than 10s, validation result is ${res_int}s " | tee -a "${OUTPUT}"
  ERROR=1
  LOCALTEST=1
elif [[ ${res_int} -gt 10 ]]; then
  echo "WARNING: Disk latency test failed. By copying 512 kB, the time recommended to be shorter than 10s, validation result is ${res_int}s " | tee -a "${OUTPUT}"
  WARNING=1
  LOCALTEST=1
fi

${USE_SUDO} dd if="${DD_SOURCE}" of="${DD_TESTFILE}" bs=1G count=1 oflag=dsync &> "${DD_LOG}"
dd_rc=$?
res_int=$(ddElapsedSeconds "${DD_LOG}")
if [[ ${dd_rc} -ne 0 || -z "${res_int}" ]]; then
  echo "ERROR: Disk throughput test could not be run, dd from ${DD_SOURCE} to ${DD_TESTFILE} failed" | tee -a "${OUTPUT}"
  ERROR=1
  LOCALTEST=1
elif [[ ${res_int} -gt 35 ]]; then
  echo "ERROR: Disk throughput test failed. By copying 1.1 GB, the time must be shorter than 35s, recommended to be shorter than 5s, validation result is ${res_int}s " | tee -a "${OUTPUT}"
  ERROR=1
  LOCALTEST=1
elif [[ ${res_int} -gt 5 ]]; then
  echo "WARNING: Disk throughput test failed. By copying 1.1 GB, the time is recommended to be shorter than 5s, validation result is ${res_int}s " | tee -a "${OUTPUT}"
  WARNING=1
  LOCALTEST=1
fi

rm -f -- "${DD_LOG}" > /dev/null 2>&1
# The test file was written through ${USE_SUDO}, so it is removed the same way.
${USE_SUDO} rm -f -- "${DD_TESTFILE}" > /dev/null 2>&1
if [[ ${LOCALTEST} -eq 0 ]]; then
  echo "PASS" | tee -a "${OUTPUT}"
fi
echo
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
# Gateway check: the routing table printed by "ip route" has to contain a
# line with "default"; otherwise an ERROR is recorded.
LOCALTEST=0
echo "##########################################################################################" >> ${OUTPUT} 2>&1
echo "Checking gateway" | tee -a ${OUTPUT}
if ${USE_SUDO} ip route | grep "default" > /dev/null 2>&1; then
  # Nothing was flagged, so the check passes.
  echo "PASS" | tee -a ${OUTPUT}
else
  echo "ERROR: default gateway is not setup " | tee -a ${OUTPUT}
  ERROR=1
  LOCALTEST=1
fi
echo
echo "##########################################################################################" >> ${OUTPUT} 2>&1
# DNS check: /etc/resolv.conf has to contain at least one active
# "nameserver <IPv4 address>" entry.
LOCALTEST=0
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
echo "Checking DNS" | tee -a "${OUTPUT}"
# The pattern is anchored to the start of the line so commented-out entries do
# not count, and the dots are escaped so they only match a literal ".".
if ! ${USE_SUDO} grep -E -q '^[[:space:]]*nameserver[[:space:]]+[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' /etc/resolv.conf 2> /dev/null; then
  echo "ERROR: DNS is not properly setup " | tee -a "${OUTPUT}"
  ERROR=1
  LOCALTEST=1
fi
if [[ ${LOCALTEST} -eq 0 ]]; then
  echo "PASS" | tee -a "${OUTPUT}"
fi
echo
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
# Time synchronisation check: a WARNING is recorded unless "systemctl status"
# answers with 0 or 3 for ntpd or for chronyd.
LOCALTEST=0
echo "##########################################################################################" >> ${OUTPUT} 2>&1
echo "Checking chrony / ntp" | tee -a ${OUTPUT}
TIMESYNCON=1 # 1 for not sync 0 for sync
for timesync_unit in ntpd chronyd; do
  ${USE_SUDO} systemctl status ${timesync_unit} > /dev/null 2>&1
  timesync_rc=$?
  # NOTE(review): systemctl documents status 3 as "unit is not active", so a
  # unit that is present but stopped is accepted here as well - confirm that
  # this is the intended meaning of "setup".
  case ${timesync_rc} in
    0|3)
      TIMESYNCON=0
      ;;
  esac
done
if [[ ${TIMESYNCON} -eq 0 ]]; then
  echo "PASS" | tee -a ${OUTPUT}
else
  echo "WARNING: NTP/Chronyc is not setup " | tee -a ${OUTPUT}
  WARNING=1
  LOCALTEST=1
fi
echo
echo "##########################################################################################" >> ${OUTPUT} 2>&1
# Firewall check: each of iptables, ip6tables and firewalld whose status
# command succeeds is reported with a WARNING.
LOCALTEST=0
echo "##########################################################################################" >> ${OUTPUT} 2>&1
echo "Checking if firewall is shutdown" | tee -a ${OUTPUT}
# Each entry is "<status command>|<name used in the message>".
for fw_probe in \
  "service iptables status|iptable" \
  "service ip6tables status|ip6table" \
  "systemctl status firewalld|firewalld"; do
  fw_cmd="${fw_probe%%|*}"
  fw_label="${fw_probe##*|}"
  # ${fw_cmd} is deliberately unquoted: it has to split into command + args.
  if ${USE_SUDO} ${fw_cmd} > /dev/null 2>&1; then
    echo "WARNING: ${fw_label} is not disabled" | tee -a ${OUTPUT}
    LOCALTEST=1
    WARNING=1
  fi
done
if [[ ${LOCALTEST} -eq 0 ]]; then
  echo "PASS" | tee -a ${OUTPUT}
fi
echo
echo "##########################################################################################" >> ${OUTPUT} 2>&1
# SELinux check: the output of getenforce has to mention the permissive or
# the enforcing mode; anything else is recorded as an ERROR.
LOCALTEST=0
echo "##########################################################################################" >> ${OUTPUT} 2>&1
echo "Checking SELinux" | tee -a ${OUTPUT}
selinux_res="$(${USE_SUDO} getenforce 2>&1)"
case "${selinux_res}" in
  *Permissive*|*permissive*|*Enforcing*|*enforcing*)
    echo "PASS" | tee -a ${OUTPUT}
    ;;
  *)
    echo "ERROR: SElinux is not in enforcing or permissive mode" | tee -a ${OUTPUT}
    LOCALTEST=1
    ERROR=1
    ;;
esac
echo
echo "##########################################################################################" >> ${OUTPUT} 2>&1
# Cron job check: warn when the crontab listed by "crontab -l" contains at
# least one active entry.
LOCALTEST=0
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
echo "Checking pre-exsiting cronjob" | tee -a "${OUTPUT}"
# Any line that is neither blank nor a comment counts as an entry. This
# replaces the pattern "*", which is not a well-defined extended regular
# expression and also matched comment-only crontabs. The "no crontab for
# <user>" notice on stderr is dropped.
if ${USE_SUDO} crontab -l 2> /dev/null | grep -E -q -v '^[[:space:]]*(#|$)'; then
  echo "WARNING: Found cronjob set up in background. Please make sure cronjob will not change ip route, hosts file or firewall setting during installation" | tee -a "${OUTPUT}"
  LOCALTEST=1
  WARNING=1
fi
if [[ ${LOCALTEST} -eq 0 ]]; then
  echo "PASS" | tee -a "${OUTPUT}"
fi
echo
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
# Root partition check: the file system mounted on / needs at least 10G of
# available space.
LOCALTEST=0
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
echo "Checking size of root partition" | tee -a "${OUTPUT}"
# -P keeps every file system on a single output line, so the value is always
# column 4 of the second line; -BG reports whole gigabytes.
ROOTSIZE=$(${USE_SUDO} df -P -BG "/" | awk 'NR == 2 {print $4}')
ROOTSIZE="${ROOTSIZE%G}"
# A value that could not be read is treated like a too small partition.
if [[ ! "${ROOTSIZE}" =~ ^[0-9]+$ ]] || [[ ${ROOTSIZE} -lt 10 ]]; then
  echo "ERROR: size of root partition is smaller than 10G" | tee -a "${OUTPUT}"
  LOCALTEST=1
  ERROR=1
fi
if [[ ${LOCALTEST} -eq 0 ]]; then
  echo "PASS" | tee -a "${OUTPUT}"
fi
echo
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
# Install path check: ${INSTALLPATH} has to be the mount point of a partition
# of at least ${INSTALLPATH_SIZE}GB.
LOCALTEST=0
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
echo "Checking if install path: ${INSTALLPATH} have enough space (${INSTALLPATH_SIZE}GB)" | tee -a "${OUTPUT}"
# Normalise the path (absolute, no trailing slash, symlinks resolved) so it
# can be compared with the mount point column of df.
install_mount=$(CDPATH= cd -- "${INSTALLPATH}" 2> /dev/null && pwd -P) || install_mount="${INSTALLPATH%/}"
# Compare the mount point column exactly instead of grepping the whole df
# table, which also matched other mounts and device names containing the path.
PARTITION=$(${USE_SUDO} df -P -BG | awk -v mnt="${install_mount}" '$6 == mnt {print $2}' | tail -n 1)
PARTITION="${PARTITION%G}"
if [[ ! "${PARTITION}" =~ ^[0-9]+$ ]]; then
  echo "ERROR: can not find the ${INSTALLPATH} partition you specified in install_path" | tee -a "${OUTPUT}"
  LOCALTEST=1
  ERROR=1
elif [[ ${PARTITION} -lt ${INSTALLPATH_SIZE} ]]; then
  echo "WARNING: size of install_path ${INSTALLPATH} is smaller than requirement (${INSTALLPATH_SIZE}GB)" | tee -a "${OUTPUT}"
  LOCALTEST=1
  # The finding is reported as a WARNING, so it raises the WARNING flag; it
  # used to raise ERROR although no ERROR line was written to the report.
  WARNING=1
fi
if [[ ${LOCALTEST} -eq 0 ]]; then
  echo "PASS" | tee -a "${OUTPUT}"
fi
echo
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
# Data path check, only for nodes with a data path: ${DATAPATH} has to be the
# mount point of a partition of at least ${DATAPATH_SIZE}GB.
if [[ $DATAPATH != "" && $DATAPATH != "DATAPATH_PLACEHOLDER" ]]; then
  LOCALTEST=0
  echo "##########################################################################################" >> "${OUTPUT}" 2>&1
  echo "This is a storage node, checking if data path: ${DATAPATH} have enough space (${DATAPATH_SIZE}GB)" | tee -a "${OUTPUT}"
  # Normalise the path (absolute, no trailing slash, symlinks resolved) so it
  # can be compared with the mount point column of df.
  data_mount=$(CDPATH= cd -- "${DATAPATH}" 2> /dev/null && pwd -P) || data_mount="${DATAPATH%/}"
  # Compare the mount point column exactly instead of grepping the whole df
  # table, which also matched other mounts and device names containing the path.
  PARTITION=$(${USE_SUDO} df -P -BG | awk -v mnt="${data_mount}" '$6 == mnt {print $2}' | tail -n 1)
  PARTITION="${PARTITION%G}"
  if [[ ! "${PARTITION}" =~ ^[0-9]+$ ]]; then
    echo "ERROR: can not find the ${DATAPATH} partition you specified in data_path" | tee -a "${OUTPUT}"
    LOCALTEST=1
    ERROR=1
  elif [[ ${PARTITION} -lt ${DATAPATH_SIZE} ]]; then
    echo "WARNING: size of data_path ${DATAPATH} is smaller than requirement (${DATAPATH_SIZE}GB)" | tee -a "${OUTPUT}"
    LOCALTEST=1
    # The finding is reported as a WARNING, so it raises the WARNING flag; it
    # used to raise ERROR although no ERROR line was written to the report.
    WARNING=1
  fi
  if [[ ${LOCALTEST} -eq 0 ]]; then
    echo "PASS" | tee -a "${OUTPUT}"
  fi
  echo
  echo "##########################################################################################" >> "${OUTPUT}" 2>&1
fi
# XFS check: xfs_info for the install path has to report ftype=1.
LOCALTEST=0
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
echo "Checking if xfs is enabled" | tee -a "${OUTPUT}"
# The path is quoted so an install path containing whitespace reaches
# xfs_info as a single argument.
if ! ${USE_SUDO} xfs_info "${INSTALLPATH}" | grep "ftype=1" > /dev/null 2>&1; then
  echo "ERROR: xfs is not enabled, ftype=0, should be 1" | tee -a "${OUTPUT}"
  LOCALTEST=1
  ERROR=1
fi
if [[ ${LOCALTEST} -eq 0 ]]; then
  echo "PASS" | tee -a "${OUTPUT}"
fi
echo
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
# CPU / RAM check: compare the processor count from /proc/cpuinfo and the
# MemTotal value from /proc/meminfo with the ${CPU} / ${RAM} requirements.
LOCALTEST=0
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
echo "Checking CPU core numbers and RAM size" | tee -a "${OUTPUT}"
# Number of "processor" entries in /proc/cpuinfo.
cpunum=$(${USE_SUDO} grep -c '^processor' /proc/cpuinfo)
if [[ ! ${cpunum} =~ ^[0-9]+$ ]]; then
  echo "ERROR: Invalid cpu numbers '${cpunum}'" | tee -a "${OUTPUT}"
  # Record the failure so that no PASS is printed after this ERROR and the
  # final summary reflects it.
  LOCALTEST=1
  ERROR=1
else
  checkCPU "${cpunum}" "${CPU}"
  if [[ $? -eq 1 ]]; then
    LOCALTEST=1
    WARNING=1
  fi
fi
# MemTotal as printed by /proc/meminfo (second column).
mem=$(${USE_SUDO} cat /proc/meminfo | grep MemTotal | awk '{print $2}')
# Validate before doing arithmetic so an unreadable value is reported by the
# script instead of as a shell arithmetic error.
if [[ ! ${mem} =~ ^[0-9]+$ ]]; then
  echo "ERROR: Invalid memory size '${mem}'" | tee -a "${OUTPUT}"
  LOCALTEST=1
  ERROR=1
else
  # Scale down to the unit compared with ${RAM} (integer division).
  mem=$(( mem / 1000000 ))
  checkRAM "${mem}" "${RAM}"
  if [[ $? -eq 1 ]]; then
    LOCALTEST=1
    WARNING=1
  fi
fi
if [[ ${LOCALTEST} -eq 0 ]]; then
  echo "PASS" | tee -a "${OUTPUT}"
fi
echo
echo "##########################################################################################" >> "${OUTPUT}" 2>&1
# Docker check, run when MASTERONE is "NO" or the script received exactly one
# argument: an ERROR is recorded when a docker executable is found or the
# docker unit status command succeeds.
if [[ ${MASTERONE} = "NO" || $# -eq 1 ]]; then
  LOCALTEST=0
  echo "##########################################################################################" >> ${OUTPUT} 2>&1
  echo "Checking to confirm docker is not installed " | tee -a ${OUTPUT}
  # Both probes always run; their statuses are evaluated together below.
  ${USE_SUDO} which docker > /dev/null 2>&1
  rc1=$?
  ${USE_SUDO} systemctl status docker &> /dev/null
  rc2=$?
  if [[ ${rc1} -ne 0 && ${rc2} -ne 0 ]]; then
    echo "PASS" | tee -a ${OUTPUT}
  else
    echo "ERROR: Docker is already installed with a different version or settings, please uninstall Docker" | tee -a ${OUTPUT}
    LOCALTEST=1
    ERROR=1
  fi
  echo
  echo "##########################################################################################" >> ${OUTPUT} 2>&1
fi
# Kubernetes check: an ERROR is recorded when the kubelet unit status command
# succeeds or, failing that, when a kubectl executable is found.
LOCALTEST=0
echo "##########################################################################################" >> ${OUTPUT} 2>&1
echo "Checking to confirm Kubernetes is not installed" | tee -a ${OUTPUT}
# The kubectl lookup only runs when the kubelet probe did not succeed.
if ${USE_SUDO} systemctl status kubelet &> /dev/null || ${USE_SUDO} which kubectl &> /dev/null; then
  echo "ERROR: Kubernetes is already installed with a different version or settings, please uninstall Kubernetes" | tee -a ${OUTPUT}
  LOCALTEST=1
  ERROR=1
else
  echo "PASS" | tee -a ${OUTPUT}
fi
echo
echo "##########################################################################################" >> ${OUTPUT} 2>&1
# Log the overall result and leave with the matching exit status:
# 0 when nothing was flagged, 2 when an ERROR was recorded, 1 when only a
# WARNING was recorded.
if [[ ${ERROR} -ne 1 && ${WARNING} -ne 1 ]]; then
  echo "Finished successfully! This node meets the requirement" | tee -a ${OUTPUT}
  exit 0
fi
# ERROR takes precedence over WARNING.
if [[ ${ERROR} -eq 1 ]]; then
  echo "Finished with ERROR, please check ${OUTPUT}"
  exit 2
fi
echo "Finished with WARNING, please check ${OUTPUT}"
exit 1