#! /bin/sh

# chkconfig: 345 90 13
# description: Starts and stops the pgpool.
# processname: pgpool
# pidfile: /var/run/pgpool.pid
#

. /etc/profile.d/smx.sh
. /etc/profile.d/jakarta.sh
. /etc/profile.d/java.sh

# Locale setting for this script.
LANG=ja_JP.eucJP

# Non-zero makes the table checks report every table name they probe.
DEBUG=0

# ----- syslog settings ----- #
# SYSLOG_FLAG : 0 = off
#               1 = on (pgpool log -> local file [v3.0])
#               2 = on (pgpool log -> syslog)
SYSLOG_FLAG=1
# The smx shell scripts log with facility 'local5'.
FACILITY=local5
# syslog priority names; the trailing number is the numeric severity.
PRI_DEBUG=debug     # 7
PRI_INFO=info       # 6
PRI_WARN=warning    # 4
PRI_ERR=err         # 3
PRI_CRIT=crit       # 2
# Tag attached to every syslog entry written by this script.
SYSLOG_TAG=pgpool_smx
# --------------------------- #


# ----- define cmd ----- #
# Absolute paths of every external command used below. The functions call
# them through these variables (e.g. `$grep`) rather than relying on PATH.
awk=/bin/awk
cat=/bin/cat
date=/bin/date
echo=/bin/echo
grep=/bin/grep
head=/usr/bin/head
kill=/bin/kill
logger=/usr/bin/logger  # for syslog
mv=/bin/mv
ps=/bin/ps
rm=/bin/rm
sleep=/bin/sleep
sort=/bin/sort
#su=/bin/su
touch=/bin/touch

ssh=/usr/bin/ssh
psql=/usr/bin/psql
# ---------------------- #

# ----- paths and runtime settings ----- #
BASE_DIR=/usr/etc
PGPOOL_CONF=${BASE_DIR}/pgpool.conf
LOG_PROPS=${BASE_DIR}/log.properties
# SMX_LOG_HOME is not set in this file; presumably it comes from one of the
# profile scripts sourced above -- TODO confirm.
PGPOOL_LOG_DIR="$SMX_LOG_HOME/cluster"
PGPOOL_BOOTLOG="$PGPOOL_LOG_DIR/pgpool_smx_boot.log"

pidfile=""                    # filled in by getPIDfile
pidfilename="pgpool.pid"
lockfile="/var/lock/subsys/pgpool-smx"
SLEEPING_COUNT=3              # seconds slept after launching / stopping pgpool before re-checking the process

PGPOOL=/usr/bin/pgpool

# Text getPID looks for in the process list. Derived from $PGPOOL so it always
# matches the command line that start_pgpool_smx launches.
PRC_STR="${PGPOOL} -f"
SVR_NAME="pgpool-smx"

SMXD_USER="smxd"
POSTGRES_USER="postgres"
PROCESS_CHECK_ALL="${SMX_HOME}/common/bin/monitor/process_check_all.sh"
# Same file as PGPOOL_CONF; kept as a separate name for the functions using it.
PGPOOL_CONF_FILE="${PGPOOL_CONF}"
NANAKODB="nanakoDB"
LOGDB="logDB"

# Maximum number of times start() polls the database status (default = 60).
# It sleeps one second between polls, so this is roughly a timeout in seconds.
BOOT_REPEAT_TIMES=60


# ----- function ----- #
writeLog(){   # args : priority(for syslog) , log_msg
  # Append "[<date>] <log_msg>" to the boot log file, then hand the same
  # message to writeSyslog with the given priority.
  # Sets the globals PRIORITY, LOG_MSG and CRT_DATE.
  PRIORITY="$1"
  LOG_MSG="$2"
  CRT_DATE="[`$date`]"
  # The target is quoted so a log path containing whitespace is still a
  # single redirection word.
  $echo "$CRT_DATE $LOG_MSG" >> "$PGPOOL_BOOTLOG"
  writeSyslog "$PRIORITY" "$LOG_MSG"
}

writeSyslog(){  # args : priority(for syslog) , log_msg
  # Send log_msg to syslog as ${FACILITY}.<priority>, tagged with
  # "${SYSLOG_TAG}[<pid of this script>]".
  # SYSLOG_FLAG values 1 and 2 are both documented as "on" (they differ only in
  # where pgpool's own output goes), so both forward the message. Any other
  # value, including an empty one, leaves syslog untouched.
  case "${SYSLOG_FLAG}" in
    1|2)
      $logger -p "${FACILITY}.${1}" -t "${SYSLOG_TAG}[$$]" "${2}"
      ;;
  esac
}

getPID(){
  # Print the PID of the running pgpool, or nothing when there is none.
  # Process lines containing $PRC_STR are kept, minus the grep commands
  # themselves and anything mentioning $SVR_NAME (this init script). The
  # remaining lines are sorted numerically from the ppid column onward and the
  # pid column of the first one is printed.
  $ps -eww -o pgid,pid,ppid,s,args \
    | $grep "$PRC_STR" \
    | $grep -v 'grep' \
    | $grep -v "${SVR_NAME}" \
    | $sort -n -k 3 \
    | $head -n1 \
    | $awk '{print $2}'
}

getPIDfile(){
  # Set the global $pidfile to "<logdir>/<pidfilename>".
  # The non-comment lines of ${PGPOOL_CONF} that mention "logdir" are
  # evaluated as shell code, which is expected to assign $logdir.
  eval $(
    $cat ${PGPOOL_CONF} \
      | $grep -v ^# \
      | $grep "logdir"
  )
  pidfile="${logdir}/${pidfilename}"
}

load_log_properties(){
  # Evaluate every non-comment line of ${LOG_PROPS} as shell code, so the
  # settings in that file become shell variables.
  eval $($cat ${LOG_PROPS} | $grep -v ^#)
}

log_rotate(){
  # Load log.properties and record the configured log file name in the log.
  # The rotation itself is disabled; the former implementation is kept below,
  # commented out, for reference.
  load_log_properties
  for rotateMsg in "Load log.properties >>>" "Log filename  : ${log_filename}"
  do
    writeLog "${PRI_INFO}" "$rotateMsg"
  done
#  writeLog "${PRI_INFO}" "Rotate number : ${rotate_num}"
#  
#  num=${rotate_num}
#  
#  # delete the oldest file.
#  $rm -f ${PGPOOL_LOG_DIR}/${log_filename}.${num} > /dev/null 2>&1
#  
#  # .9 -> .10 ... .2 -> .1
#  while [ ${num} -gt 1 ]
#  do
#    sakiNum=${num}          # to  (ex:10)
#    num=`expr ${num} - 1`   # from(ex: 9)
#    $mv ${PGPOOL_LOG_DIR}/${log_filename}.${num} ${PGPOOL_LOG_DIR}/${log_filename}.${sakiNum} > /dev/null 2>&1
#  done
#  
#  # newest file -> .1
#  $mv ${PGPOOL_LOG_DIR}/${log_filename} ${PGPOOL_LOG_DIR}/${log_filename}.${num} > /dev/null 2>&1
#  
}

echo_sp(){  # args:msg
  # Write msg to standard output without a trailing newline, so the caller
  # can finish the line later.
  $echo -n "$1"
}

stdout(){   # args:msg
  # Write msg, followed by a newline, to standard output.
  $echo "$1"
}

errout(){   # args:msg
  # Write msg, followed by a newline, to standard error.
  $echo "$1" 1>&2
}

# -------------------- #

start_pgpool_smx(){
  # Launch pgpool in the background and verify that it came up.
  #   - announces the start on stdout and in the log, then calls log_rotate
  #     (which loads ${log_filename})
  #   - SYSLOG_FLAG=2 : pgpool output is piped to syslog
  #     otherwise     : pgpool output is appended to
  #                     ${PGPOOL_LOG_DIR}/${log_filename}
  #   - after ${SLEEPING_COUNT} seconds the process list is checked:
  #       found     -> lock file is created
  #       not found -> error is reported and the script exits with status 1
  MSG="$SVR_NAME START."
  stdout "$MSG"
  writeLog "${PRI_INFO}" "$MSG"
  log_rotate
  if [ "${SYSLOG_FLAG}" -eq 2 ] ; then
    # output to syslog
    $PGPOOL -f "${PGPOOL_CONF}" -n 2>&1 | $logger -p "${FACILITY}.${PRI_INFO}" -t "${SYSLOG_TAG}[$$]" &
  else
    # output to logfile[v3.0]
    # The target is quoted so a log directory containing whitespace is still
    # a single redirection word.
    $PGPOOL -f "${PGPOOL_CONF}" -n >> "${PGPOOL_LOG_DIR}/${log_filename}" 2>&1 &
  fi

  $sleep "$SLEEPING_COUNT"

  SMX_PID=`getPID`
  if [ -z "$SMX_PID" ] ; then
    # Running Error
    MSG="Fail to starting $SVR_NAME!!"
    errout "$MSG"
    writeLog "${PRI_ERR}" "$MSG"
    exit 1
  fi

  # Running ok
  $touch "$lockfile"
  writeLog "${PRI_INFO}" "Success to starting $SVR_NAME."
}

checkBothDBStatus(){
  # Report (on stdout) how many of the two backend databases are running.
  # Return : 0=both on , 1=both off , 2=either on

  # Read both backend host names from the pgpool configuration by evaluating
  # the matching lines as shell assignments.
  tmpRet=`$cat ${PGPOOL_CONF_FILE} | $grep "^backend_host_name"`
  eval $tmpRet
  tmpRet=`$cat ${PGPOOL_CONF_FILE} | $grep "^secondary_backend_host_name"`
  eval $tmpRet

  # As ${SMXD_USER}, run the process check script on each backend over ssh and
  # look for 'Database:on' in its output. (syslogDisableMode)
  su - ${SMXD_USER} -c "$ssh -2 ${backend_host_name} ${PROCESS_CHECK_ALL} 1 | $grep 'Database:on'" > /dev/null 2>&1
  ret1stDB=$?
  su - ${SMXD_USER} -c "$ssh -2 ${secondary_backend_host_name} ${PROCESS_CHECK_ALL} 1 | $grep 'Database:on'" > /dev/null 2>&1
  ret2ndDB=$?

  case "${ret1stDB}:${ret2ndDB}" in
    0:0)
      # Both databases are running.
      $echo 0
      ;;
    0:*|*:0)
      # Exactly one database is running.
      $echo 2
      ;;
    *)
      # Both databases are stopped.
      $echo 1
      ;;
  esac
}

checkReplicateDB(){
  # check syncDB(replication_enabled)
  # Runs "show pool_status" through psql as ${POSTGRES_USER} and inspects the
  # second comma-separated field of the 'replication_enabled' line.
  # Return (stdout) : 0=ok , 1=err
  #
  # The status line is held in a variable instead of a file with a predictable
  # name under /tmp, so a pre-created file or symlink at that path cannot be
  # overwritten through the redirection.
  replicateRet=1

  poolStatusLine=$(su - ${POSTGRES_USER} -c "$psql -F, -t -A -U ${NANAKODB} -c \"show pool_status\"" 2>/dev/null | $grep 'replication_enabled')
  # Exit status of the pipeline, i.e. whether grep found the line.
  psqlRes=$?

  if [ "${psqlRes}" -eq 0 ] ; then
    valReplicationEnabled=$($echo "${poolStatusLine}" | $awk -F',' '{print $2}')
    case "${valReplicationEnabled}" in
      ''|0)
        # replication error: disabled (0), or no value could be read.
        ;;
      *)
        replicateRet=0
        ;;
    esac
  fi

  $echo ${replicateRet}
}

checkSyncDB(){
  # check syncDB(data merge)
  # Probes every table through psql (run as ${POSTGRES_USER}) and logs each
  # table whose query fails as "Unmatched table".
  # Return (stdout) : 0=ok , 1=err
  syncRet=0

  # Relations listed by \d with -U ${NANAKODB}: each one is read in full.
  for TABLE in $(su - ${POSTGRES_USER} -c "$psql -F, -t -A -U ${NANAKODB} -c \"\\d\"" | $awk -F, '{print $2}')
  do
    [ $DEBUG -ne 0 ] && writeLog "${PRI_INFO}" ${TABLE}
    su - ${POSTGRES_USER} -c "$psql -t -A -U ${NANAKODB} -c \"select * from ${TABLE}\"" > /dev/null 2>&1 || {
      syncRet=1
      writeLog "${PRI_ERR}" "Unmatched table : ${TABLE}"
    }
  done

  # Relations listed by \d with -U ${LOGDB} whose line contains "_log":
  # only their time_stamp column is read.
  for TABLE in $(su - ${POSTGRES_USER} -c "$psql -F, -t -A -U ${LOGDB} -c \"\\d\"" | $grep "_log" | $awk -F, '{print $2}')
  do
    [ $DEBUG -ne 0 ] && writeLog "${PRI_INFO}" ${TABLE}
    su - ${POSTGRES_USER} -c "$psql -t -A -U ${LOGDB} -c \"select time_stamp from ${TABLE}\"" > /dev/null 2>&1 || {
      syncRet=1
      writeLog "${PRI_ERR}" "Unmatched table : ${TABLE}"
    }
  done

  $echo ${syncRet}
}


start(){
  # Start the cluster in normal (two database) mode.
  #   1. Nothing is started when pgpool is already running.
  #   2. checkBothDBStatus is polled up to ${BOOT_REPEAT_TIMES} times, one
  #      second apart, until both databases are up.
  #   3. Both up    : pgpool is started, then the replication flag and every
  #                   table are checked. If a check fails pgpool is stopped
  #                   again and the script exits 1; otherwise the health
  #                   check flag file named in pgpool.conf is removed.
  #      Otherwise  : an error is reported and the script exits 1.

  # Check duplicate boot
  SMX_PID=`getPID`
  if [ -n "$SMX_PID" ] ; then
    # Service already started: report it, but do not treat it as an error.
    MSG="$SVR_NAME already running."
    errout "$MSG"
    writeLog "${PRI_INFO}" "$MSG"
    return
  fi

  # repeat check `checkBothDBStatus`
  repeatCnt=0
  while [ "${repeatCnt}" -lt "${BOOT_REPEAT_TIMES}" ] ; do
    # get both DB status(0,1,2)
    bothDBstatus=`checkBothDBStatus`
    if [ "$bothDBstatus" -eq 0 ] ; then
      writeLog "${PRI_INFO}" "Both DB is ready. [ cnt = $repeatCnt ]"
      break
    fi
    # No break yet -> (bothDBstatus=1) : neither DB is ready.
    #                 (bothDBstatus=2) : one of the DBs is not ready.
    writeLog "${PRI_WARN}" "Both DB is not ready. [ ST = $bothDBstatus ] [ cnt = $repeatCnt ]"
    $sleep 1
    repeatCnt=$((repeatCnt + 1))
  done

  if [ "$bothDBstatus" -eq 0 ] ; then
    # Both psql is operating.
    # start pgpool-smx
    start_pgpool_smx

    # check syncDB(replication_enabled)
    echo_sp "Replication check ..."
    replicateRet=`checkReplicateDB`
    if [ "${replicateRet}" -eq 0 ] ; then
      stdout "OK"
    else
      stdout "ERR"
    fi

    # check syncDB(data merge) -- only meaningful while replication is enabled
    syncRet=0
    if [ "${replicateRet}" -eq 0 ] ; then
      echo_sp "Synchronization check "
      for TABLE in $(su - ${POSTGRES_USER} -c "$psql -F, -t -A -U ${NANAKODB} -c \"\\d\"" | $awk -F, '{print $2}')
      do
        if [ "$DEBUG" -ne 0 ] ; then $echo "${TABLE}" ; fi
        su - ${POSTGRES_USER} -c "$psql -t -A -U ${NANAKODB} -c \"select * from ${TABLE}\"" > /dev/null 2>&1
        if [ $? -ne 0 ] ; then
          syncRet=1
          writeLog "${PRI_ERR}" "Unmatched table : ${TABLE}"
        fi
        echo_sp "."
      done
      for TABLE in $(su - ${POSTGRES_USER} -c "$psql -F, -t -A -U ${LOGDB} -c \"\\d\"" | $grep "_log" | $awk -F, '{print $2}')
      do
        if [ "$DEBUG" -ne 0 ] ; then $echo "${TABLE}" ; fi
        su - ${POSTGRES_USER} -c "$psql -t -A -U ${LOGDB} -c \"select time_stamp from ${TABLE}\"" > /dev/null 2>&1
        if [ $? -ne 0 ] ; then
          syncRet=1
          writeLog "${PRI_ERR}" "Unmatched table : ${TABLE}"
        fi
        echo_sp "."
      done
      # The verdict is printed only when the check really ran; a skipped
      # check must not show up as a dangling "OK".
      if [ "${syncRet}" -eq 0 ] ; then
        stdout "OK"
      else
        stdout "ERR"
      fi
    fi

    if [ "${syncRet}" -ne 0 ] || [ "${replicateRet}" -ne 0 ] ; then
      # replication is disabled or the tables differ -> pgpool-smx stop
      stop
      errout "START ERROR!!"
      MSG="Both Database cannot synchronize! Cannot start Cluster."
      errout "$MSG"
      writeLog "${PRI_ERR}" "$MSG"
      exit 1
    fi

    # when the flg_file exists, it is deleted
    flg_file=`$cat "${PGPOOL_CONF}" | $grep "^health_check_file_name"`
    eval "$flg_file"
    if [ -n "${health_check_file_name}" ] && [ -e "${health_check_file_name}" ] ; then
      $rm -f "${health_check_file_name}" > /dev/null 2>&1
    fi

  elif [ "$bothDBstatus" -eq 1 ] ; then
    # Both psql is stopping.
    errout "START ERROR!!"
    MSG="Both Database is stopping!! Cannot start Cluster."
    errout "$MSG"
    writeLog "${PRI_ERR}" "$MSG"
    exit 1

  else
    # Either of psql is operating.
    # quit
    errout "START ERROR!!"
    MSG="Either of Database is stopping. Cluster was not started."
    errout "$MSG"
    writeLog "${PRI_ERR}" "$MSG"
    exit 1
  fi
}

single(){
  # Start the cluster in single mode, i.e. only while exactly one of the two
  # databases is running. With both up or both down an error is reported and
  # the script exits 1. An already running pgpool is only reported.

  # Check duplicate boot
  SMX_PID=`getPID`
  if [ -n "$SMX_PID" ] ; then
    # Service already started.
    MSG="$SVR_NAME already running."
    errout "$MSG"
    writeLog "${PRI_INFO}" "$MSG"
    return
  fi

  # get both DB status(0,1,2)
  bothDBstatus=`checkBothDBStatus`
  if [ $bothDBstatus -eq 0 ] ; then
    # Both psql is operating.
    MSG="Both Database is starting. Therefore cannot start single mode."
  elif [ $bothDBstatus -eq 1 ] ; then
    # Both psql is stopping.
    MSG="Both Database is stopping!! Cannot start Cluster."
  else
    # Either of psql is operating.
    # start pgpool-smx
    start_pgpool_smx
    return
  fi

  # Shared tail of the two refusal cases above.
  errout "START ERROR!!"
  errout "$MSG"
  writeLog "${PRI_ERR}" "$MSG"
  exit 1
}

stop(){
  # Stop pgpool with "-m fast stop". If the process is still there after
  # ${SLEEPING_COUNT} seconds it is sent a kill signal, the failure is
  # reported and the script exits 1. A pgpool that is not running is only
  # reported.

  # Check running.
  SMX_PID=`getPID`
  if [ -z "$SMX_PID" ] ; then
    # Service not running.
    MSG="$SVR_NAME is not running."
    errout "$MSG"
    writeLog "${PRI_INFO}" "$MSG"
#    exit 1
    return
  fi

  MSG="$SVR_NAME STOP."
  stdout "$MSG"
  writeLog "${PRI_INFO}" "$MSG"
#  $PGPOOL -f ${PGPOOL_CONF} -m fast stop 2>&1 &
  $PGPOOL -f ${PGPOOL_CONF} -m fast stop 2>&1

  $sleep $SLEEPING_COUNT

  SMX_PID=`getPID`
  if [ -n "$SMX_PID" ] ; then
    # Stop Error -> kill process
    $kill ${SMX_PID} > /dev/null 2>&1
    MSG="Fail to stopping $SVR_NAME!!"
    errout "$MSG"
    writeLog "${PRI_ERR}" "$MSG"
    exit 1
  fi

  # Stop ok
  $rm -f $lockfile > /dev/null
  writeLog "${PRI_INFO}" "Success to stopping $SVR_NAME."
}

restart(){
  # Announce the restart on stdout and in the log, then run stop followed by
  # start.
  MSG="$SVR_NAME RESTART."
  stdout "$MSG" ; writeLog "${PRI_INFO}" "$MSG"
  stop ; start
}

status(){
  # Print, and log at info priority, one line describing the cluster state.
  # ----- define status ----- #
  ST000="Cluster is running. Replication OK. Synchronization OK."
  ST001="Cluster is running. Replication OK. But , Synchronization ERROR!!"
  ST010="Cluster is running. But,Replication ERROR!!."
  ST020="Cluster is running. But,both Database is stopping!!"
  ST030="Cluster is running. But,either of Database is stopping."
  ST100="Cluster has stopped."
  ST200="Cluster is running , but PID_FILE not exist."
  # ------------------------- #

  getPIDfile
  SMX_PID=`getPID`
  FILE_PID=`$cat $pidfile 2>/dev/null`

  # Pick the message first; it is emitted once at the end.
  if [ -z "$SMX_PID" ] ; then
    # Process not found.
    MSG="$ST100"
  elif [ -z "$FILE_PID" ] || [ "$FILE_PID" != "$SMX_PID" ] ; then
    # Running, but the pid file is missing/empty or names another PID.
    MSG="$ST200"
  else
    # running -> check both DB status
    bothDBstatus=`checkBothDBStatus`
    if [ $bothDBstatus -eq 0 ] ; then
      # Both pgsql is operating -> check data synchronize
      replicateRet=`checkReplicateDB`
      if [ ${replicateRet} -eq 0 ] ; then
        syncRet=`checkSyncDB`
        if [ ${syncRet} -eq 0 ] ; then
          # synchronize ok
          MSG="$ST000"
        else
          # synchronize err
          MSG="$ST001"
        fi
      else
        # replication status err
        MSG="$ST010"
      fi
    elif [ $bothDBstatus -eq 1 ] ; then
      # Both pgsql is stopping.
      MSG="$ST020"
    else
      # Either of pgsql is operating.
      MSG="$ST030"
    fi
  fi

  stdout "$MSG"
  writeLog "${PRI_INFO}" "$MSG"
}


# See how we were called.
case "$1" in
  start | single | stop | status | restart)
    # Each supported action is implemented by the function of the same name.
    "$1"
    ;;
  *)
    errout "Usage: pgpool-smx {start|single|stop|status|restart}"
    exit 1
    ;;
esac

exit 0
