From 4b9f0228900246a1c4773d98477ec9ae07701b65 Mon Sep 17 00:00:00 2001
From: andryyy
Date: Sun, 7 Jul 2019 21:59:20 +0200
Subject: [PATCH] [Watchdog] Watch olefy, thanks to @ntimo

---
 data/Dockerfiles/watchdog/watchdog.sh | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/data/Dockerfiles/watchdog/watchdog.sh b/data/Dockerfiles/watchdog/watchdog.sh
index d2e1924c..09c9458d 100755
--- a/data/Dockerfiles/watchdog/watchdog.sh
+++ b/data/Dockerfiles/watchdog/watchdog.sh
@@ -499,6 +499,43 @@ Empty
   return 1
 }
 
+# Watchdog loop for the olefy-mailcow container.
+# Each pass probes TCP port 10055 on the container IP with check_tcp and adds
+# the plugin's exit status to err_count; a clean probe takes one error back
+# off (never below 0). The remaining health is reported through progress().
+# Globals: err_count, diff_c, THRESHOLD, host_ip, err_c_cur (all written)
+# Returns: 1 once err_count has reached THRESHOLD; it never returns 0
+olefy_checks() {
+  err_count=0
+  diff_c=0
+  THRESHOLD=20
+  # Reduce error count by 2 after restarting an unhealthy container
+  # (presumably whoever restarts it sends USR1 - sender is not in this hunk).
+  # Single-quoted on purpose: the handler has to read err_count when the
+  # signal arrives, not the 0 it holds while the trap is being installed.
+  trap '[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))' USR1
+  while [ ${err_count} -lt ${THRESHOLD} ]; do
+    # Keep only the last 50 lines of the probe log
+    touch /tmp/olefy-mailcow; echo "$(tail -50 /tmp/olefy-mailcow)" > /tmp/olefy-mailcow
+    host_ip=$(get_container_ip olefy-mailcow)
+    err_c_cur=${err_count}
+    # Probe output (stdout and stderr) is appended to the log
+    /usr/lib/nagios/plugins/check_tcp -4 -H ${host_ip} -p 10055 2>> /tmp/olefy-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
+    [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1
+    [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} ))
+    # Re-check after 1s when progress() returned 10, otherwise wait 10-39s
+    progress "Olefy" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
+    if [[ $? == 10 ]]; then
+      diff_c=0
+      sleep 1
+    else
+      diff_c=0
+      sleep $(( ( RANDOM % 30 ) + 10 ))
+    fi
+  done
+  return 1
+}
+
 # Notify about start
 [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "watchdog-mailcow" "Watchdog started monitoring mailcow."
 
@@ -617,6 +654,18 @@ done
 ) &
 BACKGROUND_TASKS+=($!)
 
+# Supervise olefy: whenever olefy_checks gives up, log it and write the
+# container name to /tmp/com_pipe, then start the check loop again.
+(
+while true; do
+  if ! olefy_checks; then
+    log_msg "Olefy hit error limit"
+    echo olefy-mailcow > /tmp/com_pipe
+  fi
+done
+) &
+BACKGROUND_TASKS+=($!)
+
 (
 while true; do
   if ! acme_checks; then