[Watchdog] Watch olefy, thanks to @ntimo

master
andryyy 2019-07-07 21:59:20 +02:00
parent 4b3826a4b3
commit 4b9f022890
No known key found for this signature in database
GPG Key ID: 8EC34FF2794E25EF
1 changed files with 35 additions and 0 deletions

View File

@ -499,6 +499,31 @@ Empty
return 1
}
olefy_checks() {
err_count=0
diff_c=0
THRESHOLD=20
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
touch /tmp/olefy-mailcow; echo "$(tail -50 /tmp/olefy-mailcow)" > /tmp/olefy-mailcow
host_ip=$(get_container_ip olefy-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_tcp -4 -H ${host_ip} -p 10055 2>> /tmp/olefy-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
[ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1
[ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} ))
progress "Olefy" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
if [[ $? == 10 ]]; then
diff_c=0
sleep 1
else
diff_c=0
sleep $(( ( RANDOM % 30 ) + 10 ))
fi
done
return 1
}
# Notify about start
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "watchdog-mailcow" "Watchdog started monitoring mailcow."
@ -617,6 +642,16 @@ done
) &
BACKGROUND_TASKS+=($!)
(
while true; do
if ! olefy_checks; then
log_msg "Olefy hit error limit"
echo olefy-mailcow > /tmp/com_pipe
fi
done
) &
BACKGROUND_TASKS+=($!)
(
while true; do
if ! acme_checks; then