[Watchdog] Minor fixes, print last log lines on error

master
andryyy 2019-03-04 17:56:27 +01:00
parent ed7a8431b4
commit 5bc8289d32
No known key found for this signature in database
GPG Key ID: 8EC34FF2794E25EF
1 changed files with 13 additions and 10 deletions

View File

@ -37,7 +37,7 @@ progress() {
log_msg() {
if [[ ${2} != "no_redis" ]]; then
redis-cli -h redis LPUSH WATCHDOG_LOG "{\"time\":\"$(date +%s)\",\"message\":\"$(printf '%s' "${1}" | \
tr '%&;$"_[]{}-\r\n' ' ')\"}" > /dev/null
tr '\r\n%&;$"_[]{}-' ' ')\"}" > /dev/null
fi
echo $(date) $(printf '%s\n' "${1}")
}
@ -115,7 +115,7 @@ nginx_checks() {
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
cat /dev/null > /tmp/nginx-mailcow
touch /tmp/nginx-mailcow; echo "$(tail -50 /tmp/nginx-mailcow)" > /tmp/nginx-mailcow
host_ip=$(get_container_ip nginx-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_http -4 -H ${host_ip} -u / -p 8081 2>> /tmp/nginx-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
@ -140,7 +140,7 @@ unbound_checks() {
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
cat /dev/null > /tmp/unbound-mailcow
touch /tmp/unbound-mailcow; echo "$(tail -50 /tmp/unbound-mailcow)" > /tmp/unbound-mailcow
host_ip=$(get_container_ip unbound-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_dns -s ${host_ip} -H stackoverflow.com 2>> /tmp/unbound-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
@ -172,7 +172,7 @@ mysql_checks() {
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
cat /dev/null > /tmp/mysql-mailcow
touch /tmp/mysql-mailcow; echo "$(tail -50 /tmp/mysql-mailcow)" > /tmp/mysql-mailcow
host_ip=$(get_container_ip mysql-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_mysql -s /var/run/mysqld/mysqld.sock -u ${DBUSER} -p ${DBPASS} -d ${DBNAME} 2>> /tmp/mysql-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
@ -198,7 +198,7 @@ sogo_checks() {
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
cat /dev/null > /tmp/sogo-mailcow
touch /tmp/sogo-mailcow; echo "$(tail -50 /tmp/sogo-mailcow)" > /tmp/sogo-mailcow
host_ip=$(get_container_ip sogo-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_http -4 -H ${host_ip} -u /SOGo.index/ -p 20000 -R "SOGo\.MainUI" 2>> /tmp/sogo-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
@ -223,7 +223,7 @@ postfix_checks() {
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
cat /dev/null > /tmp/postfix-mailcow
touch /tmp/postfix-mailcow; echo "$(tail -50 /tmp/postfix-mailcow)" > /tmp/postfix-mailcow
host_ip=$(get_container_ip postfix-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_smtp -4 -H ${host_ip} -p 589 -f "watchdog@invalid" -C "RCPT TO:null@localhost" -C DATA -C . -R 250 2>> /tmp/postfix-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
@ -249,7 +249,7 @@ clamd_checks() {
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
cat /dev/null > /tmp/clamd-mailcow
touch /tmp/clamd-mailcow; echo "$(tail -50 /tmp/clamd-mailcow)" > /tmp/clamd-mailcow
host_ip=$(get_container_ip clamd-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_clamd -4 -H ${host_ip} 2>> /tmp/clamd-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
@ -274,7 +274,7 @@ dovecot_checks() {
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
cat /dev/null > /tmp/dovecot-mailcow
touch /tmp/dovecot-mailcow; echo "$(tail -50 /tmp/dovecot-mailcow)" > /tmp/dovecot-mailcow
host_ip=$(get_container_ip dovecot-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_smtp -4 -H ${host_ip} -p 24 -f "watchdog@invalid" -C "RCPT TO:<watchdog@invalid>" -L -R "User doesn't exist" 2>> /tmp/dovecot-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
@ -303,7 +303,7 @@ phpfpm_checks() {
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
cat /dev/null > /tmp/php-fpm-mailcow
touch /tmp/php-fpm-mailcow; echo "$(tail -50 /tmp/php-fpm-mailcow)" > /tmp/php-fpm-mailcow
host_ip=$(get_container_ip php-fpm-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_tcp -H ${host_ip} -p 9001 2>> /tmp/php-fpm-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
@ -388,7 +388,7 @@ rspamd_checks() {
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
cat /dev/null > /tmp/rspamd-mailcow
touch /tmp/rspamd-mailcow; echo "$(tail -50 /tmp/rspamd-mailcow)" > /tmp/rspamd-mailcow
host_ip=$(get_container_ip rspamd-mailcow)
err_c_cur=${err_count}
SCORE=$(/usr/bin/curl -s --data-binary @- --unix-socket /var/lib/rspamd/rspamd.sock http://rspamd/scan -d '
@ -561,6 +561,9 @@ while true; do
CONTAINER_ID=
HAS_INITDB=
read com_pipe_answer </tmp/com_pipe
if [ -s "/tmp/${com_pipe_answer}" ]; then
cat "/tmp/${com_pipe_answer}"
fi
if [[ ${com_pipe_answer} == "ratelimit" ]]; then
log_msg "At least one ratelimit was applied"
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "No further information available."