[Watchdog] Watch mail queue (added inexpensive check via "find" instead of adding an API endpoint to dockerapi-mailcow)

master
andryyy 2020-05-31 11:39:20 +02:00
parent 7742b51be7
commit 063337b58d
No known key found for this signature in database
GPG Key ID: 8EC34FF2794E25EF
2 changed files with 49 additions and 1 deletions

View File

@ -88,6 +88,7 @@ log_msg() {
function mail_error() {
[[ -z ${1} ]] && return 1
# If the file "/tmp/${1}" exists, its content will be used as the body, even if ${2} is set
[[ -z ${2} ]] && BODY="Service was restarted on $(date), please check your mailcow installation." || BODY="$(date) - ${2}"
WATCHDOG_NOTIFY_EMAIL=$(echo "${WATCHDOG_NOTIFY_EMAIL}" | sed 's/"//;s|"$||')
# Some exceptions for subject and body formats
@ -524,6 +525,35 @@ ratelimit_checks() {
return 1
}
#######################################
# Watch the Postfix deferred queue and give up once it stays critical.
#
# Every pass counts the files below /var/spool/postfix/deferred and appends
# one status line to /tmp/mail_queue_status (which is trimmed to its last
# 50 lines beforehand). A pass at or above the critical limit raises
# err_count by one; any other pass lowers it by one, never below zero.
# The loop ends as soon as err_count reaches THRESHOLD.
#
# Globals:
#   MAILQ_THRESHOLD (read) - critical passes tolerated; 3 if unset or empty
#   MAILQ_CRIT      (read) - critical number of deferred items; 30 if unset
#                            or empty
#   THRESHOLD, err_count, err_c_cur, diff_c, MAILQ_LOG_STATUS (written)
# Arguments:
#   None
# Returns:
#   1 once the error limit has been reached; the function never returns 0.
#######################################
mailq_checks() {
  err_count=0
  diff_c=0
  # Fall back to sane limits when the environment does not provide them;
  # otherwise the numeric tests below are malformed and the function would
  # report a failure immediately on every call.
  THRESHOLD=${MAILQ_THRESHOLD:-3}
  local crit_limit=${MAILQ_CRIT:-30}
  # Reduce error count by 2 after restarting an unhealthy container.
  # Single quotes are required: the handler must read and update err_count
  # when the signal arrives, not the value it had when the trap was installed.
  trap '[ "${err_count}" -gt 1 ] && err_count=$(( err_count - 2 ))' USR1
  while [ "${err_count}" -lt "${THRESHOLD}" ]; do
    # Keep only the most recent 50 lines of the status file.
    touch /tmp/mail_queue_status; echo "$(tail -50 /tmp/mail_queue_status)" > /tmp/mail_queue_status
    MAILQ_LOG_STATUS=$(find /var/spool/postfix/deferred -type f | wc -l)
    # Log the queue size exactly once per pass.
    echo "Mail queue contains ${MAILQ_LOG_STATUS} items (critical limit is ${crit_limit}) at $(date)" >> /tmp/mail_queue_status
    err_c_cur=${err_count}
    if [ "${MAILQ_LOG_STATUS}" -ge "${crit_limit}" ]; then
      err_count=$(( err_count + 1 ))
    elif [ "${err_count}" -gt 0 ]; then
      # Healthy pass: recover one step, but never drop below zero.
      err_count=$(( err_count - 1 ))
    fi
    # -1 after a critical pass, 1 after a recovery step, 0 when unchanged.
    diff_c=$(( err_c_cur - err_count ))
    progress "Mail queue" "${THRESHOLD}" "$(( THRESHOLD - err_count ))" "${diff_c}"
    # NOTE(review): progress is defined elsewhere in this file; its exit
    # status 10 selects the fixed 60 second pause - confirm its meaning there.
    if [[ $? == 10 ]]; then
      diff_c=0
      sleep 60
    else
      diff_c=0
      # Randomised pause between 20 and 79 seconds.
      sleep $(( ( RANDOM % 60 ) + 20 ))
    fi
  done
  return 1
}
fail2ban_checks() {
err_count=0
diff_c=0
@ -825,6 +855,18 @@ PID=$!
echo "Spawned postfix_checks with PID ${PID}"
BACKGROUND_TASKS+=(${PID})
# Background worker: run mailq_checks forever; each time it reports a failure,
# log it and announce "mail_queue_status" on the communication pipe.
(
  while true; do
    mailq_checks && continue
    log_msg "Mail queue hit error limit"
    echo mail_queue_status > /tmp/com_pipe
  done
) &
# Remember the worker's PID alongside the other background tasks.
PID=$!
echo "Spawned mailq_checks with PID ${PID}"
BACKGROUND_TASKS+=(${PID})
(
while true; do
if ! dovecot_checks; then
@ -961,6 +1003,9 @@ while true; do
if [[ ${com_pipe_answer} == "ratelimit" ]]; then
log_msg "At least one ratelimit was applied"
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}"
elif [[ ${com_pipe_answer} == "mail_queue_status" ]]; then
log_msg "Mail queue status is critical"
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}"
elif [[ ${com_pipe_answer} == "external_checks" ]]; then
log_msg "Your mailcow is an open relay!"
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please stop mailcow now and check your network configuration!"

View File

@ -396,7 +396,7 @@ services:
- /lib/modules:/lib/modules:ro
watchdog-mailcow:
image: mailcow/watchdog:1.77
image: mailcow/watchdog:1.78
# Debug
#command: /watchdog.sh
dns:
@ -404,6 +404,7 @@ services:
volumes:
- rspamd-vol-1:/var/lib/rspamd
- mysql-socket-vol-1:/var/run/mysqld/
- postfix-vol-1:/var/spool/postfix
- ./data/assets/ssl:/etc/ssl/mail/:ro
restart: always
environment:
@ -447,6 +448,8 @@ services:
- IPV6NAT_THRESHOLD=1
- RSPAMD_THRESHOLD=5
- OLEFY_THRESHOLD=5
- MAILQ_THRESHOLD=3
- MAILQ_CRIT=30
networks:
mailcow-network:
aliases: