diff --git a/data/Dockerfiles/dovecot/Dockerfile b/data/Dockerfiles/dovecot/Dockerfile
index 4c95f929..117da71e 100644
--- a/data/Dockerfiles/dovecot/Dockerfile
+++ b/data/Dockerfiles/dovecot/Dockerfile
@@ -117,6 +117,7 @@ COPY supervisord.conf /etc/supervisor/supervisord.conf
 COPY stop-supervisor.sh /usr/local/sbin/stop-supervisor.sh
 COPY quarantine_notify.py /usr/local/bin/quarantine_notify.py
 COPY quota_notify.py /usr/local/bin/quota_notify.py
+COPY repl_health.sh /usr/local/bin/repl_health.sh
 
 ENTRYPOINT ["/docker-entrypoint.sh"]
 CMD exec /usr/bin/supervisord -c /etc/supervisor/supervisord.conf
diff --git a/data/Dockerfiles/dovecot/docker-entrypoint.sh b/data/Dockerfiles/dovecot/docker-entrypoint.sh
index 265a28cd..fa633447 100755
--- a/data/Dockerfiles/dovecot/docker-entrypoint.sh
+++ b/data/Dockerfiles/dovecot/docker-entrypoint.sh
@@ -285,7 +285,8 @@ chmod +x /usr/lib/dovecot/sieve/rspamd-pipe-ham \
   /usr/local/bin/clean_q_aged.sh \
   /usr/local/bin/maildir_gc.sh \
   /usr/local/sbin/stop-supervisor.sh \
-  /usr/local/bin/quota_notify.py
+  /usr/local/bin/quota_notify.py \
+  /usr/local/bin/repl_health.sh
 
 if [[ "${MASTER}" =~ ^([yY][eE][sS]|[yY])+$ ]]; then
 # Setup cronjobs
@@ -297,14 +298,17 @@ echo '30 1 * * * root /usr/local/bin/sa-rules.sh >> /dev/console 2>&1' > /et
 echo '0 2 * * * root /usr/bin/curl http://solr:8983/solr/dovecot-fts/update?optimize=true >> /dev/console 2>&1' > /etc/cron.d/solr-optimize
 echo '*/20 * * * * vmail /usr/local/bin/quarantine_notify.py >> /dev/console 2>&1' > /etc/cron.d/quarantine_notify
 echo '15 4 * * * vmail /usr/local/bin/clean_q_aged.sh >> /dev/console 2>&1' > /etc/cron.d/clean_q_aged
-# Fix more than 1 hardlink issue
-touch /etc/crontab /etc/cron.*/*
+echo '*/5 * * * * vmail /usr/local/bin/repl_health.sh >> /dev/console 2>&1' > /etc/cron.d/repl_health
 else
 echo '25 * * * * vmail /usr/local/bin/maildir_gc.sh >> /dev/console 2>&1' > /etc/cron.d/maildir_gc
 echo '30 1 * * * root /usr/local/bin/sa-rules.sh >> /dev/console 2>&1' > /etc/cron.d/sa-rules
 echo '0 2 * * * root /usr/bin/curl http://solr:8983/solr/dovecot-fts/update?optimize=true >> /dev/console 2>&1' > /etc/cron.d/solr-optimize
+echo '*/5 * * * * vmail /usr/local/bin/repl_health.sh >> /dev/console 2>&1' > /etc/cron.d/repl_health
 fi
 
+# Fix more than 1 hardlink issue
+touch /etc/crontab /etc/cron.*/*
+
 # Clean old PID if any
 [[ -f /var/run/dovecot/master.pid ]] && rm /var/run/dovecot/master.pid
 
diff --git a/data/Dockerfiles/dovecot/repl_health.sh b/data/Dockerfiles/dovecot/repl_health.sh
new file mode 100755
index 00000000..be17dc1f
--- /dev/null
+++ b/data/Dockerfiles/dovecot/repl_health.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Publishes the Dovecot replication state to Redis as DOVECOT_REPL_HEALTH.
+# The value 1 means healthy. Any other value is the number of failed
+# replicator requests, or 0 when that number could not be determined.
+
+# Do not attempt to write to slave
+if [[ ! -z ${REDIS_SLAVEOF_IP} ]]; then
+  REDIS_CMDLINE="redis-cli -h ${REDIS_SLAVEOF_IP} -p ${REDIS_SLAVEOF_PORT}"
+else
+  REDIS_CMDLINE="redis-cli -h redis -p 6379"
+fi
+
+# Is replication active?
+# grep on file is less expensive than doveconf
+if ! grep -qi mail_replica /etc/dovecot/dovecot.conf; then
+  ${REDIS_CMDLINE} SET DOVECOT_REPL_HEALTH 1 > /dev/null
+  exit
+fi
+
+FAILED_SYNCS=$(doveadm replicator status | grep "Waiting 'failed' requests" | grep -oE '[0-9]+')
+
+# Set amount of failed jobs as DOVECOT_REPL_HEALTH
+# No failed job is healthy; 1 failed job for mailcow.local is expected and healthy
+if [[ "${FAILED_SYNCS}" == 0 ]] || [[ "${FAILED_SYNCS}" == 1 ]]; then
+  ${REDIS_CMDLINE} SET DOVECOT_REPL_HEALTH 1 > /dev/null
+elif [[ "${FAILED_SYNCS}" =~ ^[0-9]+$ ]]; then
+  printf "Dovecot replicator has %s failed jobs\n" "${FAILED_SYNCS}"
+  ${REDIS_CMDLINE} SET DOVECOT_REPL_HEALTH "${FAILED_SYNCS}" > /dev/null
+else
+  # doveadm failed or its output could not be parsed: report as unhealthy
+  echo "Could not determine Dovecot replicator status"
+  ${REDIS_CMDLINE} SET DOVECOT_REPL_HEALTH 0 > /dev/null
+fi
diff --git a/data/Dockerfiles/watchdog/watchdog.sh b/data/Dockerfiles/watchdog/watchdog.sh
index 428fd34d..7cf7530b 100755
--- a/data/Dockerfiles/watchdog/watchdog.sh
+++ b/data/Dockerfiles/watchdog/watchdog.sh
@@ -438,6 +438,36 @@ dovecot_checks() {
   return 1
 }
 
+# Monitors Dovecot replication via the Redis key DOVECOT_REPL_HEALTH.
+# Any value other than "1" counts as an error; a healthy reading lowers the
+# error count by one again. Returns 1 once DOVECOT_REPL_THRESHOLD is reached.
+dovecot_repl_checks() {
+  err_count=0
+  diff_c=0
+  THRESHOLD=${DOVECOT_REPL_THRESHOLD}
+  # Reduce error count by 2 after restarting an unhealthy container
+  # Single quotes defer expansion of err_count until USR1 is received
+  trap '[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))' USR1
+  while [ ${err_count} -lt ${THRESHOLD} ]; do
+    err_c_cur=${err_count}
+    D_REPL_STATUS=$(redis-cli --raw -h redis GET DOVECOT_REPL_HEALTH)
+    if [[ "${D_REPL_STATUS}" != "1" ]]; then
+      err_count=$(( ${err_count} + 1 ))
+    fi
+    [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1
+    [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} ))
+    progress "Dovecot replication" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
+    if [[ $? == 10 ]]; then
+      diff_c=0
+      sleep 1
+    else
+      diff_c=0
+      sleep $(( ( RANDOM % 60 ) + 20 ))
+    fi
+  done
+  return 1
+}
+
 phpfpm_checks() {
   err_count=0
   diff_c=0
@@ -807,6 +837,18 @@ PID=$!
 echo "Spawned dovecot_checks with PID ${PID}"
 BACKGROUND_TASKS+=(${PID})
 
+(
+while true; do
+  if ! dovecot_repl_checks; then
+    log_msg "Dovecot replication hit error limit"
+    echo dovecot_repl_checks > /tmp/com_pipe
+  fi
+done
+) &
+PID=$!
+echo "Spawned dovecot_repl_checks with PID ${PID}"
+BACKGROUND_TASKS+=(${PID})
+
 (
 while true; do
   if ! rspamd_checks; then
@@ -925,6 +967,9 @@ while true; do
   elif [[ ${com_pipe_answer} == "mysql_repl_checks" ]]; then
     log_msg "MySQL replication is not working properly"
     [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}"
+  elif [[ ${com_pipe_answer} == "dovecot_repl_checks" ]]; then
+    log_msg "Dovecot replication is not working properly"
+    [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check doveadm replicator status"
   elif [[ ${com_pipe_answer} == "acme-mailcow" ]]; then
     log_msg "acme-mailcow did not complete successfully"
     [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check acme-mailcow for further information."
diff --git a/docker-compose.yml b/docker-compose.yml index f81f5232..9239895e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -189,7 +189,7 @@ services: - sogo dovecot-mailcow: - image: mailcow/dovecot:1.117 + image: mailcow/dovecot:1.118 depends_on: - mysql-mailcow dns: @@ -394,7 +394,7 @@ services: - /lib/modules:/lib/modules:ro watchdog-mailcow: - image: mailcow/watchdog:1.73 + image: mailcow/watchdog:1.74 # Debug #command: /watchdog.sh dns: @@ -436,6 +436,7 @@ services: - POSTFIX_THRESHOLD=8 - CLAMD_THRESHOLD=15 - DOVECOT_THRESHOLD=12 + - DOVECOT_REPL_THRESHOLD=2 - PHPFPM_THRESHOLD=5 - RATELIMIT_THRESHOLD=1 - FAIL2BAN_THRESHOLD=1