[Watchdog] Watch replication, if any (unsupported)
parent
d86be26387
commit
5944595be3
|
@ -117,6 +117,7 @@ COPY supervisord.conf /etc/supervisor/supervisord.conf
|
||||||
COPY stop-supervisor.sh /usr/local/sbin/stop-supervisor.sh
|
COPY stop-supervisor.sh /usr/local/sbin/stop-supervisor.sh
|
||||||
COPY quarantine_notify.py /usr/local/bin/quarantine_notify.py
|
COPY quarantine_notify.py /usr/local/bin/quarantine_notify.py
|
||||||
COPY quota_notify.py /usr/local/bin/quota_notify.py
|
COPY quota_notify.py /usr/local/bin/quota_notify.py
|
||||||
|
COPY repl_health.sh /usr/local/bin/repl_health.sh
|
||||||
|
|
||||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||||
CMD exec /usr/bin/supervisord -c /etc/supervisor/supervisord.conf
|
CMD exec /usr/bin/supervisord -c /etc/supervisor/supervisord.conf
|
||||||
|
|
|
@ -285,7 +285,8 @@ chmod +x /usr/lib/dovecot/sieve/rspamd-pipe-ham \
|
||||||
/usr/local/bin/clean_q_aged.sh \
|
/usr/local/bin/clean_q_aged.sh \
|
||||||
/usr/local/bin/maildir_gc.sh \
|
/usr/local/bin/maildir_gc.sh \
|
||||||
/usr/local/sbin/stop-supervisor.sh \
|
/usr/local/sbin/stop-supervisor.sh \
|
||||||
/usr/local/bin/quota_notify.py
|
/usr/local/bin/quota_notify.py \
|
||||||
|
/usr/local/bin/repl_health.sh
|
||||||
|
|
||||||
if [[ "${MASTER}" =~ ^([yY][eE][sS]|[yY])+$ ]]; then
|
if [[ "${MASTER}" =~ ^([yY][eE][sS]|[yY])+$ ]]; then
|
||||||
# Setup cronjobs
|
# Setup cronjobs
|
||||||
|
@ -297,14 +298,17 @@ echo '30 1 * * * root /usr/local/bin/sa-rules.sh >> /dev/console 2>&1' > /et
|
||||||
echo '0 2 * * * root /usr/bin/curl http://solr:8983/solr/dovecot-fts/update?optimize=true >> /dev/console 2>&1' > /etc/cron.d/solr-optimize
|
echo '0 2 * * * root /usr/bin/curl http://solr:8983/solr/dovecot-fts/update?optimize=true >> /dev/console 2>&1' > /etc/cron.d/solr-optimize
|
||||||
echo '*/20 * * * * vmail /usr/local/bin/quarantine_notify.py >> /dev/console 2>&1' > /etc/cron.d/quarantine_notify
|
echo '*/20 * * * * vmail /usr/local/bin/quarantine_notify.py >> /dev/console 2>&1' > /etc/cron.d/quarantine_notify
|
||||||
echo '15 4 * * * vmail /usr/local/bin/clean_q_aged.sh >> /dev/console 2>&1' > /etc/cron.d/clean_q_aged
|
echo '15 4 * * * vmail /usr/local/bin/clean_q_aged.sh >> /dev/console 2>&1' > /etc/cron.d/clean_q_aged
|
||||||
# Fix more than 1 hardlink issue
|
echo '*/5 * * * * vmail /usr/local/bin/repl_health.sh >> /dev/console 2>&1' > /etc/cron.d/repl_health
|
||||||
touch /etc/crontab /etc/cron.*/*
|
|
||||||
else
|
else
|
||||||
echo '25 * * * * vmail /usr/local/bin/maildir_gc.sh >> /dev/console 2>&1' > /etc/cron.d/maildir_gc
|
echo '25 * * * * vmail /usr/local/bin/maildir_gc.sh >> /dev/console 2>&1' > /etc/cron.d/maildir_gc
|
||||||
echo '30 1 * * * root /usr/local/bin/sa-rules.sh >> /dev/console 2>&1' > /etc/cron.d/sa-rules
|
echo '30 1 * * * root /usr/local/bin/sa-rules.sh >> /dev/console 2>&1' > /etc/cron.d/sa-rules
|
||||||
echo '0 2 * * * root /usr/bin/curl http://solr:8983/solr/dovecot-fts/update?optimize=true >> /dev/console 2>&1' > /etc/cron.d/solr-optimize
|
echo '0 2 * * * root /usr/bin/curl http://solr:8983/solr/dovecot-fts/update?optimize=true >> /dev/console 2>&1' > /etc/cron.d/solr-optimize
|
||||||
|
echo '*/5 * * * * vmail /usr/local/bin/repl_health.sh >> /dev/console 2>&1' > /etc/cron.d/repl_health
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Fix more than 1 hardlink issue
|
||||||
|
touch /etc/crontab /etc/cron.*/*
|
||||||
|
|
||||||
# Clean old PID if any
|
# Clean old PID if any
|
||||||
[[ -f /var/run/dovecot/master.pid ]] && rm /var/run/dovecot/master.pid
|
[[ -f /var/run/dovecot/master.pid ]] && rm /var/run/dovecot/master.pid
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
#!/bin/bash
#
# repl_health.sh - publish the health of Dovecot replication to Redis.
#
# Writes the key DOVECOT_REPL_HEALTH:
#    1  replication is not configured, or no more than one failed sync waits
#    N  number of waiting 'failed' replicator requests (N > 1)
#   -1  the failed-request counter could not be read from doveadm
#
# Environment:
#   REDIS_SLAVEOF_IP    if set, write to this Redis host instead of "redis"
#   REDIS_SLAVEOF_PORT  port used together with REDIS_SLAVEOF_IP (default 6379)

# Do not attempt to write to slave
# An array keeps host and port as separate words without relying on
# word-splitting of a command string.
REDIS_CMD=(redis-cli -h redis -p 6379)
if [[ -n "${REDIS_SLAVEOF_IP}" ]]; then
  REDIS_CMD=(redis-cli -h "${REDIS_SLAVEOF_IP}" -p "${REDIS_SLAVEOF_PORT:-6379}")
fi

# Store $1 as the current replication health value.
set_repl_health() {
  "${REDIS_CMD[@]}" SET DOVECOT_REPL_HEALTH "$1" > /dev/null
}

# Is replication active?
# grep on file is less expensive than doveconf
if ! grep -qi mail_replica /etc/dovecot/dovecot.conf; then
  set_repl_health 1
  exit
fi

FAILED_SYNCS=$(doveadm replicator status | grep "Waiting 'failed' requests" | grep -oE '[0-9]+')

# Set amount of failed jobs as DOVECOT_REPL_HEALTH
# 1 failed job for mailcow.local is expected and healthy
if [[ ! "${FAILED_SYNCS}" =~ ^[0-9]+$ ]]; then
  # doveadm failed or its output changed: report that explicitly instead of
  # printing a misleading "0 failed jobs" and storing an empty value.
  printf 'Cannot determine the number of failed Dovecot replicator jobs\n'
  set_repl_health -1
elif [[ "${FAILED_SYNCS}" == 0 || "${FAILED_SYNCS}" == 1 ]]; then
  # No failed job at all is healthy, too.
  set_repl_health 1
else
  printf 'Dovecot replicator has %d failed jobs\n' "${FAILED_SYNCS}"
  set_repl_health "${FAILED_SYNCS}"
fi
|
|
@ -438,6 +438,33 @@ dovecot_checks() {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Watch the Dovecot replication health value stored in Redis.
# Polls the key DOVECOT_REPL_HEALTH. Every reading other than "1" raises the
# error counter by one; every healthy reading lowers it by one (never below
# zero). The function keeps looping until the counter reaches the threshold.
# Globals:
#   DOVECOT_REPL_THRESHOLD (read) - error limit, falls back to 2 when unset
#   err_count, err_c_cur, diff_c, THRESHOLD, D_REPL_STATUS (written)
# Signals:
#   USR1 - lowers the error counter by 2 if it is greater than 1
# Returns:
#   1 once the error limit has been reached
#######################################
dovecot_repl_checks() {
  err_count=0
  diff_c=0
  # Without a fallback an unset threshold breaks the loop test below and the
  # function would report a failure immediately.
  THRESHOLD=${DOVECOT_REPL_THRESHOLD:-2}
  # Reduce error count by 2 after restarting an unhealthy container
  # Single quotes are required: the counter has to be evaluated when the
  # signal arrives, not once while the trap is being installed.
  trap '[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))' USR1
  while [ ${err_count} -lt ${THRESHOLD} ]; do
    err_c_cur=${err_count}
    D_REPL_STATUS=$(redis-cli --raw -h redis GET DOVECOT_REPL_HEALTH)
    if [[ "${D_REPL_STATUS}" != "1" ]]; then
      err_count=$(( ${err_count} + 1 ))
    fi
    # Healthy reading: step the counter back down, but not below zero
    [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1
    [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} ))
    progress "Dovecot replication" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
    # NOTE(review): progress is defined outside this block; status 10
    # presumably asks for a quick re-check - confirm against its definition.
    if [[ $? == 10 ]]; then
      diff_c=0
      sleep 1
    else
      diff_c=0
      sleep $(( ( RANDOM % 60 ) + 20 ))
    fi
  done
  return 1
}
|
||||||
|
|
||||||
phpfpm_checks() {
|
phpfpm_checks() {
|
||||||
err_count=0
|
err_count=0
|
||||||
diff_c=0
|
diff_c=0
|
||||||
|
@ -807,6 +834,18 @@ PID=$!
|
||||||
echo "Spawned dovecot_checks with PID ${PID}"
|
echo "Spawned dovecot_checks with PID ${PID}"
|
||||||
BACKGROUND_TASKS+=(${PID})
|
BACKGROUND_TASKS+=(${PID})
|
||||||
|
|
||||||
|
# Supervise Dovecot replication in a background subshell and remember its PID.
(
  while :; do
    # dovecot_repl_checks returns (non-zero) once its error limit is reached;
    # a zero status simply starts the next round.
    dovecot_repl_checks && continue
    log_msg "Dovecot hit error limit"
    echo dovecot_repl_checks > /tmp/com_pipe
  done
) &
PID=$!
echo "Spawned dovecot_repl_checks with PID ${PID}"
BACKGROUND_TASKS+=(${PID})
|
||||||
|
|
||||||
(
|
(
|
||||||
while true; do
|
while true; do
|
||||||
if ! rspamd_checks; then
|
if ! rspamd_checks; then
|
||||||
|
@ -925,6 +964,9 @@ while true; do
|
||||||
elif [[ ${com_pipe_answer} == "mysql_repl_checks" ]]; then
|
elif [[ ${com_pipe_answer} == "mysql_repl_checks" ]]; then
|
||||||
log_msg "MySQL replication is not working properly"
|
log_msg "MySQL replication is not working properly"
|
||||||
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}"
|
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}"
|
||||||
|
elif [[ ${com_pipe_answer} == "dovecot_repl_checks" ]]; then
|
||||||
|
log_msg "Dovecot replication is not working properly" "Please check doveadm replicator status"
|
||||||
|
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}"
|
||||||
elif [[ ${com_pipe_answer} == "acme-mailcow" ]]; then
|
elif [[ ${com_pipe_answer} == "acme-mailcow" ]]; then
|
||||||
log_msg "acme-mailcow did not complete successfully"
|
log_msg "acme-mailcow did not complete successfully"
|
||||||
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check acme-mailcow for further information."
|
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check acme-mailcow for further information."
|
||||||
|
|
|
@ -189,7 +189,7 @@ services:
|
||||||
- sogo
|
- sogo
|
||||||
|
|
||||||
dovecot-mailcow:
|
dovecot-mailcow:
|
||||||
image: mailcow/dovecot:1.117
|
image: mailcow/dovecot:1.118
|
||||||
depends_on:
|
depends_on:
|
||||||
- mysql-mailcow
|
- mysql-mailcow
|
||||||
dns:
|
dns:
|
||||||
|
@ -394,7 +394,7 @@ services:
|
||||||
- /lib/modules:/lib/modules:ro
|
- /lib/modules:/lib/modules:ro
|
||||||
|
|
||||||
watchdog-mailcow:
|
watchdog-mailcow:
|
||||||
image: mailcow/watchdog:1.73
|
image: mailcow/watchdog:1.74
|
||||||
# Debug
|
# Debug
|
||||||
#command: /watchdog.sh
|
#command: /watchdog.sh
|
||||||
dns:
|
dns:
|
||||||
|
@ -436,6 +436,7 @@ services:
|
||||||
- POSTFIX_THRESHOLD=8
|
- POSTFIX_THRESHOLD=8
|
||||||
- CLAMD_THRESHOLD=15
|
- CLAMD_THRESHOLD=15
|
||||||
- DOVECOT_THRESHOLD=12
|
- DOVECOT_THRESHOLD=12
|
||||||
|
- DOVECOT_REPL_THRESHOLD=2
|
||||||
- PHPFPM_THRESHOLD=5
|
- PHPFPM_THRESHOLD=5
|
||||||
- RATELIMIT_THRESHOLD=1
|
- RATELIMIT_THRESHOLD=1
|
||||||
- FAIL2BAN_THRESHOLD=1
|
- FAIL2BAN_THRESHOLD=1
|
||||||
|
|
Loading…
Reference in New Issue