[Watchdog] Add certificate check for primary certificate; Add mail delay
parent
da6f510027
commit
bcad1af121
|
@ -87,9 +87,22 @@ log_msg() {
|
||||||
}
|
}
|
||||||
|
|
||||||
function mail_error() {
|
function mail_error() {
|
||||||
|
THROTTLE=
|
||||||
[[ -z ${1} ]] && return 1
|
[[ -z ${1} ]] && return 1
|
||||||
# If exists, body will be the content of "/tmp/${1}", even if ${2} is set
|
# If exists, body will be the content of "/tmp/${1}", even if ${2} is set
|
||||||
[[ -z ${2} ]] && BODY="Service was restarted on $(date), please check your mailcow installation." || BODY="$(date) - ${2}"
|
[[ -z ${2} ]] && BODY="Service was restarted on $(date), please check your mailcow installation." || BODY="$(date) - ${2}"
|
||||||
|
# If exists, mail will be throttled by argument in seconds
|
||||||
|
[[ ! -z ${3} ]] && THROTTLE=${3}
|
||||||
|
if [[ ! -z ${THROTTLE} ]]; then
|
||||||
|
TTL_LEFT="$(${REDIS_CMDLINE} TTL THROTTLE_${1} 2> /dev/null)"
|
||||||
|
if [[ "${TTL_LEFT}" == "-2" ]]; then
|
||||||
|
# Delay key not found, setting a delay key now
|
||||||
|
${REDIS_CMDLINE} SET THROTTLE_${1} 1 EX ${THROTTLE}
|
||||||
|
else
|
||||||
|
log_msg "Not sending notification email now, blocked for ${TTL_LEFT} seconds..."
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
WATCHDOG_NOTIFY_EMAIL=$(echo "${WATCHDOG_NOTIFY_EMAIL}" | sed 's/"//;s|"$||')
|
WATCHDOG_NOTIFY_EMAIL=$(echo "${WATCHDOG_NOTIFY_EMAIL}" | sed 's/"//;s|"$||')
|
||||||
# Some exceptions for subject and body formats
|
# Some exceptions for subject and body formats
|
||||||
if [[ ${1} == "fail2ban" ]]; then
|
if [[ ${1} == "fail2ban" ]]; then
|
||||||
|
@ -467,6 +480,28 @@ dovecot_repl_checks() {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cert_checks() {
|
||||||
|
err_count=0
|
||||||
|
diff_c=0
|
||||||
|
THRESHOLD=1
|
||||||
|
# Reduce error count by 2 after restarting an unhealthy container
|
||||||
|
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
|
||||||
|
while [ ${err_count} -lt ${THRESHOLD} ]; do
|
||||||
|
touch /tmp/certcheck; echo "$(tail -50 /tmp/certcheck)" > /tmp/certcheck
|
||||||
|
host_ip_postfix=$(get_container_ip postfix)
|
||||||
|
host_ip_dovecot=$(get_container_ip dovecot)
|
||||||
|
err_c_cur=${err_count}
|
||||||
|
/usr/lib/nagios/plugins/check_smtp -H ${host_ip_postfix} -p 25 -4 -S -D 7 2>> /tmp/certcheck 1>&2; err_count=$(( ${err_count} + $? ))
|
||||||
|
/usr/lib/nagios/plugins/check_imap -H ${host_ip_dovecot} -p 993 -4 -S -D 7 2>> /tmp/certcheck 1>&2; err_count=$(( ${err_count} + $? ))
|
||||||
|
[ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1
|
||||||
|
[ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} ))
|
||||||
|
progress "Certificate expiry check" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
|
||||||
|
# Always sleep 5 minutes, mail notifications are limited
|
||||||
|
sleep 300
|
||||||
|
done
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
phpfpm_checks() {
|
phpfpm_checks() {
|
||||||
err_count=0
|
err_count=0
|
||||||
diff_c=0
|
diff_c=0
|
||||||
|
@ -927,6 +962,18 @@ PID=$!
|
||||||
echo "Spawned fail2ban_checks with PID ${PID}"
|
echo "Spawned fail2ban_checks with PID ${PID}"
|
||||||
BACKGROUND_TASKS+=(${PID})
|
BACKGROUND_TASKS+=(${PID})
|
||||||
|
|
||||||
|
(
|
||||||
|
while true; do
|
||||||
|
if ! cert_checks; then
|
||||||
|
log_msg "Cert check hit error limit"
|
||||||
|
echo certcheck > /tmp/com_pipe
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
) &
|
||||||
|
PID=$!
|
||||||
|
echo "Spawned cert_checks with PID ${PID}"
|
||||||
|
BACKGROUND_TASKS+=(${PID})
|
||||||
|
|
||||||
(
|
(
|
||||||
while true; do
|
while true; do
|
||||||
if ! olefy_checks; then
|
if ! olefy_checks; then
|
||||||
|
@ -1013,11 +1060,18 @@ while true; do
|
||||||
elif [[ ${com_pipe_answer} == "mysql_repl_checks" ]]; then
|
elif [[ ${com_pipe_answer} == "mysql_repl_checks" ]]; then
|
||||||
log_msg "MySQL replication is not working properly"
|
log_msg "MySQL replication is not working properly"
|
||||||
# Define $2 to override message text, else print service was restarted at ...
|
# Define $2 to override message text, else print service was restarted at ...
|
||||||
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check the SQL replication status"
|
# Once mail per 10 minutes
|
||||||
|
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check the SQL replication status" 600
|
||||||
elif [[ ${com_pipe_answer} == "dovecot_repl_checks" ]]; then
|
elif [[ ${com_pipe_answer} == "dovecot_repl_checks" ]]; then
|
||||||
log_msg "Dovecot replication is not working properly"
|
log_msg "Dovecot replication is not working properly"
|
||||||
# Define $2 to override message text, else print service was restarted at ...
|
# Define $2 to override message text, else print service was restarted at ...
|
||||||
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check the Dovecot replicator status"
|
# Once mail per 10 minutes
|
||||||
|
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check the Dovecot replicator status" 600
|
||||||
|
elif [[ ${com_pipe_answer} == "certcheck" ]]; then
|
||||||
|
log_msg "Certificates are about to expire"
|
||||||
|
# Define $2 to override message text, else print service was restarted at ...
|
||||||
|
# Only mail once a day
|
||||||
|
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please renew your certificate" 86400
|
||||||
elif [[ ${com_pipe_answer} == "acme-mailcow" ]]; then
|
elif [[ ${com_pipe_answer} == "acme-mailcow" ]]; then
|
||||||
log_msg "acme-mailcow did not complete successfully"
|
log_msg "acme-mailcow did not complete successfully"
|
||||||
# Define $2 to override message text, else print service was restarted at ...
|
# Define $2 to override message text, else print service was restarted at ...
|
||||||
|
|
|
@ -401,7 +401,7 @@ services:
|
||||||
- /lib/modules:/lib/modules:ro
|
- /lib/modules:/lib/modules:ro
|
||||||
|
|
||||||
watchdog-mailcow:
|
watchdog-mailcow:
|
||||||
image: mailcow/watchdog:1.82
|
image: mailcow/watchdog:1.83
|
||||||
# Debug
|
# Debug
|
||||||
#command: /watchdog.sh
|
#command: /watchdog.sh
|
||||||
dns:
|
dns:
|
||||||
|
|
Loading…
Reference in New Issue