[Watchdog] Check for ACME failures

master
andryyy 2019-03-27 23:15:04 +01:00
parent 8b0f7fa81b
commit e7d17ad1ac
No known key found for this signature in database
GPG Key ID: 8EC34FF2794E25EF
1 changed files with 48 additions and 1 deletions

View File

@ -5,6 +5,8 @@ trap "kill 0" EXIT
# Prepare # Prepare
BACKGROUND_TASKS=() BACKGROUND_TASKS=()
echo "Waiting for containers to settle..."
sleep 10
if [[ "${USE_WATCHDOG}" =~ ^([nN][oO]|[nN])+$ ]]; then if [[ "${USE_WATCHDOG}" =~ ^([nN][oO]|[nN])+$ ]]; then
echo -e "$(date) - USE_WATCHDOG=n, skipping watchdog..." echo -e "$(date) - USE_WATCHDOG=n, skipping watchdog..."
@ -350,6 +352,38 @@ ratelimit_checks() {
return 1 return 1
} }
acme_checks() {
err_count=0
diff_c=0
THRESHOLD=1
ACME_LOG_STATUS=$(redis-cli -h redis GET ACME_FAIL_TIME)
if [[ -z "${ACME_LOG_STATUS}" ]]; then
redis-cli -h redis SET ACME_FAIL_TIME 0
ACME_LOG_STATUS=0
fi
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
err_c_cur=${err_count}
ACME_LOG_STATUS_PREV=${ACME_LOG_STATUS}
ACME_LOG_STATUS=$(redis-cli -h redis GET ACME_FAIL_TIME)
if [[ ${ACME_LOG_STATUS_PREV} != ${ACME_LOG_STATUS} ]]; then
err_count=$(( ${err_count} + 1 ))
fi
[ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1
[ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} ))
progress "ACME" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
if [[ $? == 10 ]]; then
diff_c=0
sleep 1
else
diff_c=0
sleep $(( ( RANDOM % 30 ) + 10 ))
fi
done
return 1
}
ipv6nat_checks() { ipv6nat_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
@ -518,6 +552,16 @@ done
) & ) &
BACKGROUND_TASKS+=($!) BACKGROUND_TASKS+=($!)
(
while true; do
if ! acme_checks; then
log_msg "ACME client hit error limit"
echo acme-tiny > /tmp/com_pipe
fi
done
) &
BACKGROUND_TASKS+=($!)
( (
while true; do while true; do
if ! ipv6nat_checks; then if ! ipv6nat_checks; then
@ -567,7 +611,10 @@ while true; do
fi fi
if [[ ${com_pipe_answer} == "ratelimit" ]]; then if [[ ${com_pipe_answer} == "ratelimit" ]]; then
log_msg "At least one ratelimit was applied" log_msg "At least one ratelimit was applied"
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "No further information available." [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please see mailcow UI logs for further information."
elif [[ ${com_pipe_answer} == "acme-tiny" ]]; then
log_msg "acme-tiny client returned non-zero exit code"
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check acme-mailcow for ruther information."
elif [[ ${com_pipe_answer} =~ .+-mailcow ]] || [[ ${com_pipe_answer} == "ipv6nat-mailcow" ]]; then elif [[ ${com_pipe_answer} =~ .+-mailcow ]] || [[ ${com_pipe_answer} == "ipv6nat-mailcow" ]]; then
kill -STOP ${BACKGROUND_TASKS[*]} kill -STOP ${BACKGROUND_TASKS[*]}
sleep 3 sleep 3