#######################################
# Print the "qid" field of the newest RL_LOG entry, as rendered by jq.
# Prints nothing when the list is empty.
# Returns: non-zero when redis-cli (or jq) fails, so the caller can tell a
#          failed lookup apart from a legitimately empty log.
#######################################
_ratelimit_latest_qid() {
  local entry
  entry=$(redis-cli -h redis LRANGE RL_LOG 0 0) || return 1
  printf '%s\n' "${entry}" | jq .qid
}

#######################################
# Poll the head of the Redis list RL_LOG and stop as soon as its "qid"
# differs from the previously observed one (i.e. the log changed).
# Globals:   err_count, diff_c, THRESHOLD, RL_LOG_STATUS, RL_LOG_STATUS_PREV
#            (written; kept global like the original implementation)
# Calls:     progress (defined elsewhere in this script), redis-cli, jq
# Returns:   always 1, once err_count has reached THRESHOLD
#######################################
ratelimit_checks() {
  err_count=0
  diff_c=0
  THRESHOLD=1
  # Becomes 1 once a lookup has succeeded, i.e. RL_LOG_STATUS is a real
  # observation and not the leftover of a failed redis-cli call.
  rl_baseline_ok=0
  RL_LOG_STATUS=$(_ratelimit_latest_qid) && rl_baseline_ok=1
  # Reduce error count by 2 on USR1. Single quotes are required: with double
  # quotes ${err_count} is expanded once, right here (as 0), and the handler
  # could never change anything.
  trap '[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))' USR1
  while [ ${err_count} -lt ${THRESHOLD} ]; do
    err_c_cur=${err_count}
    RL_LOG_STATUS_PREV=${RL_LOG_STATUS}
    if RL_LOG_STATUS=$(_ratelimit_latest_qid); then
      # Quoted right-hand side forces a literal comparison (no glob matching).
      # Without a baseline there is nothing to compare against yet.
      if [[ ${rl_baseline_ok} -eq 1 && "${RL_LOG_STATUS_PREV}" != "${RL_LOG_STATUS}" ]]; then
        err_count=$(( err_count + 1 ))
      fi
      rl_baseline_ok=1
    else
      # Lookup failed (e.g. Redis unreachable): keep the last known value so
      # neither the outage nor the recovery is mistaken for a log change.
      RL_LOG_STATUS=${RL_LOG_STATUS_PREV}
    fi
    if [ ${err_c_cur} -eq ${err_count} ]; then
      # Nothing changed this round: decay the counter, never below zero
      if [ ${err_count} -gt 0 ]; then
        err_count=$(( err_count - 1 ))
        diff_c=1
      fi
    else
      diff_c=$(( err_c_cur - err_count ))
    fi
    progress "Ratelimit" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
    progress_rc=$?
    diff_c=0
    # Exit status 10 from progress selects the short poll interval; its exact
    # meaning is defined by progress(), which is not part of this block.
    if [[ ${progress_rc} == 10 ]]; then
      sleep 1
    else
      sleep $(( ( RANDOM % 30 ) + 10 ))
    fi
  done
  return 1
}
# Monitor watchdog agents, stop script when agents fails and wait for respawn by Docker (restart:always:n) ( while true; do @@ -482,7 +519,10 @@ while true; do CONTAINER_ID= HAS_INITDB= read com_pipe_answer