diff --git a/data/Dockerfiles/watchdog/watchdog.sh b/data/Dockerfiles/watchdog/watchdog.sh index c563f5a3..f0f5971b 100755 --- a/data/Dockerfiles/watchdog/watchdog.sh +++ b/data/Dockerfiles/watchdog/watchdog.sh @@ -171,7 +171,7 @@ fi external_checks() { err_count=0 diff_c=0 - THRESHOLD=1 + THRESHOLD=${EXTERNAL_CHECKS_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container GUID=$(mysql -u${DBUSER} -p${DBPASS} ${DBNAME} -e "SELECT version FROM versions WHERE application = 'GUID'" -BN) trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 @@ -204,7 +204,7 @@ external_checks() { nginx_checks() { err_count=0 diff_c=0 - THRESHOLD=5 + THRESHOLD=${NGINX_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -229,7 +229,7 @@ nginx_checks() { unbound_checks() { err_count=0 diff_c=0 - THRESHOLD=5 + THRESHOLD=${UNBOUND_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -262,7 +262,7 @@ redis_checks() { # A check for the local redis container err_count=0 diff_c=0 - THRESHOLD=5 + THRESHOLD=${REDIS_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -287,7 +287,7 @@ redis_checks() { mysql_checks() { err_count=0 diff_c=0 - THRESHOLD=5 + THRESHOLD=${MYSQL_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -313,7 +313,7 @@ mysql_checks() { sogo_checks() { err_count=0 diff_c=0 - THRESHOLD=5 + THRESHOLD=${SOGO_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -338,7 +338,7 @@ sogo_checks() { postfix_checks() { err_count=0 diff_c=0 - THRESHOLD=8 + THRESHOLD=${POSTFIX_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -364,7 +364,7 @@ postfix_checks() { clamd_checks() { err_count=0 diff_c=0 - THRESHOLD=15 + THRESHOLD=${CLAMD_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -389,7 +389,7 @@ clamd_checks() { dovecot_checks() { err_count=0 diff_c=0 - THRESHOLD=12 + THRESHOLD=${DOVECOT_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -418,7 +418,7 @@ dovecot_checks() { phpfpm_checks() { err_count=0 diff_c=0 - THRESHOLD=5 + THRESHOLD=${PHPFPM_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -444,7 +444,7 @@ phpfpm_checks() { ratelimit_checks() { err_count=0 diff_c=0 - THRESHOLD=1 + THRESHOLD=${RATELIMIT_THRESHOLD} RL_LOG_STATUS=$(redis-cli -h redis LRANGE RL_LOG 0 0 | jq .qid) # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 @@ -472,7 +472,7 @@ ratelimit_checks() { fail2ban_checks() { err_count=0 diff_c=0 - THRESHOLD=1 + THRESHOLD=${FAIL2BAN_THRESHOLD} F2B_LOG_STATUS=($(${REDIS_CMDLINE} --raw HKEYS F2B_ACTIVE_BANS)) F2B_RES= # Reduce error count by 2 after restarting an unhealthy container @@ -506,7 +506,7 @@ fail2ban_checks() { acme_checks() { err_count=0 diff_c=0 - THRESHOLD=1 + THRESHOLD=${ACME_THRESHOLD} ACME_LOG_STATUS=$(redis-cli -h redis GET ACME_FAIL_TIME) if [[ -z "${ACME_LOG_STATUS}" ]]; then ${REDIS_CMDLINE} SET ACME_FAIL_TIME 0 @@ -543,7 +543,7 @@ acme_checks() { ipv6nat_checks() { err_count=0 diff_c=0 - THRESHOLD=1 + THRESHOLD=${IPV6NAT_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -576,7 +576,7 @@ ipv6nat_checks() { rspamd_checks() { err_count=0 diff_c=0 - THRESHOLD=5 + THRESHOLD=${RSPAMD_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do @@ -611,7 +611,7 @@ Empty olefy_checks() { err_count=0 diff_c=0 - THRESHOLD=5 + THRESHOLD=${OLEFY_THRESHOLD} # Reduce error count by 2 after restarting an unhealthy container trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 while [ ${err_count} -lt ${THRESHOLD} ]; do diff --git a/docker-compose.yml b/docker-compose.yml index 19fd4084..5492a103 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -386,7 +386,7 @@ services: - /lib/modules:/lib/modules:ro watchdog-mailcow: - image: mailcow/watchdog:1.69 + image: mailcow/watchdog:1.70 # Debug #command: /watchdog.sh dns: @@ -416,6 +416,22 @@ services: - HTTPS_PORT=${HTTPS_PORT:-443} - REDIS_SLAVEOF_IP=${REDIS_SLAVEOF_IP:-} - REDIS_SLAVEOF_PORT=${REDIS_SLAVEOF_PORT:-} + - EXTERNAL_CHECKS_THRESHOLD=1 + - NGINX_THRESHOLD=5 + - UNBOUND_THRESHOLD=5 + - REDIS_THRESHOLD=5 + - MYSQL_THRESHOLD=5 + - SOGO_THRESHOLD=3 + - POSTFIX_THRESHOLD=8 + - CLAMD_THRESHOLD=15 + - DOVECOT_THRESHOLD=12 + - PHPFPM_THRESHOLD=5 + - RATELIMIT_THRESHOLD=1 + - FAIL2BAN_THRESHOLD=1 + - ACME_THRESHOLD=1 + - IPV6NAT_THRESHOLD=1 + - RSPAMD_THRESHOLD=5 + - OLEFY_THRESHOLD=5 networks: mailcow-network: aliases: