[Watchdog] Define thresholds in docker-compose(.override) file

master
andryyy 2020-02-21 08:48:40 +01:00
parent 3d2962a12d
commit 55151b1313
No known key found for this signature in database
GPG Key ID: 8EC34FF2794E25EF
2 changed files with 33 additions and 17 deletions

View File

@@ -171,7 +171,7 @@ fi
external_checks() { external_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=1 THRESHOLD=${EXTERNAL_CHECKS_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
GUID=$(mysql -u${DBUSER} -p${DBPASS} ${DBNAME} -e "SELECT version FROM versions WHERE application = 'GUID'" -BN) GUID=$(mysql -u${DBUSER} -p${DBPASS} ${DBNAME} -e "SELECT version FROM versions WHERE application = 'GUID'" -BN)
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
@@ -204,7 +204,7 @@ external_checks() {
nginx_checks() { nginx_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=5 THRESHOLD=${NGINX_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -229,7 +229,7 @@ nginx_checks() {
unbound_checks() { unbound_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=5 THRESHOLD=${UNBOUND_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -262,7 +262,7 @@ redis_checks() {
# A check for the local redis container # A check for the local redis container
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=5 THRESHOLD=${REDIS_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -287,7 +287,7 @@ redis_checks() {
mysql_checks() { mysql_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=5 THRESHOLD=${MYSQL_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -313,7 +313,7 @@ mysql_checks() {
sogo_checks() { sogo_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=5 THRESHOLD=${SOGO_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -338,7 +338,7 @@ sogo_checks() {
postfix_checks() { postfix_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=8 THRESHOLD=${POSTFIX_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -364,7 +364,7 @@ postfix_checks() {
clamd_checks() { clamd_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=15 THRESHOLD=${CLAMD_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -389,7 +389,7 @@ clamd_checks() {
dovecot_checks() { dovecot_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=12 THRESHOLD=${DOVECOT_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -418,7 +418,7 @@ dovecot_checks() {
phpfpm_checks() { phpfpm_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=5 THRESHOLD=${PHPFPM_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -444,7 +444,7 @@ phpfpm_checks() {
ratelimit_checks() { ratelimit_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=1 THRESHOLD=${RATELIMIT_THRESHOLD}
RL_LOG_STATUS=$(redis-cli -h redis LRANGE RL_LOG 0 0 | jq .qid) RL_LOG_STATUS=$(redis-cli -h redis LRANGE RL_LOG 0 0 | jq .qid)
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
@@ -472,7 +472,7 @@ ratelimit_checks() {
fail2ban_checks() { fail2ban_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=1 THRESHOLD=${FAIL2BAN_THRESHOLD}
F2B_LOG_STATUS=($(${REDIS_CMDLINE} --raw HKEYS F2B_ACTIVE_BANS)) F2B_LOG_STATUS=($(${REDIS_CMDLINE} --raw HKEYS F2B_ACTIVE_BANS))
F2B_RES= F2B_RES=
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
@@ -506,7 +506,7 @@ fail2ban_checks() {
acme_checks() { acme_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=1 THRESHOLD=${ACME_THRESHOLD}
ACME_LOG_STATUS=$(redis-cli -h redis GET ACME_FAIL_TIME) ACME_LOG_STATUS=$(redis-cli -h redis GET ACME_FAIL_TIME)
if [[ -z "${ACME_LOG_STATUS}" ]]; then if [[ -z "${ACME_LOG_STATUS}" ]]; then
${REDIS_CMDLINE} SET ACME_FAIL_TIME 0 ${REDIS_CMDLINE} SET ACME_FAIL_TIME 0
@@ -543,7 +543,7 @@ acme_checks() {
ipv6nat_checks() { ipv6nat_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=1 THRESHOLD=${IPV6NAT_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -576,7 +576,7 @@ ipv6nat_checks() {
rspamd_checks() { rspamd_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=5 THRESHOLD=${RSPAMD_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do
@@ -611,7 +611,7 @@ Empty
olefy_checks() { olefy_checks() {
err_count=0 err_count=0
diff_c=0 diff_c=0
THRESHOLD=5 THRESHOLD=${OLEFY_THRESHOLD}
# Reduce error count by 2 after restarting an unhealthy container # Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do while [ ${err_count} -lt ${THRESHOLD} ]; do

View File

@@ -386,7 +386,7 @@ services:
- /lib/modules:/lib/modules:ro - /lib/modules:/lib/modules:ro
watchdog-mailcow: watchdog-mailcow:
image: mailcow/watchdog:1.69 image: mailcow/watchdog:1.70
# Debug # Debug
#command: /watchdog.sh #command: /watchdog.sh
dns: dns:
@@ -416,6 +416,22 @@ services:
- HTTPS_PORT=${HTTPS_PORT:-443} - HTTPS_PORT=${HTTPS_PORT:-443}
- REDIS_SLAVEOF_IP=${REDIS_SLAVEOF_IP:-} - REDIS_SLAVEOF_IP=${REDIS_SLAVEOF_IP:-}
- REDIS_SLAVEOF_PORT=${REDIS_SLAVEOF_PORT:-} - REDIS_SLAVEOF_PORT=${REDIS_SLAVEOF_PORT:-}
- EXTERNAL_CHECKS_THRESHOLD=1
- NGINX_THRESHOLD=5
- UNBOUND_THRESHOLD=5
- REDIS_THRESHOLD=5
- MYSQL_THRESHOLD=5
- SOGO_THRESHOLD=3
- POSTFIX_THRESHOLD=8
- CLAMD_THRESHOLD=15
- DOVECOT_THRESHOLD=12
- PHPFPM_THRESHOLD=5
- RATELIMIT_THRESHOLD=1
- FAIL2BAN_THRESHOLD=1
- ACME_THRESHOLD=1
- IPV6NAT_THRESHOLD=1
- RSPAMD_THRESHOLD=5
- OLEFY_THRESHOLD=5
networks: networks:
mailcow-network: mailcow-network:
aliases: aliases: