From 424bc997ad98fb28fcc72793a6c6a8480fa90b46 Mon Sep 17 00:00:00 2001
From: andryyy
Date: Thu, 27 Dec 2018 08:20:49 +0100
Subject: [PATCH] [Watchdog] Add check for IPv6 NAT: Make sure IPv6 NAT
 container was started at least 30s after other containers [Compose] ipv6nat
 depends on all containers [Compose] Update watchdog image

---
Review notes (ignored by git am):
 - ipv6nat_checks: the USR1 trap is single-quoted so err_count is read when the
   signal arrives; the container list is fetched once per round; the start-time
   comparison is skipped when a timestamp is missing instead of treating the
   empty value as 0 and restarting ipv6nat.
 - Hunk offsets and the diffstat were recomputed for the longer function. The
   post-image blob id on the watchdog.sh index line still refers to the
   originally submitted revision.
 - Line breaks and indentation were restored by convention; verify the
   whitespace of context lines against the tree before applying.

 data/Dockerfiles/watchdog/watchdog.sh | 60 ++++++++++++++++++++++++++++++++++--
 docker-compose.yml                    | 18 ++++++++++-
 2 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/data/Dockerfiles/watchdog/watchdog.sh b/data/Dockerfiles/watchdog/watchdog.sh
index 15bf6e2a..c81e720f 100755
--- a/data/Dockerfiles/watchdog/watchdog.sh
+++ b/data/Dockerfiles/watchdog/watchdog.sh
@@ -350,6 +350,47 @@ ratelimit_checks() {
   return 1
 }
 
+# Watch the start order of the ipv6nat container: it must have been started at
+# least 30s after the most recently started sibling container. Loops until the
+# error count reaches THRESHOLD, then returns 1 (the function never returns 0).
+ipv6nat_checks() {
+  err_count=0
+  diff_c=0
+  THRESHOLD=1
+  # Reduce error count by 2 after restarting an unhealthy container
+  # (single-quoted so err_count is evaluated when USR1 arrives, not when the trap is set)
+  trap '[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))' USR1
+  while [ ${err_count} -lt ${THRESHOLD} ]; do
+    err_c_cur=${err_count}
+    # Query the API once per round so the ID and both timestamps come from one snapshot
+    CONTAINERS_JSON=$(curl --silent --insecure https://dockerapi/containers/json)
+    IPV6NAT_CONTAINER_ID=$(jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], id: .Id}" <<< "${CONTAINERS_JSON}" | jq -rc "select( .name | tostring | contains(\"ipv6nat\")) | .id")
+    if [[ ! -z ${IPV6NAT_CONTAINER_ID} ]]; then
+      LATEST_STARTED="$(jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], StartedAt: .State.StartedAt}" <<< "${CONTAINERS_JSON}" | jq -rc "select( .name | tostring | contains(\"ipv6nat\") | not)" | jq -rc .StartedAt | xargs -n1 date +%s -d | sort -n | tail -n1)"
+      LATEST_IPV6NAT="$(jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], StartedAt: .State.StartedAt}" <<< "${CONTAINERS_JSON}" | jq -rc "select( .name | tostring | contains(\"ipv6nat\"))" | jq -rc .StartedAt | xargs -n1 date +%s -d | sort -n | tail -n1)"
+      # Skip the comparison when a timestamp is missing; an empty value would
+      # otherwise be evaluated as 0 by -lt and count as an error
+      if [[ ${LATEST_STARTED} =~ ^[0-9]+$ ]] && [[ ${LATEST_IPV6NAT} =~ ^[0-9]+$ ]]; then
+        DIFFERENCE_START_TIME=$(( LATEST_IPV6NAT - LATEST_STARTED ))
+        if [[ "${DIFFERENCE_START_TIME}" -lt 30 ]]; then
+          err_count=$(( ${err_count} + 1 ))
+        fi
+      fi
+    fi
+    [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1
+    [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} ))
+    progress "IPv6 NAT" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
+    if [[ $? == 10 ]]; then
+      diff_c=0
+      sleep 1
+    else
+      diff_c=0
+      sleep $(( ( RANDOM % 30 ) + 10 ))
+    fi
+  done
+  return 1
+}
+
 rspamd_checks() {
   err_count=0
   diff_c=0
@@ -485,6 +526,17 @@ while true; do
 done
 ) &
 BACKGROUND_TASKS+=($!)
+
+(
+while true; do
+  if ! ipv6nat_checks; then
+    log_msg "IPv6 NAT warning: ipv6nat container was not started at least 30s after siblings (not an error)"
+    echo ipv6nat > /tmp/com_pipe
+  fi
+done
+) &
+BACKGROUND_TASKS+=($!)
+
 # Monitor watchdog agents, stop script when agents fails and wait for respawn by Docker (restart:always:n)
 (
 while true; do
@@ -522,7 +574,7 @@ while true; do
   if [[ ${com_pipe_answer} == "ratelimit" ]]; then
     log_msg "At least one ratelimit was applied"
     [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "No further information available."
-  elif [[ ${com_pipe_answer} =~ .+-mailcow ]]; then
+  elif [[ ${com_pipe_answer} =~ .+-mailcow ]] || [[ ${com_pipe_answer} == "ipv6nat" ]]; then
     kill -STOP ${BACKGROUND_TASKS[*]}
     sleep 3
     CONTAINER_ID=$(curl --silent --insecure https://dockerapi/containers/json | jq -r ".[] | {name: .Config.Labels[\"com.docker.compose.service\"], id: .Id}" | jq -rc "select( .name | tostring | contains(\"${com_pipe_answer}\")) | .id")
@@ -539,9 +591,11 @@ while true; do
       else
         log_msg "Sending restart command to ${CONTAINER_ID}..."
         curl --silent --insecure -XPOST https://dockerapi/containers/${CONTAINER_ID}/restart
-        [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}"
+        if [[ ${com_pipe_answer} != "ipv6nat" ]]; then
+          [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}"
+        fi
         log_msg "Wait for restarted container to settle and continue watching..."
-        sleep 30
+        sleep 35
       fi
     fi
     kill -CONT ${BACKGROUND_TASKS[*]}
diff --git a/docker-compose.yml b/docker-compose.yml
index eed5239b..422efd3d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -342,7 +342,7 @@ services:
         - /lib/modules:/lib/modules:ro
 
     watchdog-mailcow:
-      image: mailcow/watchdog:1.30
+      image: mailcow/watchdog:1.31
       # Debug
       #command: /watchdog.sh
       build: ./data/Dockerfiles/watchdog
@@ -391,6 +391,22 @@ services:
             - dockerapi
 
     ipv6nat:
+      depends_on:
+        - unbound-mailcow
+        - mysql-mailcow
+        - redis-mailcow
+        - clamd-mailcow
+        - rspamd-mailcow
+        - php-fpm-mailcow
+        - sogo-mailcow
+        - dovecot-mailcow
+        - postfix-mailcow
+        - memcached-mailcow
+        - nginx-mailcow
+        - acme-mailcow
+        - netfilter-mailcow
+        - watchdog-mailcow
+        - dockerapi-mailcow
       image: robbertkl/ipv6nat
       restart: always
       privileged: true