From 62524150d280954ba8a214ae89c4a11175dde27e Mon Sep 17 00:00:00 2001 From: andryyy Date: Thu, 21 Sep 2017 19:30:03 +0200 Subject: [PATCH] [ACME] Add timestamps, check if acme account key is valid [Postfix] Ignore local0 [Watchdog] Add Rspamd checks --- data/Dockerfiles/acme/docker-entrypoint.sh | 114 +++++++++++---------- data/Dockerfiles/postfix/syslog-ng.conf | 2 + data/Dockerfiles/watchdog/watchdog.sh | 45 +++++++- 3 files changed, 103 insertions(+), 58 deletions(-) diff --git a/data/Dockerfiles/acme/docker-entrypoint.sh b/data/Dockerfiles/acme/docker-entrypoint.sh index 2cfe8224..02aae4f6 100755 --- a/data/Dockerfiles/acme/docker-entrypoint.sh +++ b/data/Dockerfiles/acme/docker-entrypoint.sh @@ -16,14 +16,18 @@ restart_containers(){ done } +log_f() { + echo "$(date) - ${1}" +} + verify_hash_match(){ CERT_HASH=$(openssl x509 -noout -modulus -in "${1}" | openssl md5) KEY_HASH=$(openssl rsa -noout -modulus -in "${2}" | openssl md5) if [[ ${CERT_HASH} != ${KEY_HASH} ]]; then - echo "Certificate and key hashes do not match!" + log_f "Certificate and key hashes do not match!" return 1 else - echo "Verified hashes." + log_f "Verified hashes." return 0 fi } @@ -33,7 +37,7 @@ get_ipv4(){ local IPV4_SRCS= local TRY= IPV4_SRCS[0]="api.ipify.org" - IPV4_SRCS[1]="ifconfig.co" + IPV4_SRCS[1]="ifconfig.co"- IPV4_SRCS[2]="icanhazip.com" IPV4_SRCS[3]="v4.ident.me" IPV4_SRCS[4]="ipecho.net/plain" @@ -51,7 +55,7 @@ get_ipv4(){ if [[ -f ${ACME_BASE}/cert.pem ]] && [[ -f ${ACME_BASE}/key.pem ]]; then ISSUER=$(openssl x509 -in ${ACME_BASE}/cert.pem -noout -issuer) if [[ ${ISSUER} != *"Let's Encrypt"* && ${ISSUER} != *"mailcow"* ]]; then - echo "Found certificate with issuer other than mailcow snake-oil CA and Let's Encrypt, skipping ACME client..." + log_f "Found certificate with issuer other than mailcow snake-oil CA and Let's Encrypt, skipping ACME client..." sleep 3650d exec $(readlink -f "$0") else @@ -59,21 +63,21 @@ if [[ -f ${ACME_BASE}/cert.pem ]] && [[ -f ${ACME_BASE}/key.pem ]]; then SAN_NAMES=$(openssl x509 -noout -text -in ${ACME_BASE}/cert.pem | awk '/X509v3 Subject Alternative Name/ {getline;gsub(/ /, "", $0); print}' | tr -d "DNS:") if [[ ! -z ${SAN_NAMES} ]]; then IFS=',' read -a SAN_ARRAY_NOW <<< ${SAN_NAMES} - echo "Found Let's Encrypt or mailcow snake-oil CA issued certificate with SANs: ${SAN_ARRAY_NOW[*]}" + log_f "Found Let's Encrypt or mailcow snake-oil CA issued certificate with SANs: ${SAN_ARRAY_NOW[*]}" fi fi else if [[ -f ${ACME_BASE}/acme/fullchain.pem ]] && [[ -f ${ACME_BASE}/acme/private/privkey.pem ]]; then if verify_hash_match ${ACME_BASE}/acme/fullchain.pem ${ACME_BASE}/acme/private/privkey.pem; then - echo "Restoring previous acme certificate and restarting script..." + log_f "Restoring previous acme certificate and restarting script..." cp ${ACME_BASE}/acme/fullchain.pem ${ACME_BASE}/cert.pem cp ${ACME_BASE}/acme/private/privkey.pem ${ACME_BASE}/key.pem - # Restarting with env var set to trigger a restart, + # Restarting with env var set to trigger a restart, exec env TRIGGER_RESTART=1 $(readlink -f "$0") fi ISSUER="mailcow" else - echo "Restoring mailcow snake-oil certificates and restarting script..." + log_f "Restoring mailcow snake-oil certificates and restarting script..." cp ${SSL_EXAMPLE}/cert.pem ${ACME_BASE}/cert.pem cp ${SSL_EXAMPLE}/key.pem ${ACME_BASE}/key.pem exec env TRIGGER_RESTART=1 $(readlink -f "$0") @@ -82,7 +86,7 @@ fi while true; do if [[ "${SKIP_LETS_ENCRYPT}" =~ ^([yY][eE][sS]|[yY])+$ ]]; then - echo "SKIP_LETS_ENCRYPT=y, skipping Let's Encrypt..." + log_f "SKIP_LETS_ENCRYPT=y, skipping Let's Encrypt..." sleep 365d exec $(readlink -f "$0") fi @@ -110,42 +114,42 @@ while true; do for SQL_DOMAIN in "${SQL_DOMAIN_ARR[@]}"; do A_CONFIG=$(dig A autoconfig.${SQL_DOMAIN} +short | tail -n 1) if [[ ! -z ${A_CONFIG} ]]; then - echo "Found A record for autoconfig.${SQL_DOMAIN}: ${A_CONFIG}" + log_f "Found A record for autoconfig.${SQL_DOMAIN}: ${A_CONFIG}" if [[ ${IPV4:-ERR} == ${A_CONFIG} ]] || [[ ${SKIP_IP_CHECK} == "y" ]]; then - echo "Confirmed A record autoconfig.${SQL_DOMAIN}" + log_f "Confirmed A record autoconfig.${SQL_DOMAIN}" VALIDATED_CONFIG_DOMAINS+=("autoconfig.${SQL_DOMAIN}") else - echo "Cannot match your IP ${IPV4} against hostname autoconfig.${SQL_DOMAIN} (${A_CONFIG})" + log_f "Cannot match your IP ${IPV4} against hostname autoconfig.${SQL_DOMAIN} (${A_CONFIG})" fi else - echo "No A record for autoconfig.${SQL_DOMAIN} found" + log_f "No A record for autoconfig.${SQL_DOMAIN} found" fi A_DISCOVER=$(dig A autodiscover.${SQL_DOMAIN} +short | tail -n 1) if [[ ! -z ${A_DISCOVER} ]]; then - echo "Found A record for autodiscover.${SQL_DOMAIN}: ${A_DISCOVER}" + log_f "Found A record for autodiscover.${SQL_DOMAIN}: ${A_DISCOVER}" if [[ ${IPV4:-ERR} == ${A_DISCOVER} ]] || [[ ${SKIP_IP_CHECK} == "y" ]]; then - echo "Confirmed A record autodiscover.${SQL_DOMAIN}" + log_f "Confirmed A record autodiscover.${SQL_DOMAIN}" VALIDATED_CONFIG_DOMAINS+=("autodiscover.${SQL_DOMAIN}") else - echo "Cannot match your IP ${IPV4} against hostname autodiscover.${SQL_DOMAIN} (${A_DISCOVER})" + log_f "Cannot match your IP ${IPV4} against hostname autodiscover.${SQL_DOMAIN} (${A_DISCOVER})" fi else - echo "No A record for autodiscover.${SQL_DOMAIN} found" + log_f "No A record for autodiscover.${SQL_DOMAIN} found" fi done A_MAILCOW_HOSTNAME=$(dig A ${MAILCOW_HOSTNAME} +short | tail -n 1) if [[ ! -z ${A_MAILCOW_HOSTNAME} ]]; then - echo "Found A record for ${MAILCOW_HOSTNAME}: ${A_MAILCOW_HOSTNAME}" + log_f "Found A record for ${MAILCOW_HOSTNAME}: ${A_MAILCOW_HOSTNAME}" if [[ ${IPV4:-ERR} == ${A_MAILCOW_HOSTNAME} ]] || [[ ${SKIP_IP_CHECK} == "y" ]]; then - echo "Confirmed A record ${MAILCOW_HOSTNAME}" + log_f "Confirmed A record ${MAILCOW_HOSTNAME}" VALIDATED_MAILCOW_HOSTNAME=${MAILCOW_HOSTNAME} else - echo "Cannot match your IP ${IPV4} against hostname ${MAILCOW_HOSTNAME} (${A_MAILCOW_HOSTNAME}) " + log_f "Cannot match your IP ${IPV4} against hostname ${MAILCOW_HOSTNAME} (${A_MAILCOW_HOSTNAME}) " fi else - echo "No A record for ${MAILCOW_HOSTNAME} found" + log_f "No A record for ${MAILCOW_HOSTNAME} found" fi for SAN in "${ADDITIONAL_SAN_ARR[@]}"; do @@ -154,23 +158,23 @@ while true; do fi A_SAN=$(dig A ${SAN} +short | tail -n 1) if [[ ! -z ${A_SAN} ]]; then - echo "Found A record for ${SAN}: ${A_SAN}" + log_f "Found A record for ${SAN}: ${A_SAN}" if [[ ${IPV4:-ERR} == ${A_SAN} ]] || [[ ${SKIP_IP_CHECK} == "y" ]]; then - echo "Confirmed A record ${SAN}" + log_f "Confirmed A record ${SAN}" ADDITIONAL_VALIDATED_SAN+=("${SAN}") else - echo "Cannot match your IP against hostname ${SAN}" + log_f "Cannot match your IP against hostname ${SAN}" fi else - echo "No A record for ${SAN} found" + log_f "No A record for ${SAN} found" fi done # Unique elements ALL_VALIDATED=(${VALIDATED_MAILCOW_HOSTNAME} $(echo ${VALIDATED_CONFIG_DOMAINS[*]} ${ADDITIONAL_VALIDATED_SAN[*]} | xargs -n1 | sort -u | xargs)) if [[ -z ${ALL_VALIDATED[*]} ]]; then - echo "Cannot validate hostnames, skipping Let's Encrypt for 1 hour." - echo "Use SKIP_LETS_ENCRYPT=y in mailcow.conf to skip it permanently." + log_f "Cannot validate hostnames, skipping Let's Encrypt for 1 hour." + log_f "Use SKIP_LETS_ENCRYPT=y in mailcow.conf to skip it permanently." sleep 1h exec $(readlink -f "$0") fi @@ -178,7 +182,7 @@ while true; do ORPHANED_SAN=($(echo ${SAN_ARRAY_NOW[*]} ${ALL_VALIDATED[*]} | tr ' ' '\n' | sort | uniq -u )) if [[ ! -z ${ORPHANED_SAN[*]} ]] && [[ ${ISSUER} != *"mailcow"* ]]; then DATE=$(date +%Y-%m-%d_%H_%M_%S) - echo "Found orphaned SAN ${ORPHANED_SAN[*]} in certificate, moving old files to ${ACME_BASE}/acme/private/${DATE}.bak/, keeping key file..." + log_f "Found orphaned SAN ${ORPHANED_SAN[*]} in certificate, moving old files to ${ACME_BASE}/acme/private/${DATE}.bak/, keeping key file..." mkdir -p ${ACME_BASE}/acme/private/${DATE}.bak/ [[ -f ${ACME_BASE}/acme/private/account.key ]] && mv ${ACME_BASE}/acme/private/account.key ${ACME_BASE}/acme/private/${DATE}.bak/ [[ -f ${ACME_BASE}/acme/fullchain.pem ]] && mv ${ACME_BASE}/acme/fullchain.pem ${ACME_BASE}/acme/private/${DATE}.bak/ @@ -186,12 +190,12 @@ while true; do cp ${ACME_BASE}/acme/private/privkey.pem ${ACME_BASE}/acme/private/${DATE}.bak/ # Keep key for TLSA 3 1 1 records fi - ACME_RESPONSE=$(acme-client \ - -v -e -b -N -n \ - -f ${ACME_BASE}/acme/private/account.key \ - -k ${ACME_BASE}/acme/private/privkey.pem \ - -c ${ACME_BASE}/acme \ - ${ALL_VALIDATED[*]} 2>&1 | tee /dev/fd/5) + ACME_RESPONSE=$(acme-client \ + -v -e -b -N -n \ + -f ${ACME_BASE}/acme/private/account.key \ + -k ${ACME_BASE}/acme/private/privkey.pem \ + -c ${ACME_BASE}/acme \ + ${ALL_VALIDATED[*]} 2>&1 | tee /dev/fd/5) case "$?" in 0) # new certs @@ -201,50 +205,50 @@ while true; do # restart docker containers if ! verify_hash_match ${ACME_BASE}/cert.pem ${ACME_BASE}/key.pem; then - echo "Certificate was successfully requested, but key and certificate have non-matching hashes, restoring mailcow snake-oil and restarting containers..." + log_f "Certificate was successfully requested, but key and certificate have non-matching hashes, restoring mailcow snake-oil and restarting containers..." cp ${SSL_EXAMPLE}/cert.pem ${ACME_BASE}/cert.pem cp ${SSL_EXAMPLE}/key.pem ${ACME_BASE}/key.pem fi restart_containers ${CONTAINERS_RESTART[*]} ;; 1) # failure - if [[ $ACME_RESPONSE =~ "No registration exists" ]]; then - echo "Registration keys are invalid, deleting old keys and restarting..." - rm ${ACME_BASE}/acme/private/account.key - rm ${ACME_BASE}/acme/private/privkey.pem - exec $(readlink -f "$0") - fi - if [[ -f ${ACME_BASE}/acme/private/${DATE}.bak/fullchain.pem ]] && [[ -f ${ACME_BASE}/acme/private/${DATE}.bak/privkey.pem ]]; then - echo "Error requesting certificate, restoring previous certificate from backup and restarting containers...." + if [[ $ACME_RESPONSE =~ "No registration exists" ]]; then + log_f "Registration keys are invalid, deleting old keys and restarting..." + rm ${ACME_BASE}/acme/private/account.key + rm ${ACME_BASE}/acme/private/privkey.pem + exec $(readlink -f "$0") + fi + if [[ -f ${ACME_BASE}/acme/private/${DATE}.bak/fullchain.pem ]] && [[ -f ${ACME_BASE}/acme/private/${DATE}.bak/privkey.pem ]]; then + log_f "Error requesting certificate, restoring previous certificate from backup and restarting containers...." cp ${ACME_BASE}/acme/private/${DATE}.bak/fullchain.pem ${ACME_BASE}/cert.pem cp ${ACME_BASE}/acme/private/${DATE}.bak/privkey.pem ${ACME_BASE}/key.pem TRIGGER_RESTART=1 - elif [[ -f ${ACME_BASE}/acme/fullchain.pem ]] && [[ -f ${ACME_BASE}/acme/private/privkey.pem ]]; then - echo "Error requesting certificate, restoring from previous acme request and restarting containers..." + elif [[ -f ${ACME_BASE}/acme/fullchain.pem ]] && [[ -f ${ACME_BASE}/acme/private/privkey.pem ]]; then + log_f "Error requesting certificate, restoring from previous acme request and restarting containers..." cp ${ACME_BASE}/acme/fullchain.pem ${ACME_BASE}/cert.pem cp ${ACME_BASE}/acme/private/privkey.pem ${ACME_BASE}/key.pem TRIGGER_RESTART=1 fi if ! verify_hash_match ${ACME_BASE}/cert.pem ${ACME_BASE}/key.pem; then - echo "Error verifying certificates, restoring mailcow snake-oil and restarting containers..." + log_f "Error verifying certificates, restoring mailcow snake-oil and restarting containers..." cp ${SSL_EXAMPLE}/cert.pem ${ACME_BASE}/cert.pem cp ${SSL_EXAMPLE}/key.pem ${ACME_BASE}/key.pem TRIGGER_RESTART=1 fi [[ ${TRIGGER_RESTART} == 1 ]] && restart_containers ${CONTAINERS_RESTART[*]} - echo "Retrying in 30 minutes..." + log_f "Retrying in 30 minutes..." sleep 30m exec $(readlink -f "$0") - ;; + ;; 2) # no change if ! diff ${ACME_BASE}/acme/fullchain.pem ${ACME_BASE}/cert.pem; then - echo "Certificate was not changed, but active certificate does not match the verified certificate, fixing and restarting containers..." + log_f "Certificate was not changed, but active certificate does not match the verified certificate, fixing and restarting containers..." cp ${ACME_BASE}/acme/fullchain.pem ${ACME_BASE}/cert.pem cp ${ACME_BASE}/acme/private/privkey.pem ${ACME_BASE}/key.pem TRIGGER_RESTART=1 fi if ! verify_hash_match ${ACME_BASE}/cert.pem ${ACME_BASE}/key.pem; then - echo "Certificate was not changed, but hashes do not match, restoring from previous acme request and restarting containers..." + log_f "Certificate was not changed, but hashes do not match, restoring from previous acme request and restarting containers..." cp ${ACME_BASE}/acme/fullchain.pem ${ACME_BASE}/cert.pem cp ${ACME_BASE}/acme/private/privkey.pem ${ACME_BASE}/key.pem TRIGGER_RESTART=1 @@ -253,30 +257,30 @@ while true; do ;; *) # unspecified if [[ -f ${ACME_BASE}/acme/private/${DATE}.bak/fullchain.pem ]] && [[ -f ${ACME_BASE}/acme/private/${DATE}.bak/privkey.pem ]]; then - echo "Error requesting certificate, restoring previous certificate from backup and restarting containers...." + log_f "Error requesting certificate, restoring previous certificate from backup and restarting containers...." cp ${ACME_BASE}/acme/private/${DATE}.bak/fullchain.pem ${ACME_BASE}/cert.pem cp ${ACME_BASE}/acme/private/${DATE}.bak/privkey.pem ${ACME_BASE}/key.pem TRIGGER_RESTART=1 elif [[ -f ${ACME_BASE}/acme/fullchain.pem ]] && [[ -f ${ACME_BASE}/acme/private/privkey.pem ]]; then - echo "Error requesting certificate, restoring from previous acme request and restarting containers..." + log_f "Error requesting certificate, restoring from previous acme request and restarting containers..." cp ${ACME_BASE}/acme/fullchain.pem ${ACME_BASE}/cert.pem cp ${ACME_BASE}/acme/private/privkey.pem ${ACME_BASE}/key.pem TRIGGER_RESTART=1 fi if ! verify_hash_match ${ACME_BASE}/cert.pem ${ACME_BASE}/key.pem; then - echo "Error verifying certificates, restoring mailcow snake-oil..." + log_f "Error verifying certificates, restoring mailcow snake-oil..." cp ${SSL_EXAMPLE}/cert.pem ${ACME_BASE}/cert.pem cp ${SSL_EXAMPLE}/key.pem ${ACME_BASE}/key.pem TRIGGER_RESTART=1 fi [[ ${TRIGGER_RESTART} == 1 ]] && restart_containers ${CONTAINERS_RESTART[*]} - echo "Retrying in 30 minutes..." + log_f "Retrying in 30 minutes..." sleep 30m exec $(readlink -f "$0") ;; esac - echo "ACME certificate validation done. Sleeping for another day." + log_f "ACME certificate validation done. Sleeping for another day." sleep 1d done diff --git a/data/Dockerfiles/postfix/syslog-ng.conf b/data/Dockerfiles/postfix/syslog-ng.conf index cfb76a16..bf5509c0 100644 --- a/data/Dockerfiles/postfix/syslog-ng.conf +++ b/data/Dockerfiles/postfix/syslog-ng.conf @@ -39,8 +39,10 @@ destination d_redis_cleanup { ); }; filter f_mail { facility(mail); }; +filter f_skip_local { facility (local0, local1, local2, local3, local4, local5, local6, local7); }; log { source(s_src); + filter(f_skip_local); destination(d_stdout); filter(f_mail); destination(d_redis_ui_log); diff --git a/data/Dockerfiles/watchdog/watchdog.sh b/data/Dockerfiles/watchdog/watchdog.sh index 77ea1a4d..a1f7a096 100755 --- a/data/Dockerfiles/watchdog/watchdog.sh +++ b/data/Dockerfiles/watchdog/watchdog.sh @@ -109,9 +109,8 @@ postfix_checks() { while [ ${err_count} -lt ${THRESHOLD} ]; do host_ip=$(get_container_ip postfix-mailcow) err_c_cur=${err_count} - /usr/lib/nagios/plugins/check_smtp -4 -H ${host_ip} -p 25 1>&2; err_count=$(( ${err_count} + $? )) - /usr/lib/nagios/plugins/check_smtp -4 -H ${host_ip} -p 588 -f watchdog -C "RCPT TO:null@localhost" -C DATA -C . -R 250 1>&2; err_count=$(( ${err_count} + $? )) - /usr/lib/nagios/plugins/check_smtp -4 -H ${host_ip} -p 587 -S 1>&2; err_count=$(( ${err_count} + $? )) + /usr/lib/nagios/plugins/check_smtp -4 -H ${host_ip} -p 589 -f watchdog -C "RCPT TO:null@localhost" -C DATA -C . -R 250 1>&2; err_count=$(( ${err_count} + $? )) + /usr/lib/nagios/plugins/check_smtp -4 -H ${host_ip} -p 589 -S 1>&2; err_count=$(( ${err_count} + $? )) [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) progress "Postfix" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} @@ -161,6 +160,36 @@ phpfpm_checks() { return 1 } +rspamd_checks() { + err_count=0 + diff_c=0 + THRESHOLD=10 + # Reduce error count by 2 after restarting an unhealthy container + trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 + while [ ${err_count} -lt ${THRESHOLD} ]; do + host_ip=$(get_container_ip rspamd-mailcow) + err_c_cur=${err_count} + SCORE=$(curl --silent ${host_ip}:11333/scan -d ' +To: null@localhost +From: watchdog@localhost + +Empty +' | jq -rc .required_score) + if [[ ${SCORE} != "9999" ]]; then + echo "Rspamd settings check failed" 1>&2 + err_count=$(( ${err_count} + 1)) + else + echo "Rspamd settings check succeeded" 1>&2 + fi + /usr/lib/nagios/plugins/check_ping -4 -H ${host_ip} -w 2000,10% -c 4000,100% -p2 1>&2; err_count=$(( ${err_count} + $? )) + [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 + [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) + progress "Rspamd" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} + sleep $(( ( RANDOM % 30 ) + 10 )) + done + return 1 +} + dns_checks() { err_count=0 diff_c=0 @@ -252,6 +281,16 @@ done ) & BACKGROUND_TASKS+=($!) +( +while true; do + if ! rspamd_checks; then + echo -e "\e[31m$(date) - Rspamd hit error limit\e[0m" + echo rspamd-mailcow > /tmp/com_pipe + fi +done +) & +BACKGROUND_TASKS+=($!) + # Monitor watchdog agents, stop script when agents fails and wait for respawn by Docker (restart:always:n) ( while true; do