#!/usr/bin/env bash
#
# VPN watchdog: runs a status command, counts consecutive failures in a state
# directory, and invokes a connect command once a failure threshold is reached
# and no cooldown is active.
#
# Every setting below can be overridden through the environment (or through
# the optional runtime env file, which is sourced before defaults are applied).
set -euo pipefail

# --- Optional runtime environment -------------------------------------------
# Sourced first so that it can provide any of the overrides consulted below.
RUNTIME_ENV_FILE="${WELLAU_SSLVPN_RUNTIME_ENV_FILE:-${HOME}/.config/wellau-sslvpn/runtime.env}"
if [[ -f "${RUNTIME_ENV_FILE}" ]]; then
  # shellcheck disable=SC1090
  . "${RUNTIME_ENV_FILE}"
fi

# --- Locations ---------------------------------------------------------------
# Defaults to the parent of the directory containing this script.
# NOTE(review): REPO_DIR is not referenced elsewhere in the visible script.
REPO_DIR="${SSLVPN_REPO_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"

# External helpers: one reports VPN health, the other (re)establishes the VPN.
STATUS_CMD="${WELLAU_VPN_STATUS_CMD:-${HOME}/.local/bin/wellau-vpn-status}"
CONNECT_CMD="${WELLAU_VPN_CONNECT_CMD:-${HOME}/.local/bin/wellau-vpn-connect-env}"

# --- Persistent state (all files live under STATE_DIR) ------------------------
STATE_DIR="${WELLAU_VPN_STATE_DIR:-${HOME}/.local/state/wellau-vpn}"
LOG_FILE="${STATE_DIR}/watchdog.log"
COUNT_FILE="${STATE_DIR}/watchdog-consecutive-fail-count"
LAST_RECONNECT_FILE="${STATE_DIR}/watchdog-last-reconnect-epoch"
LAST_RECONNECT_ATTEMPT_FILE="${STATE_DIR}/watchdog-last-reconnect-attempt-epoch"
LOCK_FILE="${STATE_DIR}/watchdog.lock"

# --- Tunables ----------------------------------------------------------------
# Consecutive failed checks required before reconnecting; the control-plane
# threshold applies whenever the status summary reports control != ok.
FAIL_THRESHOLD="${WELLAU_VPN_WATCHDOG_FAIL_THRESHOLD:-2}"
CONTROL_FAIL_THRESHOLD="${WELLAU_VPN_WATCHDOG_CONTROL_FAIL_THRESHOLD:-1}"
# Minimum seconds after a successful reconnect / after any reconnect attempt
# before another reconnect is tried.
COOLDOWN_SECONDS="${WELLAU_VPN_WATCHDOG_COOLDOWN_SECONDS:-600}"
FAILED_ATTEMPT_COOLDOWN_SECONDS="${WELLAU_VPN_WATCHDOG_FAILED_ATTEMPT_COOLDOWN_SECONDS:-90}"

# Single-instance guard: create the state directory, open the lock file on
# fd 201 and try to take a non-blocking exclusive lock. If the lock cannot be
# taken, note it in the log and leave with success so an overlapping run is
# not reported as a failure.
mkdir -p "$STATE_DIR"
exec 201>"$LOCK_FILE"
flock -n 201 || {
  printf '%s INFO watchdog already running\n' "$(date -Iseconds)" >> "$LOG_FILE"
  exit 0
}

#######################################
# Emit one timestamped log line.
# Globals:   LOG_FILE (appended to)
# Arguments: message words; joined into a single string via "$*"
# Outputs:   "<ISO-8601 timestamp> <message>" on stdout and appended to LOG_FILE
#######################################
log() {
  local entry
  # Build the whole line first so it reaches tee in a single write.
  printf -v entry '%s %s' "$(date -Iseconds)" "$*"
  printf '%s\n' "$entry" | tee -a "$LOG_FILE"
}

#######################################
# Look up the value of a key=value token in a status summary line.
# Arguments: $1 - summary text made of whitespace-separated tokens
#            $2 - key to look up
# Outputs:   value of the first "<key>=..." token (everything after the first
#            '='), or "unknown" when no token carries that key
# Returns:   0 always
#######################################
summary_value() {
  local summary="$1"
  local key="$2"
  local -a parts=()
  local part

  # Tokenise with `read -a` rather than an unquoted expansion: it splits on
  # IFS but never performs pathname expansion, so a token such as "data=*"
  # cannot be replaced by filenames from the current directory.
  # `-d ''` consumes the whole text (read then reports EOF, hence `|| true`).
  read -r -d '' -a parts <<<"$summary" || true

  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on older
  # bash releases.
  for part in ${parts[@]+"${parts[@]}"}; do
    case "$part" in
      "$key="*)
        printf '%s\n' "${part#*=}"
        return 0
        ;;
    esac
  done
  printf 'unknown\n'
}

#######################################
# Read a non-negative integer from the first line of a state file.
# Arguments: $1 - path of the file to read
#            $2 - value to print when no number can be obtained
# Outputs:   the digits found on the first line (all other characters are
#            dropped, leading zeros removed), or $2 when the file is missing,
#            empty, unreadable, or its first line contains no digits.
#            No trailing newline is written.
# Returns:   0 always
#######################################
read_int_file() {
  local file="$1"
  local default="$2"
  local first_line=""
  local digits

  if [[ -s "$file" ]]; then
    # read reports failure on a missing final newline or an unreadable file;
    # whatever was read (possibly nothing) is still used below.
    IFS= read -r first_line < "$file" || true
  fi

  digits="${first_line//[!0-9]/}"
  if [[ -z "$digits" ]]; then
    printf '%s' "$default"
    return 0
  fi

  # Drop leading zeros: callers feed the result to $(( ... )), where a value
  # such as "08" would be parsed as invalid octal and abort the script.
  digits="${digits#"${digits%%[!0]*}"}"
  printf '%s' "${digits:-0}"
}

# ---------------------------------------------------------------------------
# Main watchdog pass: probe once, track consecutive failures, and reconnect
# when the applicable threshold is reached and no cooldown is active.
#
# Exit status: 0 when healthy, below threshold, suppressed by a cooldown, or
# after a successful reconnect; 1 when the connect command is missing or fails.
# ---------------------------------------------------------------------------

# Probe the VPN. A failing status command is tolerated (`|| true`); its output
# is still searched for a SUMMARY line.
# `201>&-` closes the watchdog's lock descriptor in the child: an flock stays
# held for as long as any process keeps that descriptor open, so it must not
# leak into external commands.
status_output="$("$STATUS_CMD" --fresh 2>&1 201>&- || true)"
summary="$(grep -m1 '^SUMMARY ' <<<"$status_output" || true)"
if [[ -z "$summary" ]]; then
  # Without a SUMMARY line there is nothing to judge; the failure counter is
  # left untouched. Newlines are flattened so the output fits one log line.
  log "WARN status returned no SUMMARY; output=${status_output//$'\n'/ | }"
  exit 0
fi

control="$(summary_value "$summary" control)"
data="$(summary_value "$summary" data)"
service="$(summary_value "$summary" service)"

# Healthy: control is "ok" and data is anything other than "fail" (a missing
# data token yields "unknown" and therefore also counts as healthy).
if [[ "$control" == "ok" && "$data" != "fail" ]]; then
  printf '0\n' > "$COUNT_FILE"
  log "OK $summary"
  exit 0
fi

# Classify the failure; a control-plane failure uses its own threshold.
reconnect_reason="data-plane blackout"
threshold="$FAIL_THRESHOLD"
if [[ "$control" != "ok" ]]; then
  reconnect_reason="control-plane failure"
  threshold="$CONTROL_FAIL_THRESHOLD"
fi

# Bump and persist the consecutive-failure counter.
count="$(read_int_file "$COUNT_FILE" 0)"
count="${count:-0}"
count=$((count + 1))
printf '%s\n' "$count" > "$COUNT_FILE"
log "WARN $summary reason=$reconnect_reason consecutive_failures=$count threshold=$threshold service=$service"

if (( count < threshold )); then
  exit 0
fi

# Cooldown 1: do not reconnect again too soon after a successful reconnect.
now="$(date +%s)"
last_reconnect="$(read_int_file "$LAST_RECONNECT_FILE" 0)"
last_reconnect="${last_reconnect:-0}"
elapsed=$((now - last_reconnect))
if (( elapsed < COOLDOWN_SECONDS )); then
  log "WARN reconnect suppressed by successful-reconnect cooldown elapsed=${elapsed}s cooldown=${COOLDOWN_SECONDS}s"
  exit 0
fi

# Cooldown 2: do not retry too soon after the previous attempt (the attempt
# timestamp is written below, before the connect command runs).
last_attempt="$(read_int_file "$LAST_RECONNECT_ATTEMPT_FILE" 0)"
last_attempt="${last_attempt:-0}"
attempt_elapsed=$((now - last_attempt))
if (( attempt_elapsed < FAILED_ATTEMPT_COOLDOWN_SECONDS )); then
  log "WARN reconnect retry suppressed by failed-attempt cooldown elapsed=${attempt_elapsed}s cooldown=${FAILED_ATTEMPT_COOLDOWN_SECONDS}s"
  exit 0
fi

if [[ ! -x "$CONNECT_CMD" ]]; then
  log "ERROR connect command missing or not executable: $CONNECT_CMD"
  exit 1
fi

log "ACTION reconnecting VPN after confirmed $reconnect_reason"
printf '%s\n' "$now" > "$LAST_RECONNECT_ATTEMPT_FILE"

# Environment handed to the connect command. The extra flag is only set for a
# data-plane blackout.
# NOTE(review): the meaning of both variables is defined by the connect
# command, which is not visible here.
connect_env=(WELLAU_VPN_NONINTERACTIVE_SUDO=1)
if [[ "$reconnect_reason" == "data-plane blackout" ]]; then
  connect_env+=(WELLAU_VPN_STOP_OLD_BEFORE_AUTH=1)
fi

# Run the connect command with the lock descriptor closed (`201>&-`). Without
# this, any long-lived process the connect command leaves behind would inherit
# fd 201 and keep the flock held, making every later watchdog run exit early
# with "watchdog already running". Its output goes to the log file only.
if env "${connect_env[@]}" "$CONNECT_CMD" >> "$LOG_FILE" 2>&1 201>&-; then
  # Success: stamp the reconnect time and reset the failure counter.
  printf '%s\n' "$(date +%s)" > "$LAST_RECONNECT_FILE"
  printf '0\n' > "$COUNT_FILE"
  log "OK reconnect command completed"
  exit 0
fi

log "ERROR reconnect command failed"
exit 1
