#!/usr/bin/env bash
set -euo pipefail

# s — execution telemetry collector
# Wraps high-risk operations (e.g. kubectl apply/delete, docker build/push,
# helm upgrade, terraform apply, git push/merge/rebase) and records each
# run's exit code, duration, and working directory.
#
# Usage:
#   alias kubectl='s kubectl'
#   alias docker='s docker'
#   alias git='s git'
#
# Data: ~/.s/telemetry.jsonl (JSON lines; appended on every recorded run,
#       emptied only by `s --clear`)
# No platform. No API. No database. Just local telemetry.

# Locations of the on-disk state. Everything lives under ~/.s.
TELEMETRY_DIR="${HOME}/.s"
TELEMETRY_FILE="${TELEMETRY_DIR}/telemetry.jsonl"
SESSION_FILE="${TELEMETRY_DIR}/session"

mkdir -p "${TELEMETRY_DIR}"

# The session file doubles as the "has s run before?" marker.
if [[ ! -f "${SESSION_FILE}" ]]; then
  # The banner goes to stderr: s wraps other commands, and anything printed on
  # stdout would end up in the captured output of the wrapped command
  # (e.g. `rev=$(git rev-parse HEAD)` on the very first run).
  echo "s: first run — initializing telemetry at ${TELEMETRY_FILE}" >&2
  echo '{}' > "${SESSION_FILE}"
fi

# Commands that get recorded. Each entry is the literal "tool subcommand"
# text that is compared against the command line; the matching entry is also
# stored as the grouping key ("tool_sub") of the telemetry record.
#
# Ordering matters for first-match lookups: an entry that extends another one
# (e.g. "docker compose up" vs "docker compose", "docker buildx" vs
# "docker build") must be listed BEFORE the shorter entry, otherwise the
# shorter one always wins and the specific entry is never reported.
TARGET_PATTERNS=(
  # Kubectl - cluster modification (high-risk)
  "kubectl apply"
  "kubectl delete"
  "kubectl patch"
  "kubectl replace"
  "kubectl scale"
  "kubectl expose"
  "kubectl drain"
  "kubectl cordon"
  "kubectl taint"
  "kubectl rollout"
  "kubectl load"
  "kubectl exec"
  "kubectl rollback"

  # Docker - container/image modification (high-risk)
  "docker buildx"            # before "docker build" (shares its prefix)
  "docker build"
  "docker push"
  "docker pull"
  "docker rmi"
  "docker run"
  "docker system prune"
  "docker volume prune"
  "docker network prune"
  "docker image prune"
  "docker container prune"
  "docker compose up"        # specific compose verbs before the catch-all
  "docker compose down"
  "docker compose restart"
  "docker compose"
  "docker swarm"

  # Helm - chart release modification (high-risk)
  "helm install"
  "helm upgrade"
  "helm rollback"
  "helm uninstall"
  "helm dependency"
  "helm repo add"
  "helm repo update"

  # Terraform - infrastructure modification (high-risk)
  "terraform apply"
  "terraform destroy"
  "terraform import"
  "terraform taint"
  "terraform untaint"
  "terraform state"
  "terraform providers lock"
  "terraform providers mirror"
  "terraform fmt"
  "terraform validate"

  # Git - history modification (high-risk)
  "git push"
  "git merge"
  "git rebase"
  "git reset"
  "git checkout"
  "git switch"
  "git branch -d"
  "git tag -d"
  "git revert"
  "git clean -f"
  "git fsck"
  "git gc --prune=now"
  "git reflog expire"
)

#######################################
# Decide whether a command line is one of the recorded operations.
#
# The command must START with one of the TARGET_PATTERNS entries. Matching
# anywhere in the string would misclassify argument text as the command,
# e.g. `git commit -m "undo git push"` is not a `git push`.
# The first word is reduced to its basename first, so `/usr/bin/kubectl apply`
# is treated like `kubectl apply`. No word boundary is required after the
# pattern, so "git clean -f" still covers `git clean -fd`.
#
# Globals:   TARGET_PATTERNS (read)
# Arguments: $1 - the command line as one space-joined string
# Returns:   0 if the command is a target, 1 otherwise
#######################################
is_target() {
  local cmd="$1"
  local first="${cmd%% *}"
  local pattern

  # Replace the first word by its basename; the rest of the line is untouched.
  cmd="${first##*/}${cmd#"$first"}"

  for pattern in "${TARGET_PATTERNS[@]}"; do
    if [[ "$cmd" == "$pattern"* ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Print the TARGET_PATTERNS entry that best describes a command line.
#
# Selection order:
#   1. the longest pattern the command starts with (first word reduced to its
#      basename), so "docker compose up -d" yields "docker compose up" rather
#      than the shorter "docker compose", whatever the order of the list;
#   2. otherwise the longest pattern found anywhere in the command (keeps
#      e.g. "sudo docker push" grouped under "docker push");
#   3. otherwise the literal string "unknown".
#
# Globals:   TARGET_PATTERNS (read)
# Arguments: $1 - the command line as one space-joined string
# Outputs:   the chosen pattern (or "unknown") on stdout
# Returns:   0 always
#######################################
extract_tool_sub() {
  local cmd="$1"
  local first="${cmd%% *}"
  local anchored_cmd="${first##*/}${cmd#"$first"}"
  local pattern
  local best_prefix=""
  local best_any=""

  for pattern in "${TARGET_PATTERNS[@]}"; do
    if [[ "$anchored_cmd" == "$pattern"* ]]; then
      if (( ${#pattern} > ${#best_prefix} )); then
        best_prefix="$pattern"
      fi
    elif [[ "$cmd" == *"$pattern"* ]]; then
      if (( ${#pattern} > ${#best_any} )); then
        best_any="$pattern"
      fi
    fi
  done

  if [[ -n "$best_prefix" ]]; then
    echo "$best_prefix"
  elif [[ -n "$best_any" ]]; then
    echo "$best_any"
  else
    echo "unknown"
  fi
  return 0
}

# ANSI color sequences. They are stored as literal backslash escapes and only
# become real escape characters when printed through `echo -e`.
RED='\033[0;31m'
YELLOW='\033[0;33m'
GREEN='\033[0;32m'
RESET='\033[0m'

# The whole command line flattened into a single space-separated string; used
# for pattern matching and logging (the command itself is run from "$@").
FULL_CMD="$*"

# No arguments, or an explicit help flag: print usage and stop.
if (( $# == 0 )) || [[ "$1" == "--help" || "$1" == "-h" ]]; then
  printf '%s\n' \
    "s — execution telemetry collector" \
    "" \
    "Targets: ${TARGET_PATTERNS[*]}" \
    "Other commands pass through transparently." \
    "Set S_AGENT env to tag sessions." \
    "Set S_AUTO=1 to skip confirm prompt." \
    "" \
    "Commands:" \
    "  s --signals    Show daily telemetry report" \
    "  s --clear      Clear telemetry history" \
    "  s --help       This help" \
    "" \
    "Data: ~/.s/telemetry.jsonl"
  exit 0
fi

#######################################
# Print the aggregated telemetry report for one telemetry file.
#
# The file is decoded as a stream of JSON objects rather than line by line,
# because recorded entries may span several lines (pretty-printed objects);
# a per-line json.loads() fails on the first "{" of such an entry.
# Undecodable fragments are skipped up to the next line break.
# The path is handed to Python as an argument, never pasted into the program
# text, so quotes or other special characters in $HOME cannot break it.
#
# Arguments: $1 - path to the telemetry file
# Outputs:   the report on stdout
# Returns:   exit status of python3
#######################################
s_print_report() {
  # PYTHONIOENCODING keeps the status icons printable under a non-UTF-8 locale.
  PYTHONIOENCODING=utf-8 python3 - "$1" <<'PY'
import collections
import json
import sys


def read_entries(path):
    """Yield every JSON object found in the file, one-line or multi-line."""
    with open(path, encoding="utf-8", errors="replace") as fh:
        text = fh.read()
    decoder = json.JSONDecoder()
    pos, end = 0, len(text)
    while pos < end:
        if text[pos].isspace():
            pos += 1
            continue
        try:
            obj, pos = decoder.raw_decode(text, pos)
        except ValueError:
            # Corrupt fragment: resume after the next line break.
            newline = text.find("\n", pos)
            if newline < 0:
                break
            pos = newline + 1
            continue
        if isinstance(obj, dict):
            yield obj


def is_failure(entry):
    # exit_code -1 marks an aborted (never executed) run, not a failure.
    return entry.get("exit_code", 0) not in (0, -1)


entries = list(read_entries(sys.argv[1]))
if not entries:
    print("No entries")
    sys.exit(0)

ops_by_tool = collections.Counter()
fails_by_tool = collections.Counter()
for entry in entries:
    tool = str(entry.get("tool_sub", "unknown"))
    ops_by_tool[tool] += 1
    if is_failure(entry):
        fails_by_tool[tool] += 1

failed = sum(fails_by_tool.values())
aborted = sum(1 for entry in entries if entry.get("aborted"))
print(f"Total ops: {len(entries)}  |  Failed: {failed}  |  Aborted: {aborted}")
print()
print("By tool:")
for tool, cnt in ops_by_tool.most_common():
    f = fails_by_tool[tool]
    r = f * 100 // cnt
    icon = "🔴" if r >= 50 else "🟡" if r >= 20 else "🟢"
    print(f"  {icon} {tool:25s} {cnt:4d} ops  {f:3d} fails  {r:2d}% fail rate")

hesitations = [e["hesitation_ms"] for e in entries if e.get("hesitation_ms", 0) > 0]
if hesitations:
    avg_h = sum(hesitations) // len(hesitations)
    print(f"\nAvg hesitation: {avg_h}ms over {len(hesitations)} ops")
PY
}

# Report command
if [ "${1:-}" = "--signals" ]; then
  if [ ! -f "${TELEMETRY_FILE}" ]; then
    echo "s: no telemetry yet"
    exit 0
  fi
  echo "=== s Telemetry Report ==="
  s_print_report "${TELEMETRY_FILE}"
  exit 0
fi

# `s --clear`: truncate the telemetry file (creating it if needed) and stop.
case "$1" in
  --clear)
    : > "${TELEMETRY_FILE}"
    echo "s: telemetry cleared"
    exit 0
    ;;
esac

# Anything that is not a recorded operation replaces this process outright,
# so the wrapped command runs without any further involvement of s.
is_target "$FULL_CMD" || exec "$@"

TOOL_SUB=$(extract_tool_sub "$FULL_CMD")

#######################################
# Print a short fingerprint of a command line: the first 12 hex digits of its
# SHA-256, or "unknown" when neither sha256sum nor shasum is available.
# Arguments: $1 - command line
# Outputs:   fingerprint on stdout
# Returns:   0 always
#######################################
s_command_hash() {
  local digest=""
  if command -v sha256sum >/dev/null 2>&1; then
    digest=$(printf '%s' "$1" | sha256sum 2>/dev/null) || digest=""
  elif command -v shasum >/dev/null 2>&1; then
    digest=$(printf '%s' "$1" | shasum -a 256 2>/dev/null) || digest=""
  fi
  digest="${digest:0:12}"
  printf '%s\n' "${digest:-unknown}"
}

#######################################
# Count earlier executions of the same command in a telemetry file.
#
# One python3 process reads the whole file (instead of two processes per
# line) and decodes it as a stream of JSON objects, so both one-line and
# multi-line (pretty-printed) entries are recognized. Aborted runs
# (exit_code -1 / "aborted": true) never executed and are ignored; counting
# them as failures would make every declined prompt raise the failure rate.
#
# Arguments: $1 - command fingerprint, $2 - telemetry file
# Outputs:   "<total> <failures>" on stdout; "0 0" if python3 is missing or
#            the file cannot be read
# Returns:   0 always
#######################################
s_count_similar() {
  python3 - "$1" "$2" <<'PY' 2>/dev/null || printf '0 0\n'
import json
import sys

target, path = sys.argv[1], sys.argv[2]
with open(path, encoding="utf-8", errors="replace") as fh:
    text = fh.read()

decoder = json.JSONDecoder()
pos, end = 0, len(text)
total = fails = 0
while pos < end:
    if text[pos].isspace():
        pos += 1
        continue
    try:
        obj, pos = decoder.raw_decode(text, pos)
    except ValueError:
        # Corrupt fragment: resume after the next line break.
        newline = text.find("\n", pos)
        if newline < 0:
            break
        pos = newline + 1
        continue
    if not isinstance(obj, dict) or obj.get("command_hash") != target:
        continue
    if obj.get("aborted") or obj.get("exit_code", 0) == -1:
        continue
    total += 1
    if obj.get("exit_code", 0) != 0:
        fails += 1
print(total, fails)
PY
}

CMD_HASH=$(s_command_hash "$FULL_CMD")

# Hesitation timing: the clock starts here and is read again once the user
# has answered the confirmation prompt (it stays 0 when no prompt is shown).
HESITATION_MS=0
HESITATION_START=$(date +%s%N)

# Check telemetry for earlier runs of the same command. Skipped when no
# fingerprint could be computed, since "unknown" would match unrelated runs.
SIMILAR_FAILS=0
SIMILAR_TOTAL=0
if [[ -f "${TELEMETRY_FILE}" && "$CMD_HASH" != "unknown" ]]; then
  read -r SIMILAR_TOTAL SIMILAR_FAILS \
    < <(s_count_similar "$CMD_HASH" "${TELEMETRY_FILE}") || true
  # Never let a malformed answer reach the arithmetic further down.
  [[ "$SIMILAR_TOTAL" =~ ^[0-9]+$ ]] || SIMILAR_TOTAL=0
  [[ "$SIMILAR_FAILS" =~ ^[0-9]+$ ]] || SIMILAR_FAILS=0
fi

# Emit risk indicator.
# Based on how often this exact command failed before: >= 50% prints a red
# warning and requires confirmation, >= 20% prints a yellow warning, anything
# lower a green note. All of it goes to stderr so that the stdout of the
# wrapped command stays exactly what the command itself prints (pipes and
# $(...) captures must not see wrapper chatter).
CONFIRM_REQUIRED=false
if (( ${SIMILAR_TOTAL:-0} > 0 )); then
  FAIL_RATE=$(( ${SIMILAR_FAILS:-0} * 100 / SIMILAR_TOTAL ))
  if (( FAIL_RATE >= 50 )); then
    echo -e "${RED}⚠ s: ${TOOL_SUB} — ${FAIL_RATE}% failure rate in ${SIMILAR_TOTAL} similar ops${RESET}" >&2
    CONFIRM_REQUIRED=true
  elif (( FAIL_RATE >= 20 )); then
    echo -e "${YELLOW}⚠ s: ${TOOL_SUB} — ${FAIL_RATE}% failure rate in ${SIMILAR_TOTAL} similar ops${RESET}" >&2
  else
    echo -e "${GREEN}s: ${TOOL_SUB} — ${FAIL_RATE}% failure rate in ${SIMILAR_TOTAL} similar ops${RESET}" >&2
  fi
fi

# Also flag delete/destroy/drain commands regardless of history. The padding
# spaces make the keywords match only as whole words of the command line.
case " $FULL_CMD " in
  *" delete "*|*" destroy "*|*" drain "*)
    CONFIRM_REQUIRED=true
    ;;
esac

#######################################
# Escape a string for use inside a JSON string literal: backslash and double
# quote are escaped, newline/CR/tab become \n, \r, \t, and any remaining
# control characters are dropped (JSON does not allow them unescaped).
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
s_json_escape() {
  local s="$1"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  s=${s//[[:cntrl:]]/}
  printf '%s' "$s"
}

#######################################
# Append one telemetry record to the telemetry file as a SINGLE line, which
# is what a JSON-lines file requires and what line-oriented readers expect.
# Globals:   FULL_CMD, CMD_HASH, TOOL_SUB, S_AGENT, TELEMETRY_FILE (read)
# Arguments: $1 - ISO-8601 UTC timestamp
#            $2 - exit code (-1 for an aborted run)
#            $3 - duration in ms
#            $4 - hesitation in ms
#            $5 - "true" to mark the record as aborted (optional)
# Returns:   non-zero if the record could not be written
#######################################
s_append_entry() {
  local aborted_field=""
  if [[ "${5:-false}" == "true" ]]; then
    aborted_field=', "aborted": true'
  fi
  printf '{"timestamp": "%s", "command": "%s", "command_hash": "%s", "tool_sub": "%s", "cwd": "%s", "exit_code": %s, "duration_ms": %s, "hesitation_ms": %s, "session_id": "%s", "agent": "%s"%s}\n' \
    "$1" \
    "$(s_json_escape "$FULL_CMD")" \
    "$(s_json_escape "$CMD_HASH")" \
    "$(s_json_escape "$TOOL_SUB")" \
    "$(s_json_escape "$PWD")" \
    "$2" "$3" "$4" \
    "$(s_json_escape "$(hostname 2>/dev/null || echo unknown)-$$")" \
    "$(s_json_escape "${S_AGENT:-unknown}")" \
    "$aborted_field" \
    >> "${TELEMETRY_FILE}"
}

# Confirm before high-risk ops (skip if S_AUTO=1 or stdin is not a terminal).
# Prompt and status messages go to stderr so the wrapped command's stdout is
# not polluted.
if [[ "$CONFIRM_REQUIRED" == true && "${S_AUTO:-0}" != "1" && -t 0 ]]; then
  echo -e "${YELLOW}⚠ s: Type 'yes' to run, or Ctrl-C to abort:${RESET} " >&2
  CONFIRM=""
  # Prefer the controlling terminal; fall back to stdin. A failed read (EOF)
  # counts as "no" instead of killing the script under `set -e`.
  { read -r CONFIRM < /dev/tty; } 2>/dev/null || read -r CONFIRM || CONFIRM=""
  HESITATION_MS=$(( ($(date +%s%N) - HESITATION_START) / 1000000 ))
  if [[ "$CONFIRM" != "yes" ]]; then
    echo -e "${RED}s: aborted${RESET}" >&2
    s_append_entry "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" -1 0 "${HESITATION_MS}" true \
      || echo "s: warning: could not write telemetry to ${TELEMETRY_FILE}" >&2
    exit 1
  fi
fi

START_TS=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
START_NS=$(date +%s%N)

# Run the real command. The `||` keeps `set -e` from ending the script when
# the command fails; its status is captured and returned at the very end.
EXIT_CODE=0
"$@" || EXIT_CODE=$?

END_NS=$(date +%s%N)
DURATION_MS=$(( (END_NS - START_NS) / 1000000 ))

# Log telemetry. A logging failure is reported but must never replace the
# exit status of the wrapped command.
s_append_entry "${START_TS}" "${EXIT_CODE}" "${DURATION_MS}" "${HESITATION_MS}" \
  || echo "s: warning: could not write telemetry to ${TELEMETRY_FILE}" >&2

if (( EXIT_CODE == 0 )); then
  echo -e "${GREEN}s: done (${DURATION_MS}ms)${RESET}" >&2
else
  echo -e "${RED}s: failed (exit ${EXIT_CODE}, ${DURATION_MS}ms)${RESET}" >&2
fi

exit "$EXIT_CODE"
