#!/bin/sh
# =============================================================================
# Physarum Pre-Push Gate (Bellman Optimal)
# =============================================================================
#
# This hook runs BEFORE pushing, preventing bad code from reaching CI/CD.
# Implements Bellman-optimal test selection based on change impact.
#
# Mathematical Basis:
# - Test priority = impact_score / test_duration
# - High-impact, fast tests run first (maximize value per time)
# - Skip tests unrelated to changes (minimize wasted compute)
#
# =============================================================================

# Opening banner printed at the start of every run.
printf '%s\n' \
  "Physarum Pre-Push Gate (Bellman Optimal)" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Positional arguments handed to the hook: $1 is stored as the remote name
# (used in the status line later on), $2 as its URL.
URL="$2"
REMOTE="$1"

# Get changed files against the PR-relevant base. Prefer origin/main's
# merge-base so rebased branches report the review scope instead of stale
# branch topology; keep the historical fallback chain if that cannot resolve.
# Current branch name plus the merge-base with origin/main (empty when it
# cannot be resolved, e.g. origin/main has not been fetched).
BRANCH=$(git rev-parse --abbrev-ref HEAD)
MERGE_BASE=$(git merge-base HEAD origin/main 2>/dev/null || true)

if [ -z "$MERGE_BASE" ]; then
  # Historical fallback chain: remote tracking branch, then origin/main,
  # then simply the last five commits.
  CHANGED_FILES=$(
    git diff --name-only "origin/$BRANCH...HEAD" 2>/dev/null \
      || git diff --name-only origin/main...HEAD 2>/dev/null \
      || git diff --name-only HEAD~5...HEAD
  )
else
  CHANGED_FILES=$(git diff --name-only "$MERGE_BASE...HEAD")
fi

# Nothing changed relative to the base: there is nothing to gate.
[ -n "$CHANGED_FILES" ] || {
  echo "✓ No changes to validate"
  exit 0
}

echo "Validating changes before push to $REMOTE ($BRANCH)"
echo ""

# =============================================================================
# Categorize Changes (for Bellman-optimal test selection)
# =============================================================================
# count_changed GREP_ARGS...
# Prints how many lines of $CHANGED_FILES match the given grep arguments.
# grep -c still prints 0 when nothing matches but exits non-zero, so the
# status is neutralised to keep the count usable in an assignment.
count_changed() {
  echo "$CHANGED_FILES" | grep -c "$@" || true
}

# Per-category counts; later sections gate on these being greater than zero.
HAS_BACKEND=$(count_changed "^backend/")
HAS_FRONTEND=$(count_changed "^frontend/")
HAS_MODELS=$(count_changed "models\|schemas")
HAS_API=$(count_changed "api/")
HAS_TESTS=$(count_changed "test")
HAS_MIGRATIONS=$(count_changed "alembic")
HAS_LOCAL_TOOLING=$(count_changed -E "^(scripts/|\\.husky/)")
HAS_CI_WORKFLOWS=$(count_changed "^\\.github/workflows/")

# Human-readable summary of the counts above, followed by a blank line.
cat <<EOF
Change Analysis:
  Backend: $HAS_BACKEND files
  Frontend: $HAS_FRONTEND files
  Models/Schemas: $HAS_MODELS files
  API: $HAS_API files
  Tests: $HAS_TESTS files
  Migrations: $HAS_MIGRATIONS files
  Local tooling: $HAS_LOCAL_TOOLING files
  CI/workflows: $HAS_CI_WORKFLOWS files

EOF

# =============================================================================
# Stash Validation (#884-P0-4)
# =============================================================================
# One line of `git stash list` per stash entry; a failing git command simply
# yields a count of 0. This is advisory only and never blocks the push.
STASH_COUNT=$(git stash list 2>/dev/null | awk 'END { print NR }')
if [ "$STASH_COUNT" -gt 0 ]; then
  printf '%s\n' \
    "⚠ WARNING: $STASH_COUNT stashed change(s) detected." \
    "  Stashed changes may contain uncommitted work that should be" \
    "  committed or dropped before pushing. Run: git stash list" \
    ""
fi

# =============================================================================
# Stub/Placeholder Detection (#884-P0-6)
# =============================================================================
# collect_stub_hits PATH_ERE MARKER_ERE
# For each path in $CHANGED_FILES that matches PATH_ERE and still exists as a
# regular file, print grep's "file:line:text" hits for MARKER_ERE
# (case-insensitive). Paths that no longer exist are skipped. Always succeeds,
# printing nothing when there are no hits.
collect_stub_hits() {
  echo "$CHANGED_FILES" | grep -E "$1" | while IFS= read -r f; do
    [ -f "$f" ] && grep -nHiE "$2" "$f" 2>/dev/null
  done || true
}

# report_stub_hits
# Reads CHANGED_FILES, HAS_BACKEND and HAS_FRONTEND; sets STUB_HITS (and
# FE_STUBS) and prints an advisory warning listing at most the first ten hits.
# Never blocks the push.
report_stub_hits() {
  STUB_HITS=""
  if [ "${HAS_BACKEND:-0}" -gt 0 ]; then
    STUB_HITS=$(collect_stub_hits "^backend/app/" \
      '\bSTUB\b|\bPLACEHOLDER\b|\bFIXME\b|# *TODO.*implement|pass *# *TODO|raise NotImplementedError')
  fi
  if [ "${HAS_FRONTEND:-0}" -gt 0 ]; then
    FE_STUBS=$(collect_stub_hits "^frontend/(app|components|lib)/" \
      '\bSTUB\b|\bPLACEHOLDER\b|\bFIXME\b|// *TODO.*implement|throw new Error\(.*(not implemented|todo)')
    if [ -n "$FE_STUBS" ]; then
      if [ -n "$STUB_HITS" ]; then
        # Command substitution strips the trailing newline, so join the two
        # lists explicitly; otherwise the last backend hit and the first
        # frontend hit end up fused on a single line.
        STUB_HITS=$(printf '%s\n%s' "$STUB_HITS" "$FE_STUBS")
      else
        STUB_HITS=$FE_STUBS
      fi
    fi
  fi
  if [ -n "$STUB_HITS" ]; then
    echo "⚠ WARNING: Stub/placeholder code detected in changed files:"
    echo "$STUB_HITS" | head -10
    echo "  Review before pushing to ensure no unfinished code ships."
    echo ""
  fi
}

report_stub_hits

# =============================================================================
# Change Scope Warning (#884-P0-1)
# =============================================================================
# Number of changed paths (one per line). Arithmetic expansion drops the
# padding some wc implementations put around the number.
FILE_COUNT=$(( $(echo "$CHANGED_FILES" | wc -l) ))

# Three size tiers: fewer than 4 files is silent, 4-9 is "medium",
# 10 or more is "large". Advisory only.
if [ "$FILE_COUNT" -lt 4 ]; then
  :
elif [ "$FILE_COUNT" -lt 10 ]; then
  printf '%s\n' \
    "⚠ MEDIUM CHANGE: $FILE_COUNT files modified." \
    "  Consider: grep + trace affected layers. Run gate." \
    ""
else
  printf '%s\n' \
    "⚠ LARGE CHANGE: $FILE_COUNT files modified." \
    "  Consider: full cross-layer trace + all gates + self-review diff." \
    ""
fi

# =============================================================================
# CI-in-Progress Push Prevention (#884-P0-16)
# =============================================================================
# Ask the GitHub CLI (when installed) whether a workflow run is currently in
# progress for this branch. The JSON array length is counted with python3;
# any failure along the pipeline is treated as "0 runs". Advisory only.
RUNNING_RUNS=0
if command -v gh >/dev/null 2>&1; then
  RUNNING_RUNS=$(
    gh run list --repo Arqera-IO/ARQERA --branch "$BRANCH" --status in_progress --json databaseId --limit 1 2>/dev/null \
      | python3 -c "import json,sys; print(len(json.load(sys.stdin)))" 2>/dev/null \
      || echo "0"
  )
fi
if [ "$RUNNING_RUNS" -gt 0 ]; then
  printf '%s\n' \
    "⚠ WARNING: CI is already running on branch '$BRANCH'." \
    "  Pushing now will cancel the in-progress run." \
    "  Consider waiting for CI to complete first." \
    ""
fi

# =============================================================================
# Backend Validation (if changes)
# =============================================================================
# Backend gate: lint, import smoke test, targeted tests and migration check.
# Runs only when files under backend/ changed. Lint, import and test failures
# block the push (exit 1); the migration check only warns.
if [ "$HAS_BACKEND" -gt 0 ]; then
  echo "📦 Backend Pre-Push Validation"
  echo "─────────────────────────────"

  # Every tool below assumes backend/ as the working directory. Stop here if
  # it cannot be entered rather than linting/testing the wrong tree.
  cd backend || {
    echo "  ✗ Cannot enter backend/ directory."
    exit 1
  }

  # Detect git worktree early — used by import check AND test step.
  # In a worktree (created via scripts/agent-workspace.sh) there's no
  # local venv and no project deps; CI runs the full suite anyway.
  # A worktree has a .git *file* at its root, and its common dir is not ".git".
  GIT_COMMON_DIR="$(cd .. && git rev-parse --git-common-dir 2>/dev/null || echo ".git")"
  IS_WORKTREE=false
  if [ -f "../.git" ] || [ "$GIT_COMMON_DIR" != ".git" ]; then
    IS_WORKTREE=true
  fi

  # 1. Full lint check
  echo "  [1/4] Running full lint check..."
  # NOTE: this hook runs under /bin/sh. The bash-only `&>` operator would be
  # parsed there as "run in background, then redirect nothing", making the
  # test always true — hence the portable `>/dev/null 2>&1` form.
  if command -v ruff >/dev/null 2>&1; then
    if ! ruff check app/ 2>/dev/null; then
      echo "  ✗ Lint errors detected. Fix before pushing."
      cd ..
      exit 1
    fi
    echo "  ✓ Lint passed"
  fi

  # 2. Import verification (catches circular imports)
  echo "  [2/4] Verifying imports..."
  if [ "$IS_WORKTREE" = "true" ]; then
    echo "  ⚠ Skipping import check (git worktree — CI will verify)"
  else
    # Use backend venv Python if available, fall back to system python3
    BACKEND_PYTHON="python3"
    if [ -f "../.venv/bin/python" ]; then
      BACKEND_PYTHON="../.venv/bin/python"
    elif [ -f ".venv/bin/python" ]; then
      BACKEND_PYTHON=".venv/bin/python"
    elif [ -f "venv/bin/python" ]; then
      BACKEND_PYTHON="venv/bin/python"
    fi
    # Capture the interpreter output so the real traceback can be shown on
    # failure instead of an unexplained "import error".
    if ! IMPORT_OUTPUT=$($BACKEND_PYTHON -c "from app.main import app" 2>&1); then
      echo "  ✗ Import error detected. App cannot start."
      echo "$IMPORT_OUTPUT" | sed 's/^/    /'
      cd ..
      exit 1
    fi
    echo "  ✓ Imports OK"
  fi

  # 3. Bellman-optimal test selection
  echo "  [3/4] Running targeted tests..."
  if [ "$IS_WORKTREE" = "true" ]; then
    # Worktrees share the git objects but do NOT have a local venv with
    # backend deps. Bare `pytest` resolves to system Python and fails
    # with `ModuleNotFoundError: No module named 'sqlalchemy'`. Using
    # `uv run pytest` also fails because uv builds its own env from
    # pyproject.toml which lacks backend deps. Skip targeted tests in
    # worktrees — CI runs the full suite regardless.
    echo "  ⚠ Skipping targeted tests (git worktree — CI will verify)"
  else
    TESTS_TO_RUN=""

    # Select tests based on changed files (Bellman optimal: impact/duration)
    # Paths must match actual test file locations in backend/tests/
    if [ "$HAS_API" -gt 0 ]; then
      TESTS_TO_RUN="$TESTS_TO_RUN tests/core/test_main_scheduler.py"
    fi
    if echo "$CHANGED_FILES" | grep -q "auth\|security"; then
      TESTS_TO_RUN="$TESTS_TO_RUN tests/security/test_auth_security.py tests/behavior/test_auth_behavior.py"
    fi

    if [ -n "$TESTS_TO_RUN" ]; then
      # Use uv run pytest to ensure project venv dependencies are available.
      # Bare `pytest` may use system Python which lacks sqlalchemy etc.
      PYTEST_CMD="pytest"
      if command -v uv >/dev/null 2>&1; then
        PYTEST_CMD="uv run pytest"
      fi
      if command -v pytest >/dev/null 2>&1 || command -v uv >/dev/null 2>&1; then
        # Both variables hold space-separated word lists and are deliberately
        # left unquoted so they split into separate arguments.
        # shellcheck disable=SC2086
        $PYTEST_CMD $TESTS_TO_RUN -v --tb=short -q --maxfail=3 2>/dev/null || {
          echo "  ✗ Tests failed. Fix before pushing."
          cd ..
          exit 1
        }
        echo "  ✓ Targeted tests passed"
      else
        echo "  ⚠ pytest not available, skipping tests"
      fi
    else
      echo "  ✓ No critical tests needed for these changes"
    fi
  fi

  # 4. Migration check (if changed) — advisory: a failing (or missing)
  # alembic only produces a warning.
  if [ "$HAS_MIGRATIONS" -gt 0 ]; then
    echo "  [4/4] Checking migrations..."
    if ! alembic check 2>/dev/null; then
      echo "  ⚠ Migration check warning (review manually)"
    else
      echo "  ✓ Migrations OK"
    fi
  else
    echo "  [4/4] No migration changes"
  fi

  cd ..
  echo ""
fi

# =============================================================================
# Frontend Validation (if changes)
# =============================================================================
# Frontend gate: dependency pre-flight, TypeScript type check and ESLint.
# Runs only when files under frontend/ changed. A missing tsc binary or a
# type-check failure blocks the push (exit 1); ESLint findings only warn.
if [ "$HAS_FRONTEND" -gt 0 ]; then
  echo "🎨 Frontend Pre-Push Validation"
  echo "──────────────────────────────"

  # The npm scripts below must run from frontend/. Stop here if it cannot be
  # entered rather than running them in the wrong directory.
  cd frontend || {
    echo "  ✗ Cannot enter frontend/ directory."
    exit 1
  }

  # Pre-flight: verify dependencies are installed. In git worktrees,
  # frontend/node_modules is NOT shared from the main repo by default. Without
  # this check, npm run type-check fails with "tsc: command not found" and the
  # (previously silenced) error looked like a TypeScript failure — costing real
  # debug cycles. scripts/agent-workspace.sh auto-symlinks node_modules for
  # worktree agents; this check catches any lingering misconfiguration.
  if [ ! -x "node_modules/.bin/tsc" ]; then
    echo "  ✗ node_modules/.bin/tsc is missing or not executable."
    echo "    If this is a git worktree, re-create it via 'bash scripts/agent-workspace.sh'"
    echo "    which now auto-symlinks frontend/node_modules from the main repo."
    echo "    Otherwise run 'pnpm install' (or 'npm install') inside frontend/."
    cd ..
    exit 1
  fi

  # Resolve npm availability once, using POSIX redirection: this hook runs
  # under /bin/sh, where the bash-only `&>` would make the test always true.
  HAS_NPM=false
  if command -v npm >/dev/null 2>&1; then
    HAS_NPM=true
  fi

  # 1. TypeScript type check
  echo "  [1/2] Running TypeScript check..."
  if [ "$HAS_NPM" = "true" ]; then
    # Capture output so we can show real errors on failure instead of silencing
    # stderr (previously `2>/dev/null`, which hid "tsc: command not found" and
    # other actionable errors — a Tier 1 honesty violation).
    # The `|| STATUS=$?` form records the exit code without tripping errexit
    # when the hook is launched with `sh -e`.
    TYPECHECK_STATUS=0
    TYPECHECK_OUTPUT=$(npm run type-check 2>&1) || TYPECHECK_STATUS=$?
    if [ "$TYPECHECK_STATUS" -ne 0 ]; then
      echo "  ✗ TypeScript check failed. Output:"
      echo "$TYPECHECK_OUTPUT" | sed 's/^/    /'
      cd ..
      exit 1
    fi
    echo "  ✓ TypeScript check passed"
  else
    echo "  ⚠ npm not available, skipping TypeScript check"
  fi

  # 2. ESLint (advisory: findings are printed but do not block the push)
  echo "  [2/2] Running ESLint..."
  if [ "$HAS_NPM" = "true" ]; then
    LINT_STATUS=0
    LINT_OUTPUT=$(npm run lint 2>&1) || LINT_STATUS=$?
    if [ "$LINT_STATUS" -ne 0 ]; then
      echo "  ⚠ Lint warnings (review before pushing). Output:"
      echo "$LINT_OUTPUT" | sed 's/^/    /'
    else
      echo "  ✓ ESLint passed"
    fi
  else
    # Without npm there is nothing to run; say so instead of reporting the
    # "command not found" error as if it were lint output.
    echo "  ⚠ npm not available, skipping ESLint"
  fi

  cd ..
  echo ""
fi

# =============================================================================
# CI Health Check (prevents top CI failure modes)
# =============================================================================
# Run the repository's CI health script when both python3 and the script are
# present; a non-zero exit from the script blocks the push. When either is
# missing the step is a no-op apart from the surrounding status lines.
echo "Running CI health check..."
if command -v python3 >/dev/null 2>&1; then
  if [ -f "scripts/check-ci-health.py" ]; then
    if ! python3 scripts/check-ci-health.py; then
      echo "CI health check FAILED — fix issues before pushing"
      exit 1
    fi
  fi
fi
echo ""

# =============================================================================
# ARQERA Governance Gate (push recorded in substrate via twin; no staging HTTP)
# =============================================================================
echo "🏛 ARQERA Governance Check"
echo "──────────────────────────"

# Per arq://doc/principle/arqera-contract-enforcement-v1: substrate-canonical
# emission via twin, NOT curl to a possibly-non-existent endpoint.
#
# History: the /api/swarm/messages POST endpoint was removed/refactored
# at some point. The hook continued curl-ing the ghost endpoint and silently
# fell through to "ARQERA unreachable — proceeding without governance" for
# weeks (visible in act-queue from 2026-04-23 onward). This is the proxy-
# validation bug: validating against a deleted endpoint can't possibly fail
# in a useful way.
#
# Fix: emit the push event directly to substrate. twin signs, addressing
# service stores, addressing service IS the evidence chain. No staging
# HTTP dependency.

# json_escape STRING
# Prints STRING with backslashes and double quotes escaped so it can sit
# inside a JSON string literal. Control characters are not handled; the
# values passed below are single-line.
json_escape() {
  printf '%s\n' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'
}

# Summary line of the diff against the remote tracking branch. The status is
# taken from git itself (not from a trailing `tail`), so "unknown" is really
# used when the tracking branch does not exist yet.
if DIFF_STAT_RAW=$(git diff --stat "origin/$BRANCH...HEAD" 2>/dev/null); then
  DIFF_STAT=$(printf '%s\n' "$DIFF_STAT_RAW" | tail -1)
else
  DIFF_STAT="unknown"
fi
AGENT_ID="${ARQERA_AGENT_ID:-unknown}"
PUSH_TS=$(date -u +%Y%m%dT%H%M%SZ)
# Branch name with "/" replaced by "-". Uses tr because the bash-only
# ${var//a/b} expansion is a fatal "Bad substitution" under a POSIX /bin/sh.
PUSH_REF="$(printf '%s' "$BRANCH" | tr '/' '-')-${PUSH_TS}"

if [ "$HAS_MIGRATIONS" -gt 0 ]; then
  HAS_MIGRATIONS_JSON="true"
else
  HAS_MIGRATIONS_JSON="false"
fi

# Escape the free-form strings before embedding them in the payload.
BRANCH_JSON=$(json_escape "$BRANCH")
AGENT_ID_JSON=$(json_escape "$AGENT_ID")
DIFF_STAT_JSON=$(json_escape "$DIFF_STAT")

PUSH_PAYLOAD=$(cat <<PAYLOAD_EOF
{
  "branch": "${BRANCH_JSON}",
  "agent_id": "${AGENT_ID_JSON}",
  "files_changed": ${FILE_COUNT},
  "diff_stat": "${DIFF_STAT_JSON}",
  "backend_files": ${HAS_BACKEND},
  "frontend_files": ${HAS_FRONTEND},
  "has_migrations": ${HAS_MIGRATIONS_JSON},
  "principle": "arq://doc/principle/arqera-contract-enforcement-v1",
  "issued_at": "$(date -u +%FT%TZ)"
}
PAYLOAD_EOF
)

if twin --use-keychain act emit --payload "$PUSH_PAYLOAD" \
       act push_recorded "$PUSH_REF" >/dev/null 2>&1; then
    echo "  ✓ Push recorded in ARQERA evidence chain (substrate-canonical)"
    echo "    arq://act/push_recorded/${PUSH_REF}"
else
    # twin failed — substrate write didn't land directly. The act-queue
    # daemon will retry, but per the principle we MUST NOT silently proceed.
    if [ "${ARQERA_PUSH_BYPASS:-0}" = "1" ]; then
        echo "  ⚠ twin act emit failed — proceeding under ARQERA_PUSH_BYPASS=1"
        echo "    (substrate-attestation deferred; act-queue daemon will retry)"
    else
        echo "  ✗ twin act emit failed — BLOCKING push"
        echo "    twin command failed. Check: twin --use-keychain status"
        echo "    Emergency override: set ARQERA_PUSH_BYPASS=1 (still attempts queue)"
        exit 1
    fi
fi

# (Removed: second curl POST to ${ARQERA_URL}/api/evidence/ — another
# ghost endpoint that the prior version called silently. Evidence is
# already in substrate via the twin act emit above; no parallel HTTP path.)

echo ""

# =============================================================================
# Final Gate
# =============================================================================
# Closing summary, reached only when no earlier gate exited non-zero.
# The quoted delimiter keeps the text literal (no expansion).
cat <<'FINAL_EOF'
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
✓ Pre-push gate passed - ready for CI/CD

Physarum conductance: High (stable codebase)
Bellman cost: Optimized (targeted tests only)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
FINAL_EOF
