#!/usr/bin/env python3
"""First-run setup: profile host repo, smoke alc_mcp, write session context.

Run from inside a target repo after the package is installed. Idempotent.

Usage:
    alc_init [--repo PATH] [--state-dir PATH] [--install-deps] [--skip-mcp-smoke] [--quiet]

Reads from:
    - the host repo file tree (read-only profiling)
    - optionally pip-installs `requirements-optional.txt` from the plugin dir

Writes:
    - <state-root>/repos/<repo-id>/reports/latest-session-context.md
    - prints a JSON summary on stdout
"""

from __future__ import annotations

import argparse
import importlib
import json
import os
import pathlib
import site
import subprocess
import sys
from typing import Any

# Plugin root: `parents[1]` of this file's resolved path, i.e. the directory
# that contains the directory this script lives in.
PLUGIN_ROOT = pathlib.Path(__file__).resolve().parents[1]
# Put <plugin-root>/bin at the front of sys.path. This has to run before the
# project-local imports that follow (which is why they carry `noqa: E402`).
sys.path.insert(0, str(PLUGIN_ROOT / "bin"))

import alc_query  # noqa: E402
import ce_playbook  # noqa: E402
from state_handle import StateHandle  # noqa: E402
from session_context_render import (  # noqa: E402
    render_runtime_summary_md,
    render_ce_usage_md,
    render_doc_contract_md,
    render_session_context,
)

# Prefixes that mark a skill as part of the compound-engineering family or
# adjacent workflow tooling worth tracking. Matched against `actor_name` in
# events. We bias toward over-inclusion — the section can always filter
# down at render time, but missing data can't be reconstructed.
#
# `collect_runtime_summary` passes this (as a list) to
# `alc_query.get_skill_usage_summary(prefix_filter=...)`. Several entries are
# complete skill names rather than short prefixes.
# NOTE(review): the exact matching rule (presumably `str.startswith`) lives in
# alc_query — confirm there before adding entries.
CE_SKILL_PREFIXES = (
    "ce-",
    "compound-engineering:",
    "improve-codebase-architecture",
    "to-prd",
    "to-issues",
)

# Documentation contract: files/dirs ALC expects in a healthy repo, paired
# with the CE skill that produces them. Missing entries surface as hints in
# session-context (with manual fallback when CE isn't installed).
#
# Each entry is a 4-tuple consumed by `check_doc_contract`:
#   label            - human-readable name of the document or directory
#   paths_to_check   - repo-relative candidates, probed in order; the first
#                      one that exists wins
#   generator_skill  - name of the skill that produces it, or None when the
#                      entry has no generator (see "Repo guide")
#   tier             - grouping tag; values used here are "anchor",
#                      "architecture" and "workflow"
DOC_CONTRACT = [
    # (label, paths_to_check_in_order, generator_skill, tier)
    ("STRATEGY.md", ["STRATEGY.md"], "ce-strategy", "anchor"),
    ("Repo guide", ["AGENTS.md", "CLAUDE.md", "GEMINI.md"], None, "anchor"),
    ("ARCHITECTURE.md", ["ARCHITECTURE.md", "docs/ARCHITECTURE.md"],
        "improve-codebase-architecture", "architecture"),
    ("CONTEXT.md", ["CONTEXT.md", "context.md"],
        "ce-agent-native-architecture", "architecture"),
    ("ADRs", ["docs/adr", "docs/adrs", "docs/decisions"],
        "improve-codebase-architecture", "architecture"),
    ("Brainstorms", ["docs/brainstorms"], "ce-brainstorm", "workflow"),
    ("Plans", ["docs/plans"], "ce-plan", "workflow"),
]

# Lower-cased file suffix -> language label used when tallying source files.
# Suffixes that are not listed here are simply not counted.
EXT_TO_LANG = {
    ".py": "python",
    ".rb": "ruby",
    # TypeScript / JavaScript family
    ".ts": "typescript",
    ".tsx": "typescript",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    # Compiled / JVM / systems languages
    ".go": "go",
    ".rs": "rust",
    ".java": "java",
    ".kt": "kotlin",
    ".swift": "swift",
    ".cs": "csharp",
    ".php": "php",
    ".clj": "clojure",
    # BEAM languages
    ".ex": "elixir",
    ".exs": "elixir",
    ".erl": "erlang",
    ".scala": "scala",
    ".lua": "lua",
    # Shell dialects
    ".sh": "shell",
    ".bash": "shell",
    ".zsh": "shell",
    ".sql": "sql",
    # Single-file component formats
    ".vue": "vue",
    ".svelte": "svelte",
}

# Directory names that are never descended into while profiling a repo.
SKIP_DIRS = {
    # version control and ALC's own state
    ".git",
    ".agent-learning",
    # dependency trees and virtualenvs
    "node_modules",
    "vendor",
    ".venv",
    "venv",
    # build output
    "dist",
    "build",
    "target",
    ".next",
    ".nuxt",
    "coverage",
    # tool caches
    "__pycache__",
    ".cache",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
}


def detect_repo(repo: pathlib.Path) -> dict:
    """Profile the host repo from its file tree. Read-only.

    The tree is walked once with ``os.walk``; directories named in
    ``SKIP_DIRS`` and any directory whose name starts with ``.`` are pruned.
    Everything else (frameworks, package managers, tests, monorepo) is
    inferred from the presence or contents of well-known files directly
    under ``repo``.

    Args:
        repo: Root directory of the repository to profile.

    Returns:
        A dict with the keys ``name``, ``abspath``, ``has_git``,
        ``languages`` (language label -> file count), ``frameworks`` and
        ``package_managers`` (sorted, de-duplicated lists), ``has_tests``,
        ``has_frontend`` and ``monorepo``.
    """
    # --- Languages: count files per language label in a single pass. ---
    languages: dict[str, int] = {}
    for _dirpath, dirnames, filenames in os.walk(repo):
        # Prune in place so os.walk never descends into skipped/hidden dirs.
        dirnames[:] = [
            d for d in dirnames
            if d not in SKIP_DIRS and not d.startswith(".")
        ]
        for fname in filenames:
            lang = EXT_TO_LANG.get(pathlib.Path(fname).suffix.lower())
            if lang:
                languages[lang] = languages.get(lang, 0) + 1

    # --- Framework + package-manager hints (file-presence-based). ---
    frameworks: list[str] = []
    package_managers: list[str] = []

    if (repo / "Gemfile").is_file():
        package_managers.append("bundler")
        if (repo / "config" / "application.rb").is_file():
            frameworks.append("rails")

    package_json = repo / "package.json"
    has_package_json = package_json.is_file()
    if has_package_json:
        package_managers.append("npm")
        try:
            pkg = json.loads(package_json.read_text(encoding="utf-8"))
            deps = {**pkg.get("dependencies", {}), **pkg.get("devDependencies", {})}
        except (OSError, ValueError, TypeError, AttributeError):
            # Best effort: an unreadable, malformed or oddly-shaped
            # package.json still counts as "npm" but yields no framework hints.
            deps = None
        if deps is not None:
            for key, label in (
                ("next", "nextjs"), ("react", "react"), ("vue", "vue"),
                ("svelte", "svelte"), ("@sveltejs/kit", "sveltekit"),
                ("express", "express"), ("fastify", "fastify"),
                ("hono", "hono"), ("@cloudflare/workers-types", "cloudflare-workers"),
            ):
                if key in deps:
                    frameworks.append(label)
            # Plain "node" is only a fallback when nothing else was detected.
            if not frameworks:
                frameworks.append("node")

    pyproject = repo / "pyproject.toml"
    if pyproject.is_file():
        package_managers.append("pip/poetry/uv")
        try:
            text = pyproject.read_text(encoding="utf-8", errors="ignore").lower()
        except OSError:
            text = ""
        # Substring match on the raw file text, not a TOML parse.
        for needle in ("fastapi", "flask", "django"):
            if needle in text:
                frameworks.append(needle)

    if (repo / "manage.py").is_file():
        frameworks.append("django")
    if (repo / "wrangler.toml").is_file() or (repo / "wrangler.jsonc").is_file():
        frameworks.append("cloudflare-workers")
    if (repo / "Cargo.toml").is_file():
        package_managers.append("cargo")
    if (repo / "go.mod").is_file():
        package_managers.append("go-modules")
    if (repo / "pom.xml").is_file():
        package_managers.append("maven")
    if (repo / "build.gradle").is_file() or (repo / "build.gradle.kts").is_file():
        package_managers.append("gradle")

    # --- Tests: any conventional top-level test directory. ---
    has_tests = any(
        (repo / tdir).is_dir() for tdir in ("tests", "test", "spec", "__tests__")
    )

    # --- Frontend: implied by a detected frontend framework. ---
    frontend_frameworks = {"react", "vue", "svelte", "sveltekit", "nextjs"}
    has_frontend = not frontend_frameworks.isdisjoint(frameworks)

    # --- Monorepo: workspace manifests, or packages/ or apps/ next to package.json. ---
    monorepo = (
        (repo / "pnpm-workspace.yaml").is_file()
        or (repo / "lerna.json").is_file()
        or (has_package_json
            and ((repo / "packages").is_dir() or (repo / "apps").is_dir()))
    )

    return {
        "name": repo.name,
        "abspath": str(repo),
        # `.git` is a directory in a normal clone but a plain file in linked
        # worktrees and submodules, so test for existence rather than is_dir().
        "has_git": (repo / ".git").exists(),
        "languages": languages,
        "frameworks": sorted(set(frameworks)),
        "has_tests": has_tests,
        "has_frontend": has_frontend,
        "monorepo": monorepo,
        "package_managers": sorted(set(package_managers)),
    }


def ensure_mcp(install: bool, quiet: bool) -> dict:
    """Check that `mcp` is importable, optionally pip-installing it.

    Args:
        install: When True and `mcp` is missing, pip-install
            ``requirements-optional.txt`` from the plugin root.
        quiet: Suppress the progress line written to stderr before pip runs.

    Returns:
        A dict with ``available`` (bool), ``installed_now`` (bool, True once
        pip has exited successfully) and ``error`` (str or None).
    """
    status = {"available": False, "installed_now": False, "error": None}
    try:
        __import__("mcp")
    except ImportError:
        pass
    else:
        status["available"] = True
        return status

    req = PLUGIN_ROOT / "requirements-optional.txt"
    if not install:
        status["error"] = (
            "mcp not installed; pass --install-deps to pip-install optional extras "
            f"from {req}"
        )
        return status

    if not req.is_file():
        status["error"] = f"requirements-optional.txt missing at {req}"
        return status

    # pip refuses `--user` inside a virtualenv whose user site is not visible,
    # so only request a user install when running outside a venv.
    in_venv = (
        sys.prefix != getattr(sys, "base_prefix", sys.prefix)
        or hasattr(sys, "real_prefix")
    )
    cmd = [sys.executable, "-m", "pip", "install"]
    if not in_venv:
        cmd.append("--user")
    cmd += ["-r", str(req)]

    if not quiet:
        print(f"alc_init: pip-installing optional deps from {req}", file=sys.stderr)
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        status["error"] = (proc.stderr or proc.stdout).strip()[:500]
        return status

    status["installed_now"] = True

    # Make the fresh install visible to *this* interpreter. The user site dir
    # is only put on sys.path at startup if it already existed, and import
    # finders cache directory listings.
    if not in_venv:
        user_site = site.getusersitepackages()
        if user_site not in sys.path:
            site.addsitedir(user_site)
    importlib.invalidate_caches()

    try:
        __import__("mcp")
        status["available"] = True
    except ImportError as exc:
        status["error"] = f"pip succeeded but mcp still not importable: {exc}"
    return status


def smoke_mcp(timeout_s: int = 10) -> dict:
    """Run alc_mcp/server.py with initialize + tools/list; return status + tool names.

    The server is launched as a subprocess of the current interpreter and fed
    two newline-delimited JSON-RPC requests on stdin. Its stdout is scanned
    line by line for the reply with ``id == 2``.

    Args:
        timeout_s: Seconds to wait for the subprocess before giving up.

    Returns:
        A dict with ``status`` (one of ``"missing"``, ``"timeout"``,
        ``"error"``, ``"no_tools"``, ``"green"``), ``tools`` (list of tool
        names) and ``error`` (str or None).
    """
    server = PLUGIN_ROOT / "alc_mcp" / "server.py"
    if not server.is_file():
        return {"status": "missing", "tools": [], "error": f"{server} not found"}

    init = (
        '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{'
        '"protocolVersion":"2024-11-05","capabilities":{},'
        '"clientInfo":{"name":"alc_init","version":"1.0"}}}'
    )
    list_req = '{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}'
    payload = init + "\n" + list_req + "\n"

    try:
        proc = subprocess.run(
            [sys.executable, str(server)],
            input=payload, text=True, capture_output=True, timeout=timeout_s,
        )
    except subprocess.TimeoutExpired:
        return {"status": "timeout", "tools": [], "error": f"no response within {timeout_s}s"}
    except Exception as exc:
        return {"status": "error", "tools": [], "error": str(exc)}

    tools: list[str] = []
    rpc_error = None
    for line in proc.stdout.splitlines():
        try:
            msg = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Stdout may carry valid JSON that is not a JSON-RPC object (a bare
        # number, string or list); skip it instead of crashing on `.get`.
        if not isinstance(msg, dict) or msg.get("id") != 2:
            continue
        result = msg.get("result")
        listed = result.get("tools") if isinstance(result, dict) else None
        if isinstance(listed, list):
            tools = [t["name"] for t in listed if isinstance(t, dict) and "name" in t]
        if msg.get("error") is not None:
            # Keep the server's own explanation for the failure report below.
            rpc_error = json.dumps(msg["error"])[:500]
        break

    if not tools:
        err = (proc.stderr or "").strip()[:500] or rpc_error or "no tools returned"
        return {"status": "no_tools", "tools": [], "error": err}
    return {"status": "green", "tools": tools, "error": None}


def collect_runtime_summary(state: StateHandle) -> dict[str, Any]:
    """Gather the alc_query reads that feed the session-context report.

    Actors, applies and outcomes are read over a 7-day window; CE skill usage
    over 30 days, filtered by ``CE_SKILL_PREFIXES``; recommendations and
    pending patches take no window. Each call gracefully returns [] / {} when
    events.sqlite is absent (fresh install). Raw rows are never handed to the
    markdown layer directly — see `render_runtime_summary_md` for the prose
    synthesis.
    """
    recent = "7d"
    summary: dict[str, Any] = {}
    summary["actors"] = alc_query.get_actor_summary(state, since=recent)
    summary["applies"] = alc_query.get_apply_log(state, since=recent)
    summary["outcomes"] = alc_query.get_outcomes(state, since=recent)
    summary["recommendations"] = alc_query.get_recommendations(state)
    summary["pending_patches"] = alc_query.get_pending_patches(state)
    summary["ce_usage"] = alc_query.get_skill_usage_summary(
        state, since="30d", prefix_filter=list(CE_SKILL_PREFIXES)
    )
    return summary


def check_doc_contract(repo: pathlib.Path) -> list[dict[str, Any]]:
    """Report which documents from ``DOC_CONTRACT`` exist in the host repo.

    One row is produced per contract entry, carrying the first candidate path
    that exists under ``repo`` (or None), the full list of candidates, the
    generator skill and the tier. Files and directories are treated alike: an
    empty directory still counts as present, because the contract is about
    the location, not the contents.
    """
    report: list[dict[str, Any]] = []
    for label, candidates, skill, tier in DOC_CONTRACT:
        # First candidate that exists on disk, in declared order.
        hit = next((rel for rel in candidates if (repo / rel).exists()), None)
        report.append(
            {
                "label": label,
                "paths_checked": candidates,
                "found": hit,
                "generator": skill,
                "tier": tier,
            }
        )
    return report


def write_session_context(state_root: pathlib.Path, repo_id: str, body: str) -> pathlib.Path:
    """Write ``body`` to the repo's latest-session-context.md and return its path.

    The text is first written to a sibling ``.tmp`` file and then renamed over
    the target, so the report is swapped in with a single rename.
    """
    reports_dir = state_root / "repos" / repo_id / "reports"
    reports_dir.mkdir(parents=True, exist_ok=True)

    target = reports_dir / "latest-session-context.md"
    staging = reports_dir / (target.name + ".tmp")
    staging.write_text(body, encoding="utf-8")
    staging.replace(target)
    return target


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--repo", type=pathlib.Path, default=pathlib.Path.cwd(),
                        help="host repo to profile (default: cwd)")
    parser.add_argument("--state-dir", type=pathlib.Path,
                        help="state root (default: <repo>/.agent-learning)")
    parser.add_argument("--install-deps", action="store_true",
                        help="pip-install requirements-optional.txt if mcp missing")
    parser.add_argument("--skip-mcp-smoke", action="store_true",
                        help="don't actually launch the MCP server")
    parser.add_argument("--quiet", action="store_true",
                        help="suppress progress on stderr")
    return parser.parse_args(argv)


def main(argv: list[str] | None = None) -> int:
    """Run first-time setup end to end and print a JSON summary on stdout.

    Steps, in order: profile the repo, check (and optionally install) `mcp`,
    smoke-test the MCP server, collect the runtime summary from alc_query,
    render and write the session-context markdown, then print the summary.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 when the MCP smoke status is ``"green"`` or ``"skipped"``, else 1.
    """
    args = parse_args(argv)
    repo = args.repo.expanduser().resolve()
    state_root = (args.state_dir.expanduser().resolve()
                  if args.state_dir else repo / ".agent-learning")

    def progress(message: str) -> None:
        # Progress goes to stderr so stdout stays a clean JSON document.
        if not args.quiet:
            print(f"alc_init: {message}", file=sys.stderr)

    progress(f"repo={repo}")
    progress(f"state={state_root}")

    profile = detect_repo(repo)
    languages = profile["languages"]
    top_lang = max(languages, key=languages.get) if languages else "_none_"
    progress(f"top language={top_lang} "
             f"frameworks={profile['frameworks']} "
             f"tests={profile['has_tests']}")

    deps_status = ensure_mcp(install=args.install_deps, quiet=args.quiet)
    progress(f"mcp available={deps_status['available']} "
             f"installed_now={deps_status['installed_now']}")

    if args.skip_mcp_smoke:
        mcp_status = {"status": "skipped", "tools": [], "error": "smoke skipped via --skip-mcp-smoke"}
    elif not deps_status["available"]:
        mcp_status = {"status": "unavailable", "tools": [], "error": deps_status.get("error")}
    else:
        mcp_status = smoke_mcp()
    progress(f"mcp_status={mcp_status['status']} "
             f"tools={len(mcp_status['tools'])}")

    ce_installed = ce_playbook.detect_ce_installed()

    # Pull from alc_query — synthesized in prose, never raw JSON.
    # The state dir is handed over via an environment variable. Save and
    # restore only that one key rather than wiping and rebuilding all of
    # os.environ, so the override does not leak past this block.
    env_key = "AGENT_LEARNING_STATE_DIR"
    previous_state_dir = os.environ.get(env_key)
    os.environ[env_key] = str(state_root)
    try:
        state = StateHandle.for_repo(repo)
        runtime_summary = collect_runtime_summary(state)
    finally:
        if previous_state_dir is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous_state_dir

    ce_usage_rows = runtime_summary.get("ce_usage") or []
    runtime_md = render_runtime_summary_md(runtime_summary)
    ce_usage_md = render_ce_usage_md(ce_usage_rows)
    doc_contract_rows = check_doc_contract(repo)
    doc_contract_md = render_doc_contract_md(doc_contract_rows, ce_installed)
    missing_docs = sum(1 for r in doc_contract_rows if not r["found"])

    # Pass CE usage counts to playbook so per-section hints can include
    # "(N invocations tracked)" annotations.
    usage_counts = {row["actor_name"]: row["count"] for row in ce_usage_rows}
    playbook_md = ce_playbook.render(profile, ce_installed, usage_counts=usage_counts)

    progress(f"ce_plugin_installed={ce_installed} "
             f"tracked_ce_skills={len(usage_counts)} "
             f"doc_contract_missing={missing_docs}")

    repo_id = StateHandle.repo_id(repo)
    body = render_session_context(
        profile, mcp_status,
        playbook_md=playbook_md,
        runtime_md=runtime_md,
        ce_usage_md=ce_usage_md,
        doc_contract_md=doc_contract_md,
    )
    out = write_session_context(state_root, repo_id, body)
    progress(f"wrote {out}")

    summary = {
        "repo": str(repo),
        "state_root": str(state_root),
        "repo_id": repo_id,
        "profile": profile,
        "deps": deps_status,
        "mcp": mcp_status,
        "ce_plugin_installed": ce_installed,
        "ce_skill_usage_tracked": len(usage_counts),
        "doc_contract_missing": missing_docs,
        "session_context": str(out),
    }
    print(json.dumps(summary, indent=2, sort_keys=True))
    # Exit non-zero if MCP is broken in a way the user should know about.
    if mcp_status["status"] in {"green", "skipped"}:
        return 0
    return 1


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
