#!/usr/bin/env python3
"""arq — the single ARQERA CLI.

You ask; ARQERA routes. Every answer carries context (why, alternatives,
freshness, authority, recall path) so it's trustworthy, not bare.

Honours the four ARQERA promises:
  arq://body/principle/arqera-answers-v1     — find the right worker/surface
  arq://body/principle/arqera-remembers-v1   — point-in-time, never just yes/no
  arq://body/principle/arqera-learns-v1      — gets better with use
  arq://body/principle/arqera-earns-v1       — only what you authorise

Decision body: arq://body/decision/one-cli-not-many-2026-05-25

Usage:
  arq <natural-language-question-or-action>

Examples:
  arq is the catalogue healthy?
  arq show me ui routes for engineers
  arq find me a slack channel for engineering
  arq how many wieldable things do we have?
  arq what does arqera promise me?

Replaces (deleted in same commit):
  arq-ask · arq-wieldability-catalogue · arq-wieldability-completeness
"""
from __future__ import annotations

import argparse
import json
import re
import subprocess
import sys
import urllib.error
import urllib.request
from collections import Counter, defaultdict
from datetime import UTC, datetime
from pathlib import Path

# Base URL that every `/address/...` substrate lookup in this file is built on.
ADDRESSING_BASE = "https://addressing.arqera.io"

# Substrate body types treated as "wieldable" surfaces. Catalogue queries
# fall back to scanning all of these when the question names no specific kind.
WIELDABLE_KINDS = [
    "wieldable_ui_route",
    "wieldable_agent",
    "wieldable_ui_component",
    "wieldable_component_worker",
    "wieldable_api_primitive",
    "wieldable_language",
    "wieldable_external_tool",
    "wieldable_human_worker",
    "wieldable_training_loop",
    "wieldable_authority_grant",
    "wieldable_customer_need",
]


# ── intent classification (substrate-canonical via arq://body/intent_grammar/v2) ──

# "for <role>" phrase; group(1) is always the singular role name. The optional
# trailing "s" lets plural phrasings ("for engineers", "for customers") match —
# without it the closing \b rejects them and no role filter is applied.
ROLE_HINT = re.compile(r"\bfor (visitor|prospect|customer|operator|admin|engineer|sales|staff)s?\b", re.I)
# Noun (singular or plural) naming the kind of surface the user is asking about.
KIND_HINT = re.compile(r"\b(ui routes?|components?|agents?|workers?|integrations?|primitives?|languages?|tools?|training loops?|grants?|customer needs?)\b", re.I)
# Trigger phrases that send a question to the dispatch handler.
DISPATCH_INTENT = re.compile(r"\b(dispatch|who should|which worker|route to|parallel|delegate to whom)\b", re.I)

# Built-in routing table, used whenever the substrate intent_grammar cannot be
# fetched at startup. Per arq://doc/principle/arqera-contract-enforcement-v1
# the substrate copy is canonical; this one only keeps arq usable offline and
# during bootstrap. Each pattern is a regex fragment, not a plain string.
_FALLBACK_HANDLER_ROUTING = {
    # Handlers are tried in this order; the first one with a matching pattern wins.
    "priority_order": [
        "contract",
        "drift",
        "identity",
        "doctrine",
        "audit",
        "catalogue",
        "route",
    ],
    "handlers": {
        "contract": {
            "patterns": [
                "contract status",
                "contract",
                "what am i (?:doing|inside)",
                "acceptance criteria",
                "am i complete",
                "what.s the contract",
            ],
        },
        "drift": {
            "patterns": [
                "drift recovery",
                "where (?:was|did) (?:i|we)",
                "pick up",
                "resume",
                "what.s next",
                "where (?:am|are) (?:i|we)",
                "continue from",
            ],
        },
        "identity": {
            "patterns": [
                "who am i",
                "whoami",
                "my twin",
                "what can (?:i|my twin)",
                "what scopes?",
                "what authority",
                "my identity",
                "my peer",
            ],
        },
        "doctrine": {
            "patterns": [
                "promise",
                "principle",
                "doctrine",
                "what does arqera",
                "how does arqera",
                "values",
            ],
        },
        "audit": {
            "patterns": [
                "healthy",
                "broken",
                "integrity",
                "orphan",
                "zombie",
                "missing",
                "drift",
                "unclassified",
                "stale",
                "complete",
                "completeness",
            ],
        },
        "catalogue": {
            "patterns": [
                "show",
                "list",
                "count",
                "how many",
                "what (?:ui|agents|workers|integrations|languages)",
            ],
        },
        "route": {
            "patterns": [
                "find",
                "where",
                "how do i",
                "how can i",
                "send",
                "post",
                "do",
                "delegate",
                "run",
                "execute",
                "trigger",
                "launch",
                "press",
                "click",
            ],
        },
    },
    # Handler used when no pattern matches at all.
    "default_handler": "route",
    # Provenance marker so callers can tell which table is in effect.
    "_source": "fallback (substrate intent_grammar unreachable)",
}


def _load_handler_routing() -> dict:
    """Fetch handler routing from substrate intent_grammar/v2; fallback if unreachable.

    Returns:
        The ``arq_handler_routing`` mapping from the substrate body, tagged
        with ``_source``, when it is a dict carrying a non-empty
        ``priority_order`` list and a non-empty ``handlers`` dict. In every
        other case (network failure, timeout, malformed JSON, unexpected
        shape) the built-in ``_FALLBACK_HANDLER_ROUTING`` is returned.
    """
    # Local import: http.client is stdlib and only needed for its exception base.
    import http.client

    url = f"{ADDRESSING_BASE}/address/body/intent_grammar/v2?full=true"
    try:
        with urllib.request.urlopen(url, timeout=3) as resp:
            body = json.load(resp)
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError and TimeoutError; ValueError covers
        # JSONDecodeError; HTTPException covers truncated/garbled responses.
        return _FALLBACK_HANDLER_ROUTING

    # Validate the shape step by step so an unexpected JSON type (list, string,
    # null) falls back instead of raising AttributeError at import time.
    payload = body.get("payload") if isinstance(body, dict) else None
    routing = payload.get("arq_handler_routing") if isinstance(payload, dict) else None
    if not isinstance(routing, dict):
        return _FALLBACK_HANDLER_ROUTING

    priority_order = routing.get("priority_order")
    handlers = routing.get("handlers")
    if (isinstance(priority_order, list) and priority_order
            and isinstance(handlers, dict) and handlers):
        routing["_source"] = "arq://body/intent_grammar/v2"
        return routing
    return _FALLBACK_HANDLER_ROUTING


def _compile_handler_patterns(routing: dict) -> dict[str, list[re.Pattern[str]]]:
    """Compile every handler's patterns into word-boundary-anchored, case-insensitive regexes."""
    return {
        name: [re.compile(rf"\b({p})\b", re.I) for p in spec["patterns"]]
        for name, spec in routing["handlers"].items()
    }


# Resolved once at import time: the routing table in effect and, per handler,
# its compiled patterns.
_HANDLER_ROUTING = _load_handler_routing()
try:
    _COMPILED_PATTERNS = _compile_handler_patterns(_HANDLER_ROUTING)
except (re.error, KeyError, TypeError, AttributeError):
    # The loaded table is unusable (invalid regex fragment, handler without
    # "patterns", wrong container type). Use the built-in table rather than
    # crashing on every invocation.
    _HANDLER_ROUTING = _FALLBACK_HANDLER_ROUTING
    _COMPILED_PATTERNS = _compile_handler_patterns(_HANDLER_ROUTING)


def classify_intent(text: str) -> str:
    """Map free-form user text to a handler name.

    Dispatch trigger phrases take precedence over everything else. After
    that, handlers are tried in the routing table's ``priority_order`` and
    the first one with any matching pattern is returned. When nothing
    matches, the table's ``default_handler`` (or ``"route"``) is used.
    """
    # Dispatch was introduced after grammar/v2, so it is tested ahead of the table.
    if DISPATCH_INTENT.search(text) is not None:
        return "dispatch"

    first_hit = next(
        (
            name
            for name in _HANDLER_ROUTING["priority_order"]
            if any(rx.search(text) for rx in _COMPILED_PATTERNS.get(name, ()))
        ),
        None,
    )
    if first_hit is not None:
        return first_hit
    return _HANDLER_ROUTING.get("default_handler", "route")


def handle_dispatch(text: str) -> dict:
    """'arq dispatch <intent>' — read worker_capacity_catalog + match capabilities.

    Returns the ranked workers + dispatch decision. Does NOT execute the
    dispatch yet (waiting for substrate_client v2 signed writes). This is
    the read-only dispatch decision primitive.

    Per arq://body/arc/arqera-as-glue-completeness.

    Args:
        text: The raw user question, including its dispatch trigger words.

    Returns:
        The standard context envelope. ``answer`` holds up to five workers
        ranked by score, or an ``error`` entry when the catalogue has no
        workers or could not be fetched.
    """
    catalog = fetch("body/worker_capacity_catalog/v1") or {}
    workers = catalog.get("workers") or []
    if not workers:
        return emit_context(
            answer={"error": "worker_capacity_catalog/v1 not reachable"},
            why=["substrate query failed"], alternatives=[],
            freshness=f"live at {datetime.now(UTC).isoformat()}",
            authority={"signer": "twin (read-only)", "scope": "dispatch decision"},
            recall=["check arq://body/worker_capacity_catalog/v1"],
        )

    # Strip the trigger words so they don't pollute scoring. Reusing the
    # classifier's compiled regex keeps the two phrase lists from drifting apart.
    intent = DISPATCH_INTENT.sub("", text).strip()
    tokens = re.findall(r"\w{3,}", intent.lower())

    ranked = []
    for w in workers:
        caps = w.get("capabilities") or []
        # One hit per (capability, token) pair where the token is a substring
        # of the capability; a capability may therefore appear more than once.
        hits = [c for c in caps for t in tokens if t in c.lower()]
        if not hits:
            continue
        ranked.append({
            "worker_id": w.get("id"),
            "kind": w.get("kind"),
            "score": 2 * len(hits),
            # De-duplicate while keeping catalogue order, so repeated runs
            # produce identical output (a set would reorder between runs).
            "matched_capabilities": list(dict.fromkeys(hits)),
            "latency_tier": w.get("latency_tier"),
            "cost_tier": w.get("cost_tier"),
            "dispatch_endpoint": (w.get("dispatch_endpoint") or "")[:120],
        })
    # Highest score first; the sort is stable, so ties keep catalogue order.
    ranked.sort(key=lambda x: -x["score"])

    return emit_context(
        answer={
            "intent": intent or "<empty>",
            "top_workers": ranked[:5],
            "fallback_if_none_match": "arq route <intent> (uses keyword scoring against wieldable surfaces)",
            "dispatch_execution_status": "READ-ONLY decision; execution awaits substrate_client v2 (signed writes in Python). Today: orchestrator-Claude reads this decision + invokes the recommended worker manually.",
            "total_workers_in_catalog": len(workers),
        },
        why=[f"matched {len(ranked)} workers against intent tokens {tokens[:5]}",
             "score = 2 per capability-keyword match"],
        alternatives=[{"name": "arq route <intent>", "why_consider": "find a substrate surface (UI route, agent, etc.)"}],
        freshness=f"worker_capacity_catalog/v1 + intent at {datetime.now(UTC).isoformat()}",
        authority={"signer": "twin (read-only dispatch decision)", "scope": "no execution yet"},
        recall=["arq dispatch <intent>", "twin doc fetch arq://body/worker_capacity_catalog/v1?full=true"],
    )


# ── substrate primitives ──────────────────────────────────────────────────

def fetch(addr: str) -> dict | None:
    """Fetch a substrate body with full payload.

    Args:
        addr: Address path below ``/address/``, e.g. ``"body/arc/<ref>"``.

    Returns:
        The response's ``payload`` object when it is a dict; ``None`` on any
        network or HTTP failure, invalid JSON, or unexpected response shape.
    """
    # Local import: http.client is stdlib and only needed for its exception base.
    import http.client

    url = f"{ADDRESSING_BASE}/address/{addr}?full=true"
    try:
        with urllib.request.urlopen(url, timeout=5) as resp:
            body = json.load(resp)
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError and TimeoutError; ValueError covers
        # JSONDecodeError; HTTPException covers InvalidURL / IncompleteRead,
        # which urllib does not wrap.
        return None
    # A top-level JSON array/string/null has no .get(); treat it as "no payload".
    if not isinstance(body, dict):
        return None
    payload = body.get("payload")
    return payload if isinstance(payload, dict) else None


def index(class_: str, type_: str, limit: int = 1000) -> list[dict]:
    """List substrate entities of a class/type.

    Shells out to ``twin --use-keychain index ... --json`` and parses its
    standard output.

    Args:
        class_: Value passed to ``--class``.
        type_: Value passed to ``--type``.
        limit: Value passed to ``--limit``.

    Returns:
        The dict records from the command's JSON array. An empty list when
        the ``twin`` binary is missing or cannot be started, the call times
        out, it exits non-zero, or the output is not a JSON array.
    """
    cmd = ["twin", "--use-keychain", "index",
           "--class", class_, "--type", type_, "--limit", str(limit), "--json"]
    try:
        r = subprocess.run(cmd, capture_output=True, text=True, check=False, timeout=15)
    except (OSError, subprocess.TimeoutExpired):
        # OSError includes FileNotFoundError (twin not installed). Same
        # best-effort stance handle_identity takes towards the twin binary.
        return []
    if r.returncode != 0:
        return []
    try:
        data = json.loads(r.stdout)
    except ValueError:  # JSONDecodeError is a ValueError subclass
        return []
    # Callers invoke .get() on every record, so hand back dict records only.
    if not isinstance(data, list):
        return []
    return [rec for rec in data if isinstance(rec, dict)]


# ── contextual output shape (the key contract) ──────────────────────────────

def emit_context(answer: dict, why: list[str], alternatives: list[dict],
                 freshness: str | None, authority: dict | None,
                 recall: list[str]) -> dict:
    """Every arq response carries this shape."""
    return {
        "answer": answer,
        "why_this_answer": why,
        "alternatives_considered": alternatives,
        "freshness": freshness,
        "authority": authority,
        "recall_path": recall,
    }


# ── intent handlers ────────────────────────────────────────────────────────

def handle_contract(text: str) -> dict:
    """'arq contract status' — show active work_contract bodies + criteria.

    Per arq://doc/principle/arqera-contract-enforcement-v1.

    Args:
        text: The raw user question (not used by this handler).

    Returns:
        The standard context envelope. ``answer.active_contracts`` lists every
        indexed work_contract whose body could be fetched and whose
        ``completion_state`` is neither ``complete`` nor ``abandoned``.
    """
    contract_refs = [r.get("ref") for r in index("body", "work_contract", limit=20)
                     if r.get("ref")]
    contracts = []
    for ref in contract_refs:
        p = fetch(f"body/work_contract/{ref}")
        if not p or p.get("completion_state") in ("complete", "abandoned"):
            continue
        # `or` chains (rather than .get defaults) so a JSON null in the body
        # degrades to an empty value instead of raising TypeError.
        goal = p.get("stated_goal_quoted_verbatim") or p.get("stated_goal") or ""
        criteria = p.get("acceptance_criteria") or []
        contracts.append({
            "address": f"arq://body/work_contract/{ref}",
            "stated_goal": goal[:300],
            "acceptance_criteria_count": len(criteria),
            "completion_state": p.get("completion_state", "open"),
            "active_arcs": p.get("active_arcs", []),
            "no_self_declared_done": p.get("no_self_declared_done", True),
        })
    return emit_context(
        answer={
            "active_contracts": contracts,
            "the_rule": "Worker MAY NOT self-declare complete. Operator attestation OR substrate adjudicator approval required.",
        },
        why=["substrate index of body/work_contract; filtered to completion_state not in (complete, abandoned)"],
        alternatives=[{"name": "arq drift recovery", "why_consider": "for arc-level next actions + pressure"}],
        freshness=f"live query at {datetime.now(UTC).isoformat()}",
        authority={"signer": "twin (self-introspection)", "scope": "read-only contract query"},
        recall=["arq contract status", "arq am i complete", "arq what.s the contract"],
    )


def handle_drift(text: str) -> dict:
    """'arq drift recovery' — top arcs by pressure with next exact actions.

    Per arq://doc/principle/arqera-contract-enforcement-v1: drift never loses
    the destination because arcs ARE the destination, queryable from substrate.

    Args:
        text: The raw user question (not used by this handler).

    Returns:
        The standard context envelope. ``answer`` carries the number of arcs
        fetched, the five with the highest ``pressure_score``, and a compact
        ref/pressure/phase summary of all of them.
    """
    arc_refs = [r.get("ref") for r in index("body", "arc", limit=30) if r.get("ref")]
    arcs = []
    for ref in arc_refs:
        p = fetch(f"body/arc/{ref}")
        if not p:
            continue
        next_action = p.get("next_bounded_action") or {}
        arcs.append({
            "ref": ref,
            "name": p.get("name", ""),
            "phase": p.get("current_phase", "?"),
            "pressure": p.get("pressure_score") or 0,
            # `or ""` / `or []` so a JSON null is handled like the null-guarded
            # fields around it rather than raising TypeError on slice / len.
            "conclusion": (p.get("conclusion") or "")[:200],
            "current_state": (p.get("current_state_summary") or "")[:200],
            "next_action": next_action.get("description", ""),
            "next_command": next_action.get("exact_command", ""),
            "blocked_on": p.get("blocked_on", []),
            "acceptance_criteria_count": len(p.get("acceptance_criteria") or []),
        })
    # Highest pressure first; the sort is stable, so ties keep index order.
    arcs.sort(key=lambda a: -a["pressure"])
    return emit_context(
        answer={
            "active_arcs_count": len(arcs),
            "top_arcs_by_pressure": arcs[:5],
            "all_arcs_summary": [{"ref": a["ref"], "pressure": a["pressure"], "phase": a["phase"]} for a in arcs],
        },
        why=["substrate index of body/arc/* sorted by pressure_score (staleness × importance × user-pull)"],
        alternatives=[{"name": "arq contract status", "why_consider": "for contract-level acceptance criteria"}],
        freshness=f"live query at {datetime.now(UTC).isoformat()}",
        authority={"signer": "twin (self-introspection)", "scope": "read-only arc query"},
        recall=["arq drift recovery", "arq where was i", "arq what.s next", "arq continue"],
    )


def handle_identity(text: str) -> dict:
    """Answer 'who am i?' / 'what can my twin do?' from local twin state.

    Combines three sources: the address line printed by ``twin status``, the
    adapter names in scope_claim_grammar/v1, and the indexed authority grants
    (at most ten are shown). ``text`` is not used.
    """
    # Ask the local twin for its status; any failure to run it leaves the
    # address unresolved rather than aborting the answer.
    try:
        status = subprocess.run(
            ["twin", "--use-keychain", "status"],
            capture_output=True, text=True, check=False, timeout=5,
        )
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        status = None

    twin_address = None
    if status is not None and status.returncode == 0:
        # First "address...: value" line wins; the value is everything after
        # the first colon.
        twin_address = next(
            (
                ln.split(":", 1)[1].strip()
                for ln in status.stdout.splitlines()
                if ln.strip().startswith("address") and ":" in ln
            ),
            None,
        )

    claim_grammar = fetch("body/scope_claim_grammar/v1") or {}
    verbs_by_adapter = claim_grammar.get("allowed_verbs_per_adapter") or {}
    adapter_names = list(verbs_by_adapter.keys())

    grant_refs = [
        rec.get("ref")
        for rec in index("body", "wieldable_authority_grant", limit=20)
        if rec.get("ref")
    ]

    answer = {
        "my_twin": twin_address or "(could not resolve — is twin installed?)",
        "principle": "arq://doc/principle/adapter-identity-as-scope-claim-v1",
        "what_my_twin_can_claim_as": adapter_names,
        "claim_grammar": "arq://body/scope_claim_grammar/v1",
        "authority_grants_in_substrate": grant_refs[:10],
        "truncated": len(grant_refs) > 10,
    }
    return emit_context(
        answer=answer,
        why=["twin status (Keychain) + scope_claim_grammar/v1 + wieldable_authority_grant index"],
        alternatives=[{"name": "arq what does arqera promise", "why_consider": "doctrine vs identity"}],
        freshness=f"live query at {datetime.now(UTC).isoformat()}",
        authority={"signer": "twin peer (this Mac)", "scope": "self-introspection (no external API)"},
        recall=["arq who am i", "arq what can my twin do", "arq what scopes", "twin status"],
    )


def handle_doctrine(text: str) -> dict:
    """Answer 'what does arqera promise me?' / 'show me principles'.

    Fetches the four promise bodies in a fixed order and reports the name and
    human promise of each one that could be retrieved. ``text`` is not used.
    """
    promise_slugs = (
        "arqera-answers-v1",
        "arqera-remembers-v1",
        "arqera-learns-v1",
        "arqera-earns-v1",
    )
    # Slugs whose body cannot be fetched are silently left out.
    promises = [
        {
            "address": f"arq://body/principle/{slug}",
            "name": body.get("name"),
            "promise": body.get("human_promise"),
        }
        for slug in promise_slugs
        if (body := fetch(f"body/principle/{slug}"))
    ]
    return emit_context(
        answer={"the four arqera promises": promises},
        why=["consolidated 7+1 prior principles into 4 human-shaped promises (decision: arq://body/decision/one-cli-not-many-2026-05-25 sibling)"],
        alternatives=[],
        freshness="emitted 2026-05-25; supersedes 7 legacy principles in docs/principles/*.md (fossils)",
        authority={"signer": "operator peer", "scope": "doctrine"},
        recall=[f"curl '{ADDRESSING_BASE}/address/body/principle/<slug>?full=true'"],
    )


def handle_audit(text: str) -> dict:
    """'is the catalogue healthy?' — orphan/zombie reconciliation.

    Derives slugs from the code tree (``frontend/app/**/page.tsx`` for UI
    routes, ``frontend/components/ui/*.tsx`` for UI components) and compares
    them with the refs indexed in substrate for the same kind.

    Args:
        text: The raw user question (not used by this handler).

    Returns:
        The standard context envelope. ``answer`` reports, per kind with any
        mismatch, the orphans (in code, not in the catalogue) and zombies (in
        the catalogue, not in code), plus an overall health string.
    """
    # Repository root is taken to be two directories above this file's folder.
    repo = Path(__file__).resolve().parents[2]

    # Discover code-side
    code_counts: dict[str, set] = defaultdict(set)
    for p in (repo / "frontend" / "app").rglob("page.tsx"):
        # as_posix() keeps "/" separators on every OS, so the literal
        # "frontend/app" and "/page.tsx" strips below also work on Windows.
        rel = p.relative_to(repo).as_posix()
        # "(group)" segments lose their parentheses, "[param]" becomes
        # "id-param", then everything non-alphanumeric collapses to "-".
        s = re.sub(r"\(([^)]+)\)", r"\1", rel.replace("frontend/app", "").replace("/page.tsx",""))
        s = re.sub(r"\[([^\]]+)\]", r"id-\1", s)
        s = re.sub(r"[^a-zA-Z0-9-]", "-", s.strip("/"))
        code_counts["wieldable_ui_route"].add(re.sub(r"-+", "-", s).strip("-") or "root")
    for f in (repo / "frontend" / "components" / "ui").glob("*.tsx"):
        # camelCase / PascalCase file name -> kebab-case slug with a "ui-" prefix.
        slug = re.sub(r"\.tsx$", "", f.name)
        slug = re.sub(r"([a-z])([A-Z])", r"\1-\2", slug).lower()
        slug = re.sub(r"[^a-z0-9-]", "-", slug)
        code_counts["wieldable_ui_component"].add("ui-" + re.sub(r"-+","-",slug).strip("-"))

    # NOTE: a kind only enters code_counts once at least one file was found
    # for it, so a kind with no files on disk is not reconciled at all.
    orphans, zombies = {}, {}
    for kind, code_set in code_counts.items():
        cat_set = {r.get("ref","") for r in index("body", kind) if r.get("ref")}
        o = sorted(code_set - cat_set)
        z = sorted(cat_set - code_set)
        if o or z:
            orphans[kind] = o
            zombies[kind] = z

    health = "healthy" if not (orphans or zombies) else "drift detected"

    return emit_context(
        answer={
            "catalogue_health": health,
            "orphans_by_kind": orphans,
            "zombies_by_kind": zombies,
        },
        why=["bidirectional reconciliation: discovered code-side vs substrate-indexed; mismatches are invariant breaches per wieldability-is-the-boundary"],
        alternatives=[{"name": "ignore", "why_rejected": "violates ARQERA Answers promise"}],
        freshness=f"computed live at {datetime.now(UTC).isoformat()}",
        authority={"signer": "twin (operator peer)", "scope": "read-only audit"},
        recall=["arq is the catalogue healthy", "arq show me orphans", "arq help me fix drift"],
    )


def handle_catalogue(text: str) -> dict:
    """'show me ui routes for engineers' — filtered list.

    Args:
        text: The raw user question. A "for <role>" phrase selects a role
            filter; a kind noun ("agents", "ui routes", ...) narrows the
            query to one substrate kind, otherwise all wieldable kinds are
            scanned.

    Returns:
        The standard context envelope. ``answer`` carries the total match
        count, the first 30 matches, and a ``truncated`` flag.
    """
    role_match = ROLE_HINT.search(text)
    role = role_match.group(1).lower() if role_match else None
    kind_match = KIND_HINT.search(text)
    kind_word = kind_match.group(1).lower() if kind_match else None

    # Map word → kind
    kind_map = {"ui route": "wieldable_ui_route", "ui routes": "wieldable_ui_route",
                "component": "wieldable_ui_component", "components": "wieldable_ui_component",
                "agent": "wieldable_agent", "agents": "wieldable_agent",
                "worker": "wieldable_component_worker", "workers": "wieldable_component_worker",
                "integration": "wieldable_api_primitive", "integrations": "wieldable_api_primitive",
                "primitive": "wieldable_api_primitive", "primitives": "wieldable_api_primitive",
                "language": "wieldable_language", "languages": "wieldable_language",
                "tool": "wieldable_external_tool", "tools": "wieldable_external_tool",
                "training loop": "wieldable_training_loop", "training loops": "wieldable_training_loop",
                "grant": "wieldable_authority_grant", "grants": "wieldable_authority_grant",
                "customer need": "wieldable_customer_need", "customer needs": "wieldable_customer_need"}
    kinds = [kind_map[kind_word]] if kind_word in kind_map else WIELDABLE_KINDS

    matches = []
    for kind in kinds:
        for rec in index("body", kind, limit=500):
            ref = rec.get("ref","")
            if not ref:
                continue
            payload = fetch(f"body/{kind}/{ref}")
            if not payload:
                continue
            if role:
                # Normalise before testing membership: a JSON null would make
                # `in` raise, and a bare string would substring-match
                # ("any" in "company") instead of comparing whole role names.
                roles = payload.get("role") or []
                if isinstance(roles, str):
                    roles = [roles]
                if role not in roles and "any" not in roles:
                    continue
            matches.append({"kind": kind.replace("wieldable_",""), "ref": ref,
                            "path_or_url": payload.get("path") or payload.get("home_url") or "",
                            "name": payload.get("name") or payload.get("description",""),
                            "task": payload.get("task", []),
                            "role": payload.get("role", [])})

    return emit_context(
        answer={"matches_count": len(matches), "matches": matches[:30],
                "truncated": len(matches) > 30},
        why=[f"queried substrate kinds: {kinds[:3]}{'…' if len(kinds)>3 else ''}",
             f"applied role filter: {role}" if role else "no role filter applied"],
        alternatives=[{"name": "use arq audit", "why_consider": "if filter returns 0 you may have orphans"}],
        freshness=f"live query at {datetime.now(UTC).isoformat()}",
        authority={"signer": "twin (operator peer)", "scope": "read-only catalogue query"},
        recall=["arq show me <kind> for <role>", "arq is the catalogue healthy"],
    )


def handle_route(text: str) -> dict:
    """'find me a slack channel for engineering' — intent → ranked surfaces.

    Scores agents, API primitives, UI routes, component workers and external
    tools by how many of the question's tokens (3+ word characters,
    lower-cased) occur as substrings of the surface's ref, name, description
    and tasks.

    Args:
        text: The raw user question.

    Returns:
        The standard context envelope. ``answer.best_match`` is the
        top-scoring surface (``None`` when nothing scored) and
        ``answer.alternatives`` holds up to four runners-up.
    """
    # The tokens depend only on the question, so compute them once up front
    # instead of once per candidate.
    tokens = re.findall(r"\w{3,}", text.lower())

    candidates = []
    for kind in ("wieldable_agent", "wieldable_api_primitive",
                 "wieldable_ui_route", "wieldable_component_worker",
                 "wieldable_external_tool"):
        for rec in index("body", kind, limit=500):
            ref = rec.get("ref","")
            if not ref:
                continue
            payload = fetch(f"body/{kind}/{ref}")
            if not payload:
                continue
            # A JSON null task would make join() raise, and a bare string
            # would be joined character by character; normalise both.
            task = payload.get("task") or []
            if isinstance(task, str):
                task = [task]
            # Score: keyword overlap between intent and (ref + name + description + task)
            haystack = " ".join([
                ref,
                payload.get("name") or "",
                payload.get("description") or "",
                " ".join(map(str, task)),
            ]).lower()
            score = sum(1 for t in tokens if t in haystack)
            if score > 0:
                candidates.append((score, kind, ref, payload))
    # Highest score first; the sort is stable, so ties keep discovery order.
    candidates.sort(key=lambda x: -x[0])
    top = candidates[:5]

    return emit_context(
        answer={"best_match": ({"kind": top[0][1].replace("wieldable_",""),
                                 "ref": top[0][2],
                                 "name": top[0][3].get("name"),
                                 "path_or_home_url": top[0][3].get("path") or top[0][3].get("home_url"),
                                 "score": top[0][0]} if top else None),
                "alternatives": [{"kind": k.replace("wieldable_",""), "ref": r, "score": s,
                                  "name": p.get("name")} for (s, k, r, p) in top[1:]]},
        why=[f"keyword overlap scoring against {len(candidates)} candidates",
             f"top score: {top[0][0]}" if top else "no matches"],
        alternatives=[{"name": "arq show me <kind>", "why_consider": "if no match, browse the catalogue"}],
        freshness=f"live query at {datetime.now(UTC).isoformat()}",
        authority={"signer": "twin (operator peer)", "scope": "read-only route search"},
        recall=["arq <intent>", "arq show me <kind>", "arq how does <surface-ref> work"],
    )


# ── main ──────────────────────────────────────────────────────────────────

def main() -> int:
    """Parse the command line, route the question to a handler, print the answer.

    With ``--json`` the full context envelope is printed as JSON. Otherwise a
    human-readable rendering is printed, covering the answer, the reasons,
    the alternatives considered, freshness, authority and recall path.

    Returns:
        Process exit status; always 0.
    """
    p = argparse.ArgumentParser(prog="arq",
        description="ARQERA's single CLI — you ask, arq routes. Every answer carries context.")
    p.add_argument("intent", nargs="*", help="natural-language question or action")
    p.add_argument("--json", action="store_true", help="machine-readable output")
    p.add_argument("--help-promises", action="store_true",
                   help="show the 4 arqera promises and exit")
    args = p.parse_args()

    text = " ".join(args.intent)
    if args.help_promises or text.strip().lower() in ("promises", "what does arqera promise me"):
        ctx = handle_doctrine(text)
    elif not args.intent:
        p.print_help()
        return 0
    else:
        # Intent name -> handler; anything unrecognised goes to handle_route.
        handlers = {
            "dispatch": handle_dispatch,
            "contract": handle_contract,
            "drift": handle_drift,
            "identity": handle_identity,
            "doctrine": handle_doctrine,
            "audit": handle_audit,
            "catalogue": handle_catalogue,
        }
        ctx = handlers.get(classify_intent(text), handle_route)(text)

    if args.json:
        print(json.dumps(ctx, indent=2, default=str))
        return 0

    # Human-readable contextual output
    print(f"\n  ── arq · {ctx['why_this_answer'][0] if ctx['why_this_answer'] else 'response'} ──\n")
    ans = ctx["answer"]
    if isinstance(ans, dict):
        for k, v in ans.items():
            if isinstance(v, (dict, list)) and v:
                print(f"  {k}:")
                lines = json.dumps(v, indent=4, default=str).splitlines()
                # Cap each nested value at 30 lines to keep terminal output readable.
                for line in lines[:30]:
                    print(f"    {line}")
                if len(lines) > 30:
                    print(f"    … +{len(lines)-30} more lines (use --json for full)")
            else:
                print(f"  {k}: {v}")
    else:
        print(f"  answer: {ans}")

    print("\n  why this answer:")
    for w in ctx["why_this_answer"]:
        print(f"    • {w}")

    # Alternatives are part of the context envelope but were previously only
    # visible with --json; show them in the human-readable view as well.
    if ctx.get("alternatives_considered"):
        print("\n  alternatives considered:")
        for alt in ctx["alternatives_considered"]:
            reason = alt.get("why_consider") or alt.get("why_rejected") or ""
            print(f"    • {alt.get('name')}" + (f" — {reason}" if reason else ""))

    if ctx.get("freshness"):
        print(f"\n  freshness: {ctx['freshness']}")

    if ctx.get("authority"):
        print(f"  authority: {ctx['authority']}")

    if ctx.get("recall_path"):
        print("\n  how to recall more:")
        for r in ctx["recall_path"]:
            print(f"    {r}")

    return 0


# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
