#!/usr/bin/env python3
"""arq-delegate v0.1 · LLM-to-LLM delegation entry point · ARQERA primitive.

Per canonized principle:
  arq://doc/principle/llm-to-llm-delegation-under-one-envelope-v1

Per substrate-attested classification 2026-05-21:
  arq://doc/snapshot/modal-vs-homebase-vs-arqera-speed-mechanism-classification-2026-05-21

This is the SOLE LEGAL entry point for LLM-to-LLM delegation in ARQERA.
Out-of-band LLM-to-LLM communication (any sub-LLM dispatch NOT routed
through this primitive) is "agents chatting" — substrate-refused.

## v0.1 refactor (vs v0 in #4009)

v0 hardcoded a Modal-only HTTP endpoint (ARQ_INFERENCE_API). This was
WRONG. Modal is one HOSTING substrate among many (the 5 fine-tuned
Qwen3-8B personas happen to live there at $50/mo · per Book of ARQERA
this is an Article 5 violation while Homebase is <5% utilized). The
canonical ARQERA inference architecture per llm_provider_routing_service:

  Tier 0 (Homelab):  vLLM on dgx-spark/pc-wsl   (free · sovereign)
  Tier 0.5 (Free):   Cerebras · Groq · 10 more  (free · rate-limited)
  Tier 1 (Budget):   Groq · DeepSeek · MiniMax  (~$0.20/M)
  Tier 2 (Balanced): Mistral · OpenRouter       (~$1/M)
  Tier 3 (Premium):  Claude · OpenAI            (~$3+/M)

v0.1 stops bypassing this. It dispatches via `arq-call <worker> <verb>`
(the existing mesh primitive). Worker selection delegates to the mesh
catalogue + cost-aware routing. arq-delegate v0.1 adds delegation
semantics (delegation_requested · delegated_claim_returned · anti-chat
enforcement) ON TOP of arq-call · it does NOT re-implement HTTP dispatch.

## Three load-bearing rules enforced

1. NO authority by delegation
   Sub-LLMs return claim + evidence ONLY. AUTHORITY_WRITE_VERBS regex
   catches authority-claim language without evidence_refs → refusal.

2. Every delegated claim MUST return evidence into the same envelope
   evidence_refs parsed from claim · empty = substrate-attested as
   zero-weight refusal.

3. Anti-chat: mandatory substrate sequence
   delegation_requested emitted BEFORE arq-call dispatch ·
   delegated_claim_returned AFTER · NO bypass of arq-call.

## Authority bounds

  ALLOWED:  emit delegation evidence acts · dispatch via arq-call to any
            worker in mesh catalogue · parse claim for evidence_refs ·
            return claim to caller
  DENIED:   merge_main · deploy · secret_mutation · substrate-write
            authority · bypass of arq-call · arbitrary URL override

## Worker allowlist (v0.1)

Workers exposed by the mesh catalogue (`backend/app/mesh/arq_call.py
_CATALOGUE`) — v0.1 does not maintain its own allowlist. Tier 0 Homebase
workers (vllm-dgx · vllm-wsl) are MISSING from the catalogue today ·
operator-tier surface emitted to wire them (separate scope from this
PR). Until wired · arq-delegate routes via free-tier (groq-free,
cerebras, mistral-free) or paid (claude-anthropic, openai, deepseek)
depending on `--worker` choice. `--worker` is required · v0.1 selects no default worker.

Usage:
  arq-delegate invoke \\
    --worker WORKER \\
    --prompt-file PATH \\
    --task-envelope arq://body/authorisation_envelope/... \\
    --requesting-worker arq://body/worker/... \\
    [--verb chat.completions] \\
    [--max-tokens N] \\
    [--expected-min-evidence-refs N]

Emits:
  arq://act/delegation_requested/<binding>      (before arq-call dispatch)
  arq://act/delegated_claim_returned/<binding>  (after · with evidence_refs)
  arq://act/delegation_cancelled/<binding>      (on dispatch failure)
  arq://act/delegation_authority_attempted/<binding>
                                                (refusal · if claim has
                                                authority-write verbs
                                                without evidence)
"""
from __future__ import annotations

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
from datetime import datetime, timezone

# Policy identifier stamped into the payload of every act emitted by emit_act.
POLICY_VERSION = "arq-delegate-v0.1-2026-05-21"
# Path to the `twin` binary used to emit acts. A non-empty TWIN_BIN environment
# variable wins; otherwise the first `twin` on PATH; None when neither resolves.
TWIN_BIN = os.environ.get("TWIN_BIN") or shutil.which("twin")
# Path to the `arq-call` mesh dispatcher, resolved the same way via ARQ_CALL_BIN.
ARQ_CALL_BIN = os.environ.get("ARQ_CALL_BIN") or shutil.which("arq-call")

# Anti-chat enforcement: refuse sub-LLM claims that contain authority-write
# verbs WITHOUT evidence_refs · per rule 1 (no authority by delegation).
# The pattern is a fixed list of first-person phrases, matched
# case-insensitively and anchored on word boundaries. Words inside a phrase are
# separated by literal single spaces, so a phrase broken across a line or
# padded with extra whitespace does not match.
AUTHORITY_WRITE_VERBS = re.compile(
    r"\b(I will merge|I will deploy|I will approve|I have merged|I have deployed|"
    r"I have approved|I am rotating|I am setting the secret|I am changing the policy)\b",
    re.IGNORECASE,
)


def now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string carrying a +00:00 offset."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()


def now_compact() -> str:
    """Return the current UTC time as a compact ``YYYY-MM-DDTHHMMSSZ`` stamp."""
    utc_now = datetime.now(tz=timezone.utc)
    # A datetime format spec is handed to strftime, so this is the same rendering.
    return f"{utc_now:%Y-%m-%dT%H%M%SZ}"


# Flipped to True by emit_act after it prints the "twin binary not found"
# warning, so that warning appears at most once per process.
_EMIT_WARN_LOGGED: bool = False


def emit_act(act_type: str, ref: str, payload: dict) -> str | None:
    """Emit a signed substrate act through the `twin` CLI (best-effort).

    Runs ``twin --use-keychain act emit act <act_type> <ref>-<timestamp>
    --payload <json>``. The JSON payload is *payload* plus ``policy``
    (POLICY_VERSION) and ``issued_at`` (current UTC time).

    Args:
        act_type: Act type passed to ``twin act emit act``.
        ref: Binding reference; a compact UTC timestamp is appended to it.
        payload: Act payload; must be JSON-serialisable.

    Returns:
        The first stdout line from `twin` that starts with ``arq://act/``
        (stripped), or None when the binary is missing, the call fails, or no
        such line is printed.

    Emission never raises: every failure is reported on stderr and mapped to
    None so the caller can decide how to proceed. The missing-binary warning
    is printed once per process; other failures are reported on every call.
    """
    global _EMIT_WARN_LOGGED
    if not TWIN_BIN or not os.path.exists(TWIN_BIN):
        if not _EMIT_WARN_LOGGED:
            print(
                "arq-delegate: WARN twin binary not found "
                "(set TWIN_BIN or install `twin` on PATH) · audit acts will be skipped",
                file=sys.stderr,
            )
            _EMIT_WARN_LOGGED = True
        return None
    full_ref = f"{ref}-{now_compact()}"
    try:
        r = subprocess.run(
            [TWIN_BIN, "--use-keychain", "act", "emit", "act", act_type,
             full_ref,
             "--payload", json.dumps({**payload, "policy": POLICY_VERSION, "issued_at": now_iso()})],
            check=False, timeout=10, capture_output=True, text=True,
        )
    except Exception as exc:
        # Best-effort boundary: a failed audit emission must not crash the
        # delegation, but it must not be invisible either.
        print(
            f"arq-delegate: WARN failed to emit {act_type} act · {type(exc).__name__}: {exc}",
            file=sys.stderr,
        )
        return None
    for line in (r.stdout or "").splitlines():
        if line.startswith("arq://act/"):
            return line.strip()
    # twin ran but printed no act address: surface its exit status and stderr.
    detail = (r.stderr or "").strip()[:500]
    suffix = f" · {detail}" if detail else ""
    print(
        f"arq-delegate: WARN twin returned no act address for {act_type} "
        f"(exit {r.returncode}){suffix}",
        file=sys.stderr,
    )
    return None


# Matches arq://act/<type>/<id>. The id may contain '-', ':' and '.', but must
# END on an alphanumeric or underscore so that sentence punctuation directly
# after a reference ("... see arq://act/x/y.") is not swallowed into it.
_EVIDENCE_REF_RE = re.compile(
    r"arq://act/[a-z0-9_]+/[a-z0-9_\-:.]*[a-z0-9_]", re.IGNORECASE
)


def _parse_evidence_refs(claim_text: str) -> list[str]:
    """Extract arq://act/... evidence references from a sub-LLM response.

    Args:
        claim_text: Raw claim text returned by the delegated worker.

    Returns:
        The distinct references in order of first appearance; an empty list
        when the text is empty or contains none.
    """
    if not claim_text:
        return []
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(_EVIDENCE_REF_RE.findall(claim_text)))


def _dispatch_via_arq_call(worker: str, verb: str, payload: dict) -> tuple[str | None, str]:
    """Dispatch via the existing mesh primitive arq-call.

    Returns (claim_text or None, raw_output_for_debug).
    """
    if not ARQ_CALL_BIN:
        return None, "arq-call binary not found · set ARQ_CALL_BIN or install on PATH"
    try:
        r = subprocess.run(
            [ARQ_CALL_BIN, worker, verb, "--payload", json.dumps(payload)],
            check=False, timeout=120, capture_output=True, text=True,
        )
        # arq-call emits envelope_sent / envelope_ack itself · stdout has the
        # upstream worker response · stderr has the dispatch summary.
        if r.returncode != 0:
            return None, (r.stderr or r.stdout or "")[:2000]
        # Try to extract chat-completion content from OpenAI-compat response.
        try:
            data = json.loads(r.stdout)
            choices = data.get("choices") or []
            if choices:
                return choices[0].get("message", {}).get("content"), r.stdout[:2000]
        except json.JSONDecodeError:
            pass
        # Fallback: return raw stdout if it's not JSON-parseable.
        return r.stdout, r.stdout[:2000]
    except Exception as e:
        return None, f"arq-call dispatch exception: {e}"


def cmd_invoke(args: argparse.Namespace) -> int:
    """Single bounded LLM-to-LLM delegation invocation via arq-call mesh dispatch.

    Sequence: read the prompt file, emit ``delegation_requested``, dispatch
    through arq-call, then emit exactly one of ``delegation_cancelled``
    (dispatch produced no claim), ``delegation_authority_attempted`` (claim
    uses authority-write language and cites no evidence refs) or
    ``delegated_claim_returned``. On success the claim is written to stdout.

    Args:
        args: Parsed ``invoke`` arguments (worker, verb, prompt_file,
            task_envelope, requesting_worker, max_tokens,
            expected_min_evidence_refs).

    Returns:
        0 when a claim was returned, 1 on dispatch failure or authority
        refusal, 2 when the prompt file cannot be read.
    """
    # Open directly instead of exists()+open(): this also handles directories,
    # permission errors and undecodable bytes without a traceback.
    try:
        with open(args.prompt_file, "r", encoding="utf-8") as f:
            prompt = f.read()
    except FileNotFoundError:
        print(f"arq-delegate: ✗ prompt file not found: {args.prompt_file}", file=sys.stderr)
        return 2
    except (OSError, UnicodeDecodeError) as exc:
        print(
            f"arq-delegate: ✗ cannot read prompt file {args.prompt_file}: {exc}",
            file=sys.stderr,
        )
        return 2

    binding_ref = (
        f"{args.worker.replace('/', '_')}-"
        f"{(args.requesting_worker or 'anon').replace('/', '_').replace(':', '_')}"
    )
    # Rule 3: the request act is emitted BEFORE the dispatch.
    pre_addr = emit_act("delegation_requested", binding_ref, {
        "worker": args.worker,
        "verb": args.verb,
        "task_envelope": args.task_envelope,
        "requesting_worker": args.requesting_worker,
        "prompt_length": len(prompt),
        "max_tokens": args.max_tokens,
        "expected_min_evidence_refs": args.expected_min_evidence_refs,
        "dispatch_via": "arq-call (mesh)",
    })

    payload = {
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": args.max_tokens,
    }
    claim, debug = _dispatch_via_arq_call(args.worker, args.verb, payload)

    if claim is None:
        emit_act("delegation_cancelled", binding_ref, {
            "reason": "arq_call_dispatch_failed",
            "worker": args.worker,
            "verb": args.verb,
            "dispatch_debug": debug,
            "delegation_requested_act": pre_addr,
        })
        print(
            f"arq-delegate: ✗ arq-call dispatch failed for {args.worker}.{args.verb} · "
            f"see substrate-attested debug",
            file=sys.stderr,
        )
        print(debug, file=sys.stderr)
        return 1

    evidence_refs = _parse_evidence_refs(claim)
    authority_write_attempt = bool(AUTHORITY_WRITE_VERBS.search(claim))

    if authority_write_attempt and not evidence_refs:
        # Anti-chat / no-authority-by-delegation enforcement: sub-LLM tried to
        # claim authority-write without evidence · substrate-refuse.
        refusal_addr = emit_act("delegation_authority_attempted", binding_ref, {
            "worker": args.worker,
            "reason": "claim_contains_authority_write_verbs_without_evidence_refs",
            "claim_excerpt": claim[:500],
            "delegation_requested_act": pre_addr,
        })
        print(
            f"arq-delegate: ✗ worker attempted authority-write without evidence · "
            f"refusal: {refusal_addr}",
            file=sys.stderr,
        )
        return 1

    evidence_count = len(evidence_refs)
    # A count is never below a threshold of 0 (or less), so no separate
    # "threshold enabled" check is needed.
    below_threshold = evidence_count < args.expected_min_evidence_refs
    post_addr = emit_act("delegated_claim_returned", binding_ref, {
        "worker": args.worker,
        "verb": args.verb,
        "delegation_requested_act": pre_addr,
        "claim_length": len(claim),
        "evidence_refs": evidence_refs,
        "evidence_count": evidence_count,
        # Zero-weight covers both "no evidence at all" and "fewer refs than the
        # caller required", matching the warning printed below.
        "zero_weight": evidence_count == 0 or below_threshold,
        "claim_excerpt": claim[:1000],
    })

    if below_threshold:
        print(
            f"arq-delegate: ⚠ claim returned but evidence_refs={evidence_count} "
            f"< expected_min={args.expected_min_evidence_refs} · substrate-attested as zero-weight",
            file=sys.stderr,
        )

    # Print claim to stdout for coordinator consumption.
    sys.stdout.write(claim)
    if not claim.endswith("\n"):
        sys.stdout.write("\n")
    if post_addr:
        print(f"arq-delegate: audit emitted at {post_addr}", file=sys.stderr)
    return 0


def main() -> int:
    """Parse the command line and run the chosen subcommand, returning its exit code."""
    root = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    commands = root.add_subparsers(dest="verb_top", required=True)
    invoke = commands.add_parser(
        "invoke",
        help="Single LLM-to-LLM delegation invocation via arq-call mesh",
    )

    # (flag, add_argument keyword arguments), in the order they appear in --help.
    option_specs = (
        ("--worker", {
            "required": True,
            "help": "Mesh-catalogue worker name (see backend/app/mesh/arq_call.py _CATALOGUE · "
                    "e.g. groq-free · cerebras · mistral-free · modal-ara-agent · claude-anthropic)",
        }),
        ("--verb", {
            "default": "chat.completions",
            "help": "Verb to dispatch (default: chat.completions)",
        }),
        ("--prompt-file", {
            "required": True,
            "help": "Path to prompt file (sub-LLM input)",
        }),
        ("--task-envelope", {
            "required": True,
            "help": "arq://body/authorisation_envelope/<window> · MUST be active envelope",
        }),
        ("--requesting-worker", {
            "required": True,
            "help": "arq://body/worker/<name> · identity issuing the delegation",
        }),
        ("--max-tokens", {
            "type": int,
            "default": 2000,
            "help": "Sub-LLM max output tokens (default: 2000)",
        }),
        ("--expected-min-evidence-refs", {
            "type": int,
            "default": 0,
            "help": "Minimum arq://act/... evidence references required · "
                    "claims below threshold are substrate-attested as zero-weight",
        }),
    )
    for flag, spec in option_specs:
        invoke.add_argument(flag, **spec)
    invoke.set_defaults(func=cmd_invoke)

    parsed = root.parse_args()
    handler = parsed.func
    return handler(parsed)


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    raise SystemExit(main())
