#!/usr/bin/env python3
"""arq-learning-replay — rehydrate operator-correction corpus from substrate.

Smallest viable bridge between the per-session emission discipline
(``arqera_learning_gap_recorded`` acts, one per operator-directive turn)
and the not-yet-built bootstrap-time injection (per
``feedback_every_operator_interaction_records_a_learning_gap.md`` Phase 1).

Renders to stdout. Future Twin sessions can pipe this output into their
context manually until ``scripts/plug-in-live-query.sh`` is extended
to do it at session start.

Usage:
    scripts/arq-learning-replay                       # last 24h
    scripts/arq-learning-replay --since 2026-05-16    # since date
    scripts/arq-learning-replay --since-last-close    # since most recent session_close
    scripts/arq-learning-replay --limit 50            # default 20

Substrate query target:
    arq://act/arqera_learning_gap_recorded/*

Authority: read-only substrate query via `twin index`. No state mutation.
"""

from __future__ import annotations

import argparse
import json
import subprocess
import sys
from datetime import datetime, timedelta, timezone


def run_twin_index(act_type: str, since_iso: str, limit: int) -> list[dict]:
    """Run ``twin index --json`` for one act type and return the decoded rows.

    This is a best-effort query: every failure mode is reported on stderr and
    turned into an empty result rather than an exception, so callers can treat
    "substrate unavailable" and "nothing recorded" the same way.

    Args:
        act_type: Value passed to ``--type``.
        since_iso: Lower bound passed verbatim to ``--since``.
        limit: Maximum number of rows, passed to ``--limit``.

    Returns:
        The rows as a list of dicts. The CLI output may be either a bare JSON
        list or an object wrapping the list under ``results`` or ``items``.
        Entries that are not JSON objects are dropped. An empty list is
        returned when the command cannot be started, exits non-zero, exceeds
        the 15 second timeout, or prints something that is not valid JSON.
    """
    cmd = [
        "twin",
        "--use-keychain",
        "index",
        "--class",
        "act",
        "--type",
        act_type,
        "--since",
        since_iso,
        "--limit",
        str(limit),
        "--json",
    ]
    try:
        out = subprocess.check_output(cmd, text=True, timeout=15)
    except subprocess.CalledProcessError as exc:
        print(f"[arq-learning-replay] twin index failed: {exc}", file=sys.stderr)
        return []
    except subprocess.TimeoutExpired:
        print("[arq-learning-replay] twin index timed out (substrate slow)", file=sys.stderr)
        return []
    except OSError as exc:
        # Raised when the `twin` executable is missing from PATH or cannot be
        # executed; without this the script would die with a raw traceback.
        print(f"[arq-learning-replay] could not run twin: {exc}", file=sys.stderr)
        return []

    try:
        data = json.loads(out)
    except json.JSONDecodeError as exc:
        print(f"[arq-learning-replay] failed to parse JSON: {exc}", file=sys.stderr)
        return []

    # Unwrap the {"results": [...]} / {"items": [...]} envelope if present.
    if isinstance(data, dict):
        data = data.get("results") or data.get("items") or []
    if not isinstance(data, list):
        return []
    # Callers use dict methods on every row; keep only rows that support them.
    return [row for row in data if isinstance(row, dict)]


def resolve_since_last_close() -> str | None:
    """Return the ``issued_at`` of the newest ``session_close`` act, if any.

    Looks back seven days (up to 20 acts) via ``run_twin_index``. Returns
    ``None`` when the query yields no rows; otherwise returns whatever the
    row with the greatest ``issued_at`` string holds under that key.
    """
    window_start = datetime.now(timezone.utc) - timedelta(days=7)
    since = window_start.isoformat().replace("+00:00", "Z")
    closes = run_twin_index("session_close", since, 20)
    if not closes:
        return None
    # The greatest issued_at string is the most recent close; rows without the
    # key compare as "" and therefore lose to any timestamped row.
    latest = max(closes, key=lambda act: act.get("issued_at", ""))
    return latest.get("issued_at")


def _clip(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, with ``...`` appended if cut."""
    return f"{text[:limit]}..." if len(text) > limit else text


def _load_preview(preview: object) -> dict | None:
    """Return *preview* as a dict, or ``None`` if it is not a JSON object."""
    if isinstance(preview, dict):
        return preview
    if not isinstance(preview, str):
        return None
    try:
        parsed = json.loads(preview, strict=False)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    return parsed if isinstance(parsed, dict) else None


def render(rows: list[dict]) -> None:
    """Print the learning-gap corpus to stdout, oldest act first.

    For each row this prints the ``issued_at`` timestamp and ``address``, the
    ``reference``/``ref`` value when it is not already part of the address,
    and, when ``payload_preview`` holds a JSON object, its
    ``operator_phrasing_verbatim`` (clipped to 200 characters),
    ``twin_interpretation`` (clipped to 160 characters) and ``disposition``
    fields. A preview that is not a decodable JSON object (for example
    truncated JSON) is shown raw, clipped to 200 characters.

    Args:
        rows: Act rows to display. The list is not modified.
    """
    if not rows:
        print("[arq-learning-replay] no learning-gap acts in window.")
        return

    # sorted() rather than list.sort() so the caller's list keeps its order;
    # `or ""` plus str() keeps the key comparable when issued_at is null or
    # not a string.
    ordered = sorted(rows, key=lambda r: str(r.get("issued_at") or ""))

    print(f"[arq-learning-replay] {len(ordered)} learning-gap act(s) since query window:\n")

    for row in ordered:
        addr = str(row.get("address") or "<no-address>")
        issued = row.get("issued_at") or "<no-ts>"
        ref = str(row.get("reference") or row.get("ref") or "")
        preview = row.get("payload_preview") or ""

        print(f"  {issued}  {addr}")
        if ref and ref not in addr:
            print(f"    ref: {ref}")

        if preview:
            payload = _load_preview(preview)
            if payload is None:
                # Not a JSON object (e.g. truncated JSON): show the raw text.
                snippet = str(preview)[:200].replace("\n", " ")
                print(f"    preview: {snippet}...")
            else:
                quote = payload.get("operator_phrasing_verbatim")
                if quote:
                    print(f'    "{_clip(str(quote), 200)}"')
                interp = payload.get("twin_interpretation")
                if interp:
                    print(f"    interpretation: {_clip(str(interp), 160)}")
                disp = payload.get("disposition")
                if disp:
                    print(f"    disposition: {disp}")
        print()


def main() -> int:
    """Parse the command line, query learning-gap acts and render them.

    The lower bound of the query window is chosen in this order:
    ``--since-last-close`` (falling back to 24 hours ago when no
    ``session_close`` act is found), then ``--since``, then 24 hours ago.

    Returns:
        Always ``0``.
    """
    cli = argparse.ArgumentParser(description=__doc__.split("\n")[0])
    cli.add_argument("--since", help="ISO timestamp lower bound (default: 24h ago)")
    cli.add_argument(
        "--since-last-close",
        action="store_true",
        help="set --since to most recent session_close act issued_at (last 7d)",
    )
    cli.add_argument("--limit", type=int, default=20, help="max acts (default 20)")
    opts = cli.parse_args()

    def day_ago() -> str:
        # UTC "now minus 24h" rendered with a trailing Z instead of +00:00.
        stamp = datetime.now(timezone.utc) - timedelta(hours=24)
        return stamp.isoformat().replace("+00:00", "Z")

    if not opts.since_last_close:
        lower_bound = opts.since if opts.since else day_ago()
    else:
        closed_at = resolve_since_last_close()
        if closed_at:
            lower_bound = closed_at
            print(
                f"[arq-learning-replay] querying since session_close at {lower_bound}",
                file=sys.stderr,
            )
        else:
            print(
                "[arq-learning-replay] no session_close in last 7d; falling back to 24h",
                file=sys.stderr,
            )
            lower_bound = day_ago()

    acts = run_twin_index("arqera_learning_gap_recorded", lower_bound, opts.limit)
    render(acts)
    return 0


# Script entry point: exit with main()'s return value as the process status.
if __name__ == "__main__":
    raise SystemExit(main())
