#!/usr/bin/env python3
"""arq-consolidation-gate v0 · refuses pure-add PRs · enforces consolidation.

Per operator directive 2026-05-21 (substrate_truth_reconciliation/
arqera-missing-consolidation-gate-and-identity-primitives-2026-05-20):

> "The UI work that was done on staging.arqera.io/ is a big bug since it is
>  just a collection of different texts and designs that just keep piling up
>  (no degradation whatsoever at all) · which means arqera as a clean system
>  is not functioning."

This primitive refuses to attest a customer-surface PR unless the PR body
cites at least one of:
  - Retires: <files/components/sections>
  - Unifies: <what was consolidated>
  - Replaces: <what was replaced>
  - Net-add (with explicit justification why no degradation is possible)

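On success, emits a consolidation_evidence act (ref
<owner>-<repo>-<pr>-<sha12>-<timestamp>) carrying the extracted evidence; on
failure, emits a consolidation_evidence_missing act instead. The merge gate
can later require the success act before allowing customer-surface PRs to
merge.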

Scope (bounded · per operator directive consolidate-not-blow-up):
  - reads PR body via arq-github pr view (no GitHub mutation)
  - emits ONE substrate act on compliance
  - returns non-zero on missing evidence · no auto-modification
  - no merge / deploy / secret authority

Usage:
  arq-consolidation-gate verify --pr <N>
  arq-consolidation-gate verify --pr <N> --retroactive  (allows older PRs)

Compliance semantics:
  PR body MUST contain a "## Consolidation" section AND at least one of:
    - Retires: ... (non-empty)
    - Unifies: ... (non-empty)
    - Replaces: ... (non-empty)
    - Net-add: ... (non-empty justification)
"""
from __future__ import annotations

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
from datetime import datetime, timezone

# Policy identifier stamped into every emitted act payload under "policy".
POLICY_VERSION = "arq-consolidation-gate-v0-2026-05-21"
# Portable binary resolution: env override → PATH lookup → None.
# An empty env value is falsy, so it also falls through to the PATH lookup.
# TWIN_BIN unresolved: emit_act emits a loud stderr WARN once and skips the
# act · never silent.
# ARQ_GITHUB_BIN unresolved: fetch_pr_body prints an ERROR and returns empty
# strings, which makes `verify` exit non-zero.
TWIN_BIN = os.environ.get("TWIN_BIN") or shutil.which("twin")
ARQ_GITHUB_BIN = os.environ.get("ARQ_GITHUB_BIN") or shutil.which("arq-github")


def now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string (offset included)."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()


def now_compact() -> str:
    """Return the current UTC time as a compact stamp, e.g. 2026-05-21T134501Z."""
    utc_now = datetime.now(tz=timezone.utc)
    return f"{utc_now:%Y-%m-%dT%H%M%SZ}"


# Set once the "twin binary not found" warning has been printed, so repeated
# emit attempts in one process do not spam stderr.
_EMIT_WARN_LOGGED = False


def emit_act(act_type: str, ref: str, payload: dict) -> str | None:
    """Emit one substrate act through the `twin` CLI (best effort).

    Args:
        act_type: Act type handed to `twin act emit act`.
        ref: Act reference; a compact UTC timestamp is appended to it.
        payload: JSON-serialisable fields; `policy` and `issued_at` are added.

    Returns:
        The first stdout line that starts with ``arq://act/`` (stripped), or
        None when the binary is missing, the invocation fails, or no such
        line is printed. Every None path is reported on stderr; this function
        never raises.
    """
    global _EMIT_WARN_LOGGED
    if not TWIN_BIN or not os.path.exists(TWIN_BIN):
        if not _EMIT_WARN_LOGGED:
            print(
                "arq-consolidation-gate: WARN twin binary not found "
                "(set TWIN_BIN or install `twin` on PATH) · audit acts will be skipped",
                file=sys.stderr,
            )
            _EMIT_WARN_LOGGED = True
        return None
    try:
        r = subprocess.run(
            [TWIN_BIN, "--use-keychain", "act", "emit", "act", act_type,
             f"{ref}-{now_compact()}",
             "--payload", json.dumps({**payload, "policy": POLICY_VERSION, "issued_at": now_iso()})],
            check=False, timeout=10, capture_output=True, text=True,
        )
    except Exception as exc:
        # Deliberately broad: emitting the audit act is best effort and must
        # not crash the gate, but the cause is surfaced instead of swallowed.
        print(
            f"arq-consolidation-gate: WARN act emit failed "
            f"({type(exc).__name__}: {exc}) · audit act skipped",
            file=sys.stderr,
        )
        return None
    for line in (r.stdout or "").splitlines():
        if line.startswith("arq://act/"):
            return line.strip()
    # The command ran but printed no act address: report exit code and stderr.
    detail = (r.stderr or "").strip()[:200]
    print(
        f"arq-consolidation-gate: WARN twin returned no act address "
        f"(exit {r.returncode}){' · ' + detail if detail else ''}",
        file=sys.stderr,
    )
    return None


def fetch_pr_body(pr: int, owner: str, repo: str) -> tuple[str, str, str]:
    """Read a PR through `arq-github pr view` without mutating anything.

    Args:
        pr: Pull request number.
        owner: Repository owner passed as `--owner`.
        repo: Repository name passed as `--repo`.

    Returns:
        (body, head_sha, author_login). All three are empty strings on
        failure: binary missing, invocation error, non-zero exit, or JSON
        that is not an object. When stdout is not JSON at all it is returned
        as the body with empty sha/author. Failures are reported on stderr;
        this function never raises.
    """
    if not ARQ_GITHUB_BIN:
        print(
            "arq-consolidation-gate: ERROR arq-github binary not found "
            "(set ARQ_GITHUB_BIN or install `arq-github` on PATH) · cannot read PR body",
            file=sys.stderr,
        )
        return "", "", ""
    try:
        r = subprocess.run(
            [ARQ_GITHUB_BIN, "pr", "view", str(pr), "--owner", owner, "--repo", repo],
            check=False, timeout=15, capture_output=True, text=True,
        )
    except Exception as exc:
        # Broad on purpose (timeout, exec failure, decode error): the caller
        # treats empty strings as "could not fetch", but the cause is logged.
        print(
            f"arq-consolidation-gate: ERROR arq-github pr view failed "
            f"({type(exc).__name__}: {exc})",
            file=sys.stderr,
        )
        return "", "", ""

    if r.returncode != 0:
        # A failed command's output is not a PR body; returning it would make
        # the gate judge (and attest) text that never came from the PR.
        detail = (r.stderr or "").strip()[:200]
        print(
            f"arq-consolidation-gate: ERROR arq-github pr view exited {r.returncode}"
            f"{' · ' + detail if detail else ''}",
            file=sys.stderr,
        )
        return "", "", ""

    raw = r.stdout or ""
    try:
        data = json.loads(raw)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        # Non-JSON output is taken to be the plain-text body itself.
        return raw, "", ""
    if not isinstance(data, dict):
        print(
            "arq-consolidation-gate: ERROR arq-github pr view returned JSON that is not an object",
            file=sys.stderr,
        )
        return "", "", ""

    def _field(nested_key: str, inner_key: str, flat_key: str) -> str:
        # Prefer the nested shape ({nested_key: {inner_key: ...}}), then the
        # flat key; a flat value that is itself an object is unwrapped too.
        nested = data.get(nested_key)
        value = nested.get(inner_key) if isinstance(nested, dict) else None
        if not value:
            flat = data.get(flat_key)
            value = flat.get(inner_key) if isinstance(flat, dict) else flat
        return value if isinstance(value, str) else ""

    body = data.get("body")
    return (
        body if isinstance(body, str) else "",
        _field("head", "sha", "head_sha"),
        _field("user", "login", "author"),
    )


# Both patterns use `[^\S\n]` (whitespace except newline) instead of `\s`, so a
# match can never spill onto the following line.
# Heading: "#", "##" or "###" followed by "Consolidation" on the same line.
CONSOLIDATION_HEADING = re.compile(r"^#{1,3}[^\S\n]*Consolidation\b", re.IGNORECASE | re.MULTILINE)
# Match: bullet · optional `**` wrap · verb · optional `**` close · colon · body
# Handles "- **Retires:** ..." / "- Retires: ..." / "**Retires**: ..."
# The body must start with a non-space character on the SAME line as the verb:
# a blank "- Retires:" line must not capture the next line as its value,
# otherwise an unfilled template would count as evidence.
EVIDENCE_LINE = re.compile(
    r"^[^\S\n]*[-*]?[^\S\n]*\*{0,2}(?P<verb>Retires|Unifies|Replaces|Net-add)\*{0,2}"
    r"[^\S\n]*:[^\S\n]*(?P<body>\S.*?)[^\S\n]*\*{0,2}[^\S\n]*$",
    re.IGNORECASE | re.MULTILINE,
)

# Patterns that look like an evidence value but are actually empty/not-applicable.
# Sentry on #3999 (thread PRRT_kwDORNq-1M6Ds6IH): "N/A · pure deletion" was
# slipping past the empty-check because it's a non-empty string · but it's a
# "this verb does not apply" marker · not real evidence.
_NA_PREFIXES = ("n/a", "n.a.", "na ", "not applicable", "doesn't apply", "does not apply")
_NA_EQUALS = ("none", "-", "n/a", "na", "n.a.", "nothing", "tbd")


def _is_placeholder_or_na(body_txt: str) -> bool:
    """Return True when an evidence value carries no real evidence.

    A value is rejected when, after trimming surrounding whitespace, it is
    empty, equals a not-applicable marker (case-insensitive, with or without
    trailing full stops, e.g. "None."), starts with a not-applicable prefix,
    or is an unfilled ``<...>`` template placeholder.
    """
    text = (body_txt or "").strip()
    if not text:
        # Covers None, "" and whitespace-only values.
        return True
    low = text.lower()
    # Bare equality with NA markers; "n.a." is matched before the full stops
    # are trimmed, "none." / "tbd." after.
    if low in _NA_EQUALS or low.rstrip(".") in _NA_EQUALS:
        return True
    # Starts with an NA marker (str.startswith accepts a tuple of prefixes)
    if low.startswith(_NA_PREFIXES):
        return True
    # Unfilled template placeholder, checked on the trimmed text
    return text.startswith("<") and text.endswith(">")


def extract_consolidation_section(body: str) -> tuple[bool, dict]:
    """Locate the Consolidation section of a PR body and collect its evidence.

    Args:
        body: Raw PR body (markdown).

    Returns:
        (compliant, extracted). compliant is True iff a Consolidation heading
        is found AND at least one evidence line has a real value. On success
        extracted is {"evidence": {verb: [values]}} with only non-empty verbs;
        otherwise it is {"reason": ...}, plus "section_preview" when the
        section exists but holds no usable evidence.
    """
    if not body:
        return False, {"reason": "empty_pr_body"}
    m = CONSOLIDATION_HEADING.search(body)
    if not m:
        return False, {"reason": "no_consolidation_section"}
    section_start = m.end()
    # The section runs to the next level 1-3 heading or the end of the body.
    # The heading title may start with any non-space character (emoji,
    # backtick, bracket), not just a word character; otherwise evidence-shaped
    # lines from later sections would be counted. `[^\S\n]` keeps the
    # separator on the heading's own line.
    next_heading = re.search(r"^#{1,3}[^\S\n]+\S", body[section_start:], re.MULTILINE)
    section_end = section_start + next_heading.start() if next_heading else len(body)
    section_text = body[section_start:section_end]

    evidence: dict[str, list[str]] = {"Retires": [], "Unifies": [], "Replaces": [], "Net-add": []}
    for em in EVIDENCE_LINE.finditer(section_text):
        verb_lc = em.group("verb").lower()
        body_txt = em.group("body").strip()
        # Strip residual markdown bold leakage at either end
        body_txt = re.sub(r"^\*{1,2}", "", body_txt)
        body_txt = re.sub(r"\*{1,2}$", "", body_txt).strip()
        # Filter empties · placeholders · not-applicable markers
        if _is_placeholder_or_na(body_txt):
            continue
        # Canonical key casing: "Net-add" keeps its lowercase tail.
        verb_norm = "Net-add" if verb_lc == "net-add" else verb_lc.title()
        evidence[verb_norm].append(body_txt)

    non_empty = {k: v for k, v in evidence.items() if v}
    if not non_empty:
        return False, {"reason": "section_present_but_no_evidence_lines", "section_preview": section_text[:240]}

    return True, {"evidence": non_empty}


def cmd_verify(args: argparse.Namespace) -> int:
    """Handle `verify`: check one PR for consolidation evidence and attest it.

    Returns the process exit code: 1 when the PR body cannot be fetched, 0
    when evidence is present, and for missing evidence 0 with --retroactive
    or 1 without it. An act is emitted for both the compliant and the
    non-compliant outcome.
    """
    pr_body, sha, login = fetch_pr_body(args.pr, args.owner, args.repo)
    if not pr_body:
        print(f"arq-consolidation-gate: ✗ could not fetch PR #{args.pr} body", file=sys.stderr)
        return 1

    ok, details = extract_consolidation_section(pr_body)

    # The same reference and base payload are used for both act types.
    act_ref = f"{args.owner}-{args.repo}-{args.pr}-{sha[:12] or 'no-sha'}"
    base = {
        "pr": args.pr,
        "owner": args.owner,
        "repo": args.repo,
        "head_sha": sha,
        "author": login,
    }

    if not ok:
        address = emit_act(
            "consolidation_evidence_missing",
            act_ref,
            {**base, **details, "compliant": False, "retroactive_allowed": args.retroactive},
        )
        report = [
            f"arq-consolidation-gate: ✗ PR #{args.pr} missing consolidation evidence",
            f"  reason: {details.get('reason')}",
            "  required: PR body must contain a '## Consolidation' section with at least one of:",
            "    Retires: <files/components/sections retired by this PR>",
            "    Unifies: <what was consolidated/unified>",
            "    Replaces: <what was replaced>",
            "    Net-add: <explicit justification why no degradation is possible>",
        ]
        if address:
            report.append(f"  attested: {address}")
        if args.retroactive:
            report.append(
                "arq-consolidation-gate: --retroactive set · returning 0 anyway (drift tracked in substrate)"
            )
        for line in report:
            print(line, file=sys.stderr)
        return 0 if args.retroactive else 1

    address = emit_act("consolidation_evidence", act_ref, {**base, **details, "compliant": True})
    print(f"arq-consolidation-gate: ✓ PR #{args.pr} cites consolidation evidence")
    for verb, entries in details["evidence"].items():
        for entry in entries:
            # Values are truncated to 100 characters for display only.
            print(f"  {verb}: {entry[:100]}")
    if address:
        print(f"  attested: {address}")
    return 0


def main() -> int:
    """Build the CLI, parse the command line, and run the chosen sub-command.

    Returns the exit code produced by the sub-command handler.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    commands = cli.add_subparsers(dest="verb", required=True)

    verify = commands.add_parser("verify", help="Verify PR body cites consolidation evidence")
    verify.set_defaults(func=cmd_verify)
    verify.add_argument("--pr", type=int, required=True, help="PR number")
    verify.add_argument("--owner", default="Arqera-IO")
    verify.add_argument("--repo", default="ARQERA")
    verify.add_argument(
        "--retroactive",
        action="store_true",
        help="Track the gap in substrate but return 0 (for backfill of pre-gate PRs)",
    )

    parsed = cli.parse_args()
    handler = parsed.func
    return handler(parsed)


# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
