#!/usr/bin/env python3
"""Promote a gate from a repo's approved-gates.md into the shared registry.

Reads a single gate (identified by gate_id) from a gates registry markdown file
produced by ``bin/export_gates`` and writes it as a JSON record into a shared
root at ``<shared-root>/gates/<gate-id>.json``. Other repos in a federation can
then inherit promoted gates from that shared root (P4-B).
"""
from __future__ import annotations

import argparse
import datetime as dt
import json
import re
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parent))
from collect_hook_event import assert_regular_file_destination  # noqa: E402
from gates_inherit import REQUIRED_RECORD_FIELDS  # noqa: E402
from state_paths import atomic_rewrite  # noqa: E402


# Named process exit codes returned by main(). Besides these two, main()
# returns 0 on success, 2 for a malformed --gate-id, a missing --gates file
# or an unusable output path, 3 when the gate_id is not found, and 4 when the
# gate block lacks gate_category or gate.
#
# EXIT_CONFLICT is distinct from "not found" (3) and "block missing fields"
# (4) so callers can tell a same-gate_id / different-content collision apart
# from upstream malformedness. Mirrors the role of gates_inherit.EXIT_CONFLICT.
EXIT_CONFLICT = 5
# Distinct from --gates parse failures so callers can tell that the record
# the operator tried to build was itself invalid (e.g. empty --origin-repo).
EXIT_INVALID_RECORD = 6


# Each gate block emitted by ``bin/export_gates`` looks like:
#
#     - domain: cloudflare
#       gate_id: 2aed10be9612
#       gate_category: docs-check
#       gate: Re-read current Cloudflare docs before changing wrangler config.
#       level: 3
#
# We split on the leading ``- domain:`` marker and parse field lines inside
# each block. The next block (or end-of-file) terminates the current one.
#
# The marker deliberately uses ``[ \t]`` instead of ``\s``: ``\s`` also
# matches newlines, so for a block whose domain value is empty the separator
# would swallow the line break plus the next line's indentation, and the
# following field line (normally ``gate_id:``) would be misread as the domain
# value and lost as a field.
GATE_BLOCK_SPLIT_RE = re.compile(r"(?m)^-[ \t]+domain:[ \t]*")
# One ``key: value`` field line. The key is matched case-insensitively;
# whitespace around the key and the value is excluded from both groups.
FIELD_RE = re.compile(r"^\s*([a-z_][a-z0-9_]*)\s*:\s*(.*?)\s*$", re.I)
# A gate_id is exactly 12 lowercase hexadecimal characters.
GATE_ID_RE = re.compile(r"^[a-f0-9]{12}$")


def parse_blocks(text: str) -> list[dict[str, str]]:
    """Parse every gate block in ``text`` into a field-name -> value dict.

    The text is split on the ``- domain:`` marker; whatever precedes the first
    marker is discarded. Within a block, the remainder of the marker line is
    the ``domain`` value and each following ``key: value`` line adds one entry
    (keys are lower-cased, a repeated key overwrites the earlier value).
    Blank lines and lines that are not ``key: value`` shaped are skipped, and
    scanning of a block stops at the first markdown heading line.
    """
    # Element 0 of the split is the preamble ahead of the first marker.
    _preamble, *chunks = GATE_BLOCK_SPLIT_RE.split(text)
    parsed: list[dict[str, str]] = []
    for chunk in chunks:
        domain_line, _, body = chunk.partition("\n")
        fields: dict[str, str] = {"domain": domain_line.strip()}
        for line in body.splitlines():
            content = line.strip()
            # A heading ("## ..." or "# ...") ends the block's field list.
            if content.startswith(("##", "# ")):
                break
            # Blank lines are tolerated inside a block and simply skipped.
            hit = FIELD_RE.match(line) if content else None
            if hit is not None:
                fields[hit.group(1).lower()] = hit.group(2)
        parsed.append(fields)
    return parsed


def find_gate(gates_md: Path, gate_id: str) -> dict[str, str] | None:
    """Look up one gate block by id in a gates markdown file.

    Reads ``gates_md`` as UTF-8, parses it with ``parse_blocks`` and returns
    the first block whose ``gate_id`` field equals ``gate_id``; returns None
    when no block matches.
    """
    candidates = parse_blocks(gates_md.read_text(encoding="utf-8"))
    return next(
        (candidate for candidate in candidates if candidate.get("gate_id") == gate_id),
        None,
    )


def build_record(block: dict[str, str], origin_repo: str, note: str) -> dict[str, str]:
    """Assemble the shared-registry record for a parsed gate block.

    ``domain`` and ``gate_id`` must be present in ``block`` (KeyError
    otherwise); ``gate_category`` and ``gate`` fall back to an empty string.
    ``promoted_at`` is the current UTC time formatted as
    ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    stamp = dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    record = dict(
        domain=block["domain"],
        gate_id=block["gate_id"],
        gate_category=block.get("gate_category", ""),
        gate=block.get("gate", ""),
        origin_repo=origin_repo,
        promoted_at=stamp,
        note=note,
    )
    # Carry the severity over only when the source block has a non-empty one;
    # blocks without a level produce a record without a level key rather than
    # an invented default.
    severity = block.get("level")
    if severity:
        record["level"] = severity
    return record


def validate_record_for_promote(record: dict) -> None:
    """Reject a record that is not fit to be written to the shared registry.

    Every field named in REQUIRED_RECORD_FIELDS must be a non-empty string
    without carriage returns or line feeds; the first violation raises
    ValueError naming the field.

    Mirrors gates_inherit.validate_record minus the requested-id cross-check
    (no expected id at promote time). Without this an empty or newline-bearing
    --origin-repo writes through, gates_inherit fails on the same record at
    every sibling repo, and federation is silently bricked until an operator
    hand-edits the JSON.
    """
    for name in REQUIRED_RECORD_FIELDS:
        candidate = record.get(name)
        if not (isinstance(candidate, str) and candidate):
            raise ValueError(f"missing or invalid field: {name}")
        if any(line_break in candidate for line_break in ("\n", "\r")):
            raise ValueError(f"field contains a newline: {name}")


def _records_match_ignoring_volatile(a: dict, b: dict) -> bool:
    """Return True when ``a`` and ``b`` agree on every federation-stable field.

    ``promoted_at`` changes on every re-promote and ``note`` is operator
    metadata, so neither is compared. A field missing from both records
    counts as equal.
    """
    for stable_key in ("domain", "gate_id", "gate_category", "gate", "origin_repo", "level"):
        if a.get(stable_key) != b.get(stable_key):
            return False
    return True


def validate_gate_id(gate_id: str) -> None:
    """Raise ValueError unless ``gate_id`` fully matches GATE_ID_RE."""
    if GATE_ID_RE.fullmatch(gate_id) is not None:
        return
    raise ValueError(f"gate_id must be 12 lowercase hex characters: {gate_id}")


def shared_gate_path(shared_root: Path, gate_id: str) -> Path:
    """Return the resolved ``<shared_root>/gates/<gate_id>.json`` path.

    Both the ``gates`` directory and the record path are resolved before the
    containment check. Raises ValueError when the resolved record path does
    not lie under the resolved ``gates`` directory.
    """
    registry_dir = shared_root.joinpath("gates").resolve()
    candidate = registry_dir.joinpath(f"{gate_id}.json").resolve()
    try:
        candidate.relative_to(registry_dir)
    except ValueError as error:
        raise ValueError(f"gate output path escapes shared registry: {candidate}") from error
    else:
        return candidate


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the promote command line.

    ``--gates``, ``--gate-id``, ``--origin-repo`` and ``--shared-root`` are
    required (``--gates`` and ``--shared-root`` are converted to Path);
    ``--note`` is optional and defaults to an empty string.
    """
    option_specs = (
        ("--gates", dict(required=True, type=Path, help="Path to approved-gates.md produced by export_gates.")),
        ("--gate-id", dict(required=True, help="12-hex gate_id to promote.")),
        ("--origin-repo", dict(required=True, help="Identifier of the repo this gate originated from.")),
        ("--shared-root", dict(required=True, type=Path, help="Root directory of the shared registry.")),
        ("--note", dict(default="", help="Optional operator note to persist with the record.")),
    )
    parser = argparse.ArgumentParser(description=__doc__)
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args(argv)


def main(argv: list[str] | None = None) -> int:
    """Promote one gate into the shared registry and return an exit code.

    Args:
        argv: Command-line arguments without the program name. ``None`` is
            passed straight through to argparse.

    Returns:
        0 on success; 2 for a malformed ``--gate-id``, a missing or
        unreadable ``--gates`` file, or an unusable output path; 3 when no
        block carries the requested gate_id; 4 when the block lacks
        ``gate_category`` or ``gate``; ``EXIT_INVALID_RECORD`` when the built
        record fails validation; ``EXIT_CONFLICT`` when a record with
        different stable fields already exists for the same gate_id.
    """
    args = parse_args(argv)
    try:
        validate_gate_id(args.gate_id)
    except ValueError as exc:
        print(str(exc), file=sys.stderr)
        return 2
    if not args.gates.is_file():
        print(f"gates file not found: {args.gates}", file=sys.stderr)
        return 2
    try:
        block = find_gate(args.gates, args.gate_id)
    except (OSError, UnicodeDecodeError) as exc:
        # is_file() passing does not guarantee the file can be read or that
        # it is valid UTF-8; report that as an input error rather than
        # letting a traceback escape.
        print(f"gates file unreadable: {args.gates}: {exc}", file=sys.stderr)
        return 2
    if not block:
        print(f"gate_id not found: {args.gate_id}", file=sys.stderr)
        return 3
    if not block.get("gate_category") or not block.get("gate"):
        print(f"gate block for {args.gate_id} missing gate_category or gate", file=sys.stderr)
        return 4
    record = build_record(block, args.origin_repo, args.note)
    try:
        validate_record_for_promote(record)
    except ValueError as exc:
        print(f"invalid record: {exc}", file=sys.stderr)
        return EXIT_INVALID_RECORD
    try:
        out_path = shared_gate_path(args.shared_root, args.gate_id)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        assert_regular_file_destination(out_path, label="Promote output")
    except (ValueError, OSError) as exc:
        # OSError covers filesystem failures while preparing the destination
        # (e.g. mkdir hitting a permission error or a non-directory path
        # component); they are reported like any other unusable output path.
        print(str(exc), file=sys.stderr)
        return 2
    # Conflict-check and write under one sidecar lock so two concurrent
    # different-origin promoters can't both pass the conflict check then
    # race on os.replace. Pre-B-5 the existence/content check ran outside
    # any lock and the loser silently overwrote the winner's provenance.
    with atomic_rewrite(out_path) as (current_text, commit):
        if current_text.strip():
            try:
                existing = json.loads(current_text)
            except (json.JSONDecodeError, ValueError):
                # Unparseable content is not treated as a conflict; it is
                # replaced by the commit below.
                existing = None
            if isinstance(existing, dict) and not _records_match_ignoring_volatile(existing, record):
                print(
                    f"gate {args.gate_id} already promoted from "
                    f"{existing.get('origin_repo', '?')!r} at "
                    f"{existing.get('promoted_at', '?')}; refusing to overwrite with "
                    f"{record['origin_repo']!r}",
                    file=sys.stderr,
                )
                # NOTE(review): returning without calling commit() presumably
                # leaves the existing file untouched — confirm against
                # atomic_rewrite.
                return EXIT_CONFLICT
        commit(json.dumps(record, indent=2, sort_keys=True) + "\n")
    print(f"promoted gate {args.gate_id} -> {out_path}")
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
