#!/usr/bin/env python3
"""Inherit a gate from the shared registry into a target repo's approved gates.

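Idempotent: a rerun is a no-op when the gate_id is already present in the
target with the same derived_from provenance. If the gate_id is present with
different provenance, nothing is written and the exit code is 3.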
"""
from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parent))
from collect_hook_event import assert_regular_file_destination  # noqa: E402
from export_gates import _gate_id  # noqa: E402
from state_paths import atomic_rewrite  # noqa: E402


# A gate_id is exactly 12 characters drawn from a-f and 0-9.
GATE_ID_RE = re.compile(r"^[a-f0-9]{12}$")
# Fields validate_record() requires on every shared record. Each must be a
# non-empty string containing no "\n" or "\r".
REQUIRED_RECORD_FIELDS = ("domain", "gate_id", "gate_category", "gate", "origin_repo", "promoted_at")
# Conflict exit code: distinct from invalid-input (2) so callers can tell apart
# a same-gate_id / different-origin collision from a malformed-record refusal.
EXIT_CONFLICT = 3


def validate_gate_id(gate_id: str) -> None:
    """Reject any gate_id that is not exactly 12 lowercase hex characters.

    Raises:
        ValueError: if *gate_id* does not fully match ``GATE_ID_RE``.
    """
    matched = GATE_ID_RE.fullmatch(gate_id)
    if matched is None:
        raise ValueError(f"gate_id must be 12 lowercase hex characters: {gate_id}")


def shared_gate_path(shared_root: Path, gate_id: str) -> Path:
    """Return the resolved path of ``<shared_root>/gates/<gate_id>.json``.

    Both the registry directory and the record path are resolved before
    being compared, so a ``..`` component or a symlink that lands outside
    the ``gates`` directory is detected.

    Raises:
        ValueError: if the resolved record path is not inside the resolved
            ``gates`` directory.
    """
    registry_dir = (shared_root / "gates").resolve()
    candidate = registry_dir.joinpath(f"{gate_id}.json").resolve()
    try:
        # relative_to() raises ValueError when candidate is not under
        # registry_dir; the relative path itself is not needed.
        candidate.relative_to(registry_dir)
    except ValueError as error:
        raise ValueError(f"shared gate path escapes registry: {candidate}") from error
    else:
        return candidate


def validate_record(record: dict, gate_id: str) -> None:
    """Check that a shared registry record is well-formed and untampered.

    Every name in ``REQUIRED_RECORD_FIELDS`` must map to a non-empty string
    with no "\\n" or "\\r". The record's own ``gate_id`` must equal the
    requested one, and the id recomputed from the record content must match
    as well.

    Raises:
        ValueError: on the first check that fails.
    """
    for name in REQUIRED_RECORD_FIELDS:
        candidate = record.get(name)
        if not (isinstance(candidate, str) and candidate):
            raise ValueError(f"missing or invalid field: {name}")
        if any(ch in candidate for ch in ("\n", "\r")):
            raise ValueError(f"field contains a newline: {name}")

    if record["gate_id"] != gate_id:
        raise ValueError("record gate_id does not match requested gate_id")

    # Re-derive the id from (domain, gate_category, gate). If the gate text
    # was edited in the shared registry after promotion, the filename id and
    # the required fields would all still look fine, yet inheritors would
    # pull different instruction text under the original gate_id. Comparing
    # the recomputed id against the requested one catches that.
    recomputed = _gate_id(record["domain"], record["gate_category"], record["gate"])
    if recomputed != gate_id:
        raise ValueError(
            f"record content does not hash to gate_id {gate_id} "
            f"(derived {recomputed}); possible registry tampering"
        )


def gate_already_present(target: Path, gate_id: str) -> bool:
    """Return True if *target* has a line of the form ``gate_id: <gate_id>``.

    The file is read as UTF-8. Leading and trailing whitespace around the
    line, and whitespace after the colon, are tolerated.
    """
    line_regex = rf"^\s*gate_id:\s*{re.escape(gate_id)}\s*$"
    contents = target.read_text(encoding="utf-8")
    return re.search(line_regex, contents, flags=re.MULTILINE) is not None


def _existing_derived_from(text: str, gate_id: str) -> str | None:
    """Return the derived_from value for an already-present gate_id, or None.

    Walks block-by-block (split on a line-leading "- domain:") because the
    gate_id and derived_from for the same gate live in adjacent indented
    lines under one block; a flat regex over the whole file could cross
    block boundaries on a malformed file.

    Args:
        text: full contents of the target gates file.
        gate_id: the gate id to look for.

    Returns:
        The block's ``derived_from`` value, ``""`` if the block for
        *gate_id* has no ``derived_from`` line, or ``None`` if no block
        carries *gate_id*.
    """
    # The (?m)-anchored splitter works on CRLF files and on files that start
    # directly with "- domain:" (no leading newline).
    blocks = re.split(r"(?m)^-\s+domain:\s*", text)
    for block in blocks[1:]:
        block_gate_id: str | None = None
        derived_from: str | None = None
        # Split on "\r\n", "\n" and "\r" only. str.splitlines() would also
        # break on \x0b, \x0c, \x1c-\x1e, \x85, U+2028 and U+2029, which
        # validate_record() does not reject inside a field value. A value
        # containing one of those would be cut mid-line and its tail (for
        # example "gate_id: ...") misread as a separate field.
        for raw_line in re.split(r"\r\n|\n|\r", block):
            line = raw_line.strip()
            if line.startswith("gate_id:"):
                block_gate_id = line.split(":", 1)[1].strip()
            elif line.startswith("derived_from:"):
                derived_from = line.split(":", 1)[1].strip()
        if block_gate_id == gate_id:
            return derived_from or ""
    return None


def _gate_block(record: dict) -> str:
    """Build the text block that represents *record* in a gates file.

    The block starts with a newline and lists domain, gate_id,
    gate_category and gate, then an optional level, then a derived_from
    line of the form ``origin_repo:gate_id:promoted_at``. Rendering is pure
    string work, so callers can validate and render before touching the
    destination file.

    Raises:
        ValueError: if ``level`` is a non-empty string containing "\\n" or
            "\\r".
    """
    rendered = (
        f"\n- domain: {record['domain']}\n"
        f"  gate_id: {record['gate_id']}\n"
        f"  gate_category: {record['gate_category']}\n"
        f"  gate: {record['gate']}\n"
    )

    # Only emit a level when the shared record carries a non-empty string
    # one. Records without it round-trip with no level line; no default is
    # invented here.
    severity = record.get("level")
    if isinstance(severity, str) and severity:
        if any(ch in severity for ch in ("\n", "\r")):
            raise ValueError("field contains a newline: level")
        rendered += f"  level: {severity}\n"

    rendered += (
        f"  derived_from: {record['origin_repo']}:{record['gate_id']}:{record['promoted_at']}\n"
    )
    return rendered


def append_gate(target: Path, record: dict):
    """Append the rendered block for *record* to *target* (UTF-8, append mode).

    Retained for callers and tests that import it. The block is rendered
    before the file is opened, so a rendering error leaves *target*
    untouched.
    """
    rendered = _gate_block(record)
    with target.open(mode="a", encoding="utf-8") as sink:
        sink.write(rendered)


def parse_args():
    """Parse the command line and return the argparse namespace.

    All three options are required: ``--shared-root`` and ``--target-gates``
    are converted to ``Path``; ``--gate-id`` stays a string. The module
    docstring is used as the parser description.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    option_table = (
        ("--shared-root", {"type": Path}),
        ("--target-gates", {"type": Path}),
        ("--gate-id", {}),
    )
    for flag, extra in option_table:
        parser.add_argument(flag, required=True, **extra)
    return parser.parse_args()


def main():
    """Run the inherit command and return the process exit code.

    Returns:
        0 when the gate was appended or is already present with identical
        provenance; 2 when an input is invalid or missing; ``EXIT_CONFLICT``
        when the gate_id is already present with a different derived_from.
    """

    def refuse(message, code=2):
        # Report on stderr and hand back the exit code for a direct return.
        print(message, file=sys.stderr)
        return code

    args = parse_args()
    gate_id = args.gate_id
    target = args.target_gates

    try:
        validate_gate_id(gate_id)
        record_path = shared_gate_path(args.shared_root, gate_id)
    except ValueError as exc:
        return refuse(str(exc))

    if not record_path.is_file():
        return refuse(f"shared gate not found: {record_path}")
    if not target.is_file():
        return refuse(f"target gates not found: {target}")

    try:
        assert_regular_file_destination(target, label="Target gates")
    except ValueError as exc:
        return refuse(str(exc))

    # Load, validate and render before taking the lock, so a bad record is
    # refused without touching the destination.
    try:
        record = json.loads(record_path.read_text(encoding="utf-8"))
        if not isinstance(record, dict):
            raise ValueError("record must be a JSON object")
        validate_record(record, gate_id)
        block = _gate_block(record)
    except (json.JSONDecodeError, ValueError) as exc:
        return refuse(f"invalid shared gate record: {exc}")

    # Provenance string this run would write; all three fields were
    # validated as strings above.
    incoming_derived = (
        f"{record['origin_repo']}:{record['gate_id']}:{record['promoted_at']}"
    )

    # Read, check and write all happen inside atomic_rewrite. Per the
    # original note, export_gates and this script coordinate on the same
    # `<target_gates>.lock` sidecar, so a concurrent re-export cannot wipe a
    # freshly appended block and two concurrent inherits of one gate_id
    # cannot both see it absent and both append.
    with atomic_rewrite(target) as (current_text, commit):
        existing_derived = _existing_derived_from(current_text, gate_id)
        if existing_derived is None:
            # Absent: append. The block begins with a newline, so it joins
            # cleanly onto whatever content is already there.
            commit(current_text + block)
        elif existing_derived != incoming_derived:
            # Same gate_id but different origin_repo or promoted_at. A
            # silent no-op would lose provenance, so exit non-zero and let
            # the caller investigate.
            return refuse(
                f"gate_id {args.gate_id} already inherited from "
                f"{existing_derived!r}; refusing to re-inherit from "
                f"{incoming_derived!r}",
                EXIT_CONFLICT,
            )
        # Otherwise the gate is present with identical provenance: no-op.
    return 0


if __name__ == "__main__":
    sys.exit(main())
