#!/usr/bin/env python3
"""Install Codex/Claude hook config entries for the shared learning collector."""

from __future__ import annotations

import argparse
import datetime as dt
import json
import os
import re
import stat
import pathlib
import shutil
import shlex
import subprocess
import sys
from typing import Any

from state_paths import repo_state_dir


# Hook events installed when no --event override is supplied. Each name is
# normalized (see _normalize_event) and looked up in the event-sources mapping
# to obtain the runtime-specific event name.
DEFAULT_EVENTS = [
    "SessionStart",
    "UserPromptSubmit",
    "PreToolUse",
    "PostToolUse",
    "Stop",
    "SubagentStop",
    "SessionEnd",
    "Notification",
    "PreCompact",
]
# Runtimes accepted by --runtime; all of them are processed when the flag is omitted.
RUNTIMES = {"codex", "claude"}

# Default locations of the event-sources data file and its schema, resolved
# relative to this script: <script dir>/../skills/alc-core/references/.
# Both can be overridden through the ALC_EVENT_SOURCES_PATH and
# ALC_EVENT_SOURCES_SCHEMA_PATH environment variables respectively.
EVENT_SOURCES_PATH = pathlib.Path(__file__).resolve().parent.parent / "skills" / "alc-core" / "references" / "event-sources.json"
EVENT_SOURCES_SCHEMA_PATH = (
    pathlib.Path(__file__).resolve().parent.parent
    / "skills"
    / "alc-core"
    / "references"
    / "event-sources.schema.json"
)


def load_json_payload(path: pathlib.Path) -> Any:
    """Read `path` as UTF-8 text and return the decoded JSON value.

    Unlike `load_json`, a missing file is an error and any JSON value
    (not only objects) is accepted.

    Raises:
        ValueError: if `path` does not exist or does not contain valid JSON.
    """
    if path.exists():
        try:
            return json.loads(path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as error:
            raise ValueError(f"{path} is not valid JSON: {error}") from error
    raise ValueError(f"{path} is missing")


def _normalize_event(value: str) -> str:
    """Convert an event name such as ``PreToolUse`` or ``pre-compact`` to snake_case.

    An underscore is inserted before every uppercase letter that is not at
    the start of the string, hyphens become underscores, and the result is
    lowercased.
    """
    with_word_breaks = re.sub(r"(?<!^)(?=[A-Z])", "_", value)
    return with_word_breaks.replace("-", "_").lower()


def _event_sources_path() -> pathlib.Path:
    """Return the event-sources JSON path.

    A non-empty ``ALC_EVENT_SOURCES_PATH`` environment variable takes
    precedence (with ``~`` expanded); otherwise the bundled default is used.
    """
    configured = os.environ.get("ALC_EVENT_SOURCES_PATH")
    if not configured:
        return EVENT_SOURCES_PATH
    return pathlib.Path(configured).expanduser()


def _event_sources_schema_path() -> pathlib.Path:
    """Return the event-sources schema path.

    A non-empty ``ALC_EVENT_SOURCES_SCHEMA_PATH`` environment variable takes
    precedence (with ``~`` expanded); otherwise the bundled default is used.
    """
    configured = os.environ.get("ALC_EVENT_SOURCES_SCHEMA_PATH")
    if not configured:
        return EVENT_SOURCES_SCHEMA_PATH
    return pathlib.Path(configured).expanduser()


def _load_schema(path: pathlib.Path) -> dict[str, Any]:
    """Load the schema file at `path` and require its top level to be a JSON object.

    Raises:
        ValueError: if the file is missing, is not valid JSON, or is not an object.
    """
    schema = load_json_payload(path)
    if isinstance(schema, dict):
        return schema
    raise ValueError(f"{path} must contain a JSON object")


def _validate_event_sources(rows: list[Any], schema: dict[str, Any]) -> list[dict[str, str]]:
    """Check event-source rows against the subset of JSON Schema this script understands.

    Only these schema features are honored: ``items.required``,
    ``items.additionalProperties`` (when exactly ``False``),
    ``properties.runtime.enum``, ``properties.name.type``,
    ``properties.normalized.type`` and ``properties.normalized.pattern``.

    Returns:
        One ``{"runtime", "name", "normalized"}`` dict per input row, in input
        order; any other keys present on a row are dropped.

    Raises:
        ValueError: on the first structural problem found in the schema or rows.
    """
    if not isinstance(rows, list):
        raise ValueError(f"{_event_sources_path()} must contain a JSON array")

    item_schema = schema.get("items")
    if not isinstance(item_schema, dict):
        raise ValueError(f"{_event_sources_schema_path()} has invalid 'items' schema")
    properties = item_schema.get("properties")
    if not isinstance(properties, dict):
        raise ValueError(f"{_event_sources_schema_path()} has invalid 'properties' schema")

    # Pull everything the per-row checks need out of the schema once, up front.
    required = set(item_schema.get("required", []))
    known = set(properties)
    reject_unknown = item_schema.get("additionalProperties") is False
    runtime_enum = set(properties.get("runtime", {}).get("enum", []))
    name_is_string = properties.get("name", {}).get("type") == "string"
    normalized_spec = properties.get("normalized", {})
    normalized_is_string = normalized_spec.get("type") == "string"
    normalized_re = re.compile(normalized_spec.get("pattern", "^[a-z_]+$"))

    accepted: list[dict[str, str]] = []
    for idx, entry in enumerate(rows):
        if not isinstance(entry, dict):
            raise ValueError(f"{_event_sources_path()} row {idx} is not an object")

        present = set(entry)
        absent = required - present
        if absent:
            fields = ", ".join(sorted(absent))
            raise ValueError(f"{_event_sources_path()} row {idx} missing required field(s): {fields}")

        unexpected = present - known if reject_unknown else set()
        if unexpected:
            fields = ", ".join(sorted(unexpected))
            raise ValueError(f"{_event_sources_path()} row {idx} has additional properties: {fields}")

        runtime = entry.get("runtime")
        if not (isinstance(runtime, str) and runtime in runtime_enum):
            raise ValueError(f"{_event_sources_path()} row {idx} runtime must be one of {sorted(runtime_enum)}")

        name = entry.get("name")
        # A schema that does not declare name/normalized as "string" fails every row.
        if not (name_is_string and isinstance(name, str) and name):
            raise ValueError(f"{_event_sources_path()} row {idx} name must be a non-empty string")

        normalized = entry.get("normalized")
        if not (normalized_is_string and isinstance(normalized, str)):
            raise ValueError(f"{_event_sources_path()} row {idx} normalized must be a string")
        if normalized_re.fullmatch(normalized) is None:
            raise ValueError(f"{_event_sources_path()} row {idx} normalized must match pattern [a-z_]+")

        accepted.append({"runtime": runtime, "name": name, "normalized": normalized})

    return accepted


def _load_event_sources() -> list[dict[str, str]]:
    """Load the event-sources file, then its schema, and return the validated rows.

    Raises:
        ValueError: if either file is missing or malformed, or a row fails validation.
    """
    rows = load_json_payload(_event_sources_path())
    return _validate_event_sources(rows, _load_schema(_event_sources_schema_path()))


def _runtime_normalized_events(event_sources: list[dict[str, str]], runtime: str) -> dict[str, str]:
    """Map normalized event name -> runtime-specific event name for `runtime`.

    Rows for other runtimes are ignored. When several rows share a normalized
    name, the first one listed wins.
    """
    first_names: dict[str, str] = {}
    for source in event_sources:
        if source["runtime"] != runtime:
            continue
        # setdefault keeps the earliest name seen for each normalized key.
        first_names.setdefault(source["normalized"], source["name"])
    return first_names


def events_for_runtime(
    runtime: str,
    event_sources: list[dict[str, str]],
    overrides: list[str] | None = None,
) -> list[str]:
    if not overrides:
        mapping = _runtime_normalized_events(event_sources, runtime)
        events = []
        for event in DEFAULT_EVENTS:
            normalized = _normalize_event(event)
            if normalized not in mapping:
                raise ValueError(
                    f"event-sources.json is missing mapping for runtime={runtime} event={event}"
                )
            events.append(mapping[normalized])
        return events

    return list(overrides)


def load_json(path: pathlib.Path) -> dict[str, Any]:
    """Load a JSON object from `path`, treating a missing file as an empty object.

    Raises:
        ValueError: if the file is not valid JSON or its top level is not an object.
    """
    if not path.exists():
        return {}
    try:
        document = json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as error:
        raise ValueError(f"{path} is not valid JSON: {error}") from error
    if isinstance(document, dict):
        return document
    raise ValueError(f"{path} must contain a JSON object")


def _write_text_no_follow(path: pathlib.Path, text: str) -> None:
    """Write text to `path` refusing to follow symlinks.

    Mirrors the os.open(O_NOFOLLOW | O_CREAT | O_WRONLY | O_TRUNC, 0o600)
    pattern in bin/collect_hook_event so a pre-existing symlink at `path`
    causes an error instead of clobbering the symlink target. Parent
    directories are created as needed; a newly created file gets mode 0o600
    (subject to the process umask).

    Raises:
        ValueError: if `path` is a symlink, or if the file cannot be opened
            for any other OS-level reason (the underlying OSError is chained
            and its text is included in the message).
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | os.O_NOFOLLOW
    try:
        fd = os.open(str(path), flags, 0o600)
    except OSError as error:
        # O_NOFOLLOW failures surface as ELOOP / EMLINK depending on the
        # platform, so ask the filesystem directly instead of matching errno.
        # Other failures (EISDIR, EACCES, ENOSPC, ...) must not be reported
        # as a symlink problem.
        try:
            is_symlink = path.is_symlink()
        except OSError:
            is_symlink = False
        if is_symlink:
            raise ValueError(
                f"refusing to write {path}: target exists and is a symlink"
            ) from error
        raise ValueError(f"cannot write {path}: {error}") from error
    with os.fdopen(fd, "w", encoding="utf-8") as handle:
        handle.write(text)


def write_json(path: pathlib.Path, payload: dict[str, Any]) -> None:
    """Serialize `payload` as indented, key-sorted JSON with a trailing newline and write it to `path`.

    The write goes through `_write_text_no_follow`, so it refuses to follow a
    symlink at `path`.
    """
    rendered = json.dumps(payload, indent=2, sort_keys=True)
    _write_text_no_follow(path, rendered + "\n")


def agent_config(repo: pathlib.Path) -> dict[str, Any]:
    """Load `<repo>/.agent-learning.json` and require a truthy ``hook_command`` in it.

    Raises:
        ValueError: if the file is missing, malformed, or has no hook_command.
    """
    path = repo / ".agent-learning.json"
    if path.exists():
        config = load_json(path)
        if config.get("hook_command"):
            return config
        raise ValueError(
            f"{path} has no hook_command; rerun init_learning_system.py with --install-hooks"
        )
    raise ValueError(
        f"{path} is missing; run init_learning_system.py --install-repo-integration --install-hooks first"
    )


def runtime_config_path(repo: pathlib.Path, runtime: str, scope: str) -> pathlib.Path:
    """Return the hook config file for `runtime` at the given `scope`.

    ``scope == "repo"`` selects a file under `repo`; any other scope selects
    the per-user file under the home directory.

    Raises:
        ValueError: if `runtime` is neither "codex" nor "claude".
    """
    home = pathlib.Path.home()
    if runtime not in ("codex", "claude"):
        raise ValueError(f"unsupported runtime: {runtime}")

    in_repo = scope == "repo"
    if runtime == "codex":
        base = repo if in_repo else home
        return base / ".codex" / "hooks.json"

    # Claude uses a different file name for repo-local vs. user-level settings.
    if in_repo:
        return repo / ".claude" / "settings.local.json"
    return home / ".claude" / "settings.json"


def adapter_command(repo: pathlib.Path, runtime: str, event: str) -> str:
    """Build the shell command line a runtime hook should run for `event`.

    The command re-invokes this script with the current interpreter in
    ``--adapter`` mode; every argument is shell-quoted individually.
    """
    this_script = str(pathlib.Path(__file__).resolve())
    argv = [sys.executable, this_script, "--adapter"]
    argv += ["--repo", str(repo)]
    argv += ["--runtime", runtime]
    argv += ["--event", event]
    return " ".join(map(shlex.quote, argv))


def command_exists(rows: list[Any], command: str) -> bool:
    """Return True if any hook in `rows` already runs `command`.

    `rows` is the list stored under one event in a runtime hook config; each
    row is expected to look like ``{"hooks": [{"command": "..."}]}``. Because
    the config may be hand-edited, malformed rows, non-list ``hooks`` values
    (e.g. ``null``) and malformed hook entries are skipped rather than raising.
    """
    # Compare both as raw strings and as shlex-split token lists so equivalent
    # commands written with different quoting (e.g. by hand vs. via shlex.quote)
    # are not duplicated on a rerun.
    try:
        target_tokens = shlex.split(command)
    except ValueError:
        target_tokens = None
    for row in rows:
        if not isinstance(row, dict):
            continue
        hooks = row.get("hooks")
        if not isinstance(hooks, list):
            # Missing, null or otherwise malformed "hooks": nothing to match.
            continue
        for hook in hooks:
            if not isinstance(hook, dict):
                continue
            existing = hook.get("command")
            if existing == command:
                return True
            if target_tokens is None or not isinstance(existing, str):
                continue
            try:
                if shlex.split(existing) == target_tokens:
                    return True
            except ValueError:
                # Unbalanced quotes in an existing entry: cannot be equivalent.
                continue
    return False


def ensure_gitignored(repo: pathlib.Path, path: pathlib.Path) -> dict[str, Any]:
    """Make sure the repo's .gitignore excludes `path` and its backup files.

    Why: after `--apply`, runtime hook config (e.g. .codex/hooks.json,
    .claude/settings.local.json) holds absolute paths into $HOME; committing
    it would leak the operator's home directory into git history, so an
    ignore entry is appended on demand.

    Nothing is written when `path` lies outside `repo`, when `repo` has no
    `.git` entry, or when matching ignore lines already exist. A line counts
    as matching if it equals the anchored entry, the unanchored relative
    path, or the anchored top-level directory of `path`.

    Returns:
        A report dict whose "status" is one of "outside_repo",
        "not_a_git_repo", "already_ignored" or "added".
    """
    try:
        rel = path.resolve().relative_to(repo.resolve())
    except ValueError:
        return {"status": "outside_repo", "path": str(path)}
    if not (repo / ".git").exists():
        return {"status": "not_a_git_repo", "path": str(rel)}

    backup_glob = ".agent-learning-bak-*"
    posix_rel = str(rel).replace("\\", "/")
    entry = "/" + posix_rel
    backup_entry = entry + backup_glob

    gitignore = repo / ".gitignore"
    existing = ""
    if gitignore.exists():
        existing = gitignore.read_text(encoding="utf-8")
    stripped_lines = (raw_line.strip() for raw_line in existing.splitlines())
    patterns = {pattern for pattern in stripped_lines if pattern}

    # Ignoring the whole top-level directory covers both the file and its backups.
    dir_entry = "/" + rel.parts[0] + "/"
    path_covered = not patterns.isdisjoint({entry, posix_rel, dir_entry})
    backup_covered = not patterns.isdisjoint({backup_entry, posix_rel + backup_glob, dir_entry})

    needed = [
        candidate
        for candidate, covered in ((entry, path_covered), (backup_entry, backup_covered))
        if not covered
    ]
    if not needed:
        return {"status": "already_ignored", "path": entry, "backup_path": backup_entry}

    header = "\n# agent-learning-compounder: hook config contains absolute $HOME paths\n"
    # Terminate a final unterminated line before appending our section.
    suffix = "\n" if existing and not existing.endswith("\n") else ""
    gitignore.write_text(existing + suffix + header + "\n".join(needed) + "\n", encoding="utf-8")
    return {"status": "added", "path": entry, "backup_path": backup_entry, "gitignore": str(gitignore)}


def ensure_repo_hook_config_untracked(repo: pathlib.Path, path: pathlib.Path) -> dict[str, Any]:
    """Refuse repo-local hook config writes when the target is tracked by git.

    Returns:
        A report dict with "status" of "outside_repo" (path not under repo),
        "not_a_git_repo" (repo has no .git entry) or "untracked".

    Raises:
        ValueError: if git is not on PATH, or if `git ls-files --error-unmatch`
            exits 0 for the file (i.e. it is tracked).
    """
    try:
        rel = path.resolve().relative_to(repo.resolve())
    except ValueError:
        return {"status": "outside_repo", "path": str(path)}
    if not (repo / ".git").exists():
        return {"status": "not_a_git_repo", "path": str(rel)}
    if not shutil.which("git"):
        raise ValueError(f"cannot verify whether {rel} is tracked because git is unavailable")

    tracked_query = ["git", "-C", str(repo), "ls-files", "--error-unmatch", "--", str(rel)]
    completed = subprocess.run(
        tracked_query,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    # Any non-zero exit is treated as "not tracked"; only exit 0 blocks the write.
    if completed.returncode != 0:
        return {"status": "untracked", "path": str(rel)}
    raise ValueError(
        f"{rel} is tracked by git; refusing to write absolute-path runtime hook config. "
        "Remove it from the index, keep it local/ignored, or use --scope user."
    )


def hook_entry(command: str) -> dict[str, Any]:
    """Return a hook config row that runs `command`, with an empty matcher."""
    command_hook = {"type": "command", "command": command}
    return {"matcher": "", "hooks": [command_hook]}


def merge_runtime_hooks(
    repo: pathlib.Path,
    runtime: str,
    scope: str,
    events: list[str],
    apply: bool,
) -> dict[str, Any]:
    """Plan, and with `apply` perform, the addition of adapter hooks to a runtime config.

    For each event the adapter command is added unless an equivalent command
    is already registered. Nothing is written unless `apply` is true and at
    least one event needs adding. When writing, the steps run in this order:
    (repo scope only) refuse if the config is tracked by git; back up an
    existing config next to it with a UTC timestamp suffix; write the merged
    config; (repo scope only) make sure the config is gitignored.

    Returns:
        A report dict with keys runtime, scope, config_path, applied, added,
        already_present, backup, gitignore and tracking.

    Raises:
        ValueError: if the existing config is malformed or a safety check fails.
    """
    path = runtime_config_path(repo, runtime, scope)
    data = load_json(path)
    hooks = data.setdefault("hooks", {})
    if not isinstance(hooks, dict):
        raise ValueError(f"{path} has a non-object hooks field")

    report: dict[str, Any] = {
        "runtime": runtime,
        "scope": scope,
        "config_path": str(path),
        "applied": apply,
        "added": [],
        "already_present": [],
        "backup": None,
        "gitignore": None,
        "tracking": None,
    }

    for event in events:
        command = adapter_command(repo, runtime, event)
        rows = hooks.setdefault(event, [])
        if not isinstance(rows, list):
            raise ValueError(f"{path} hooks.{event} must be a list")
        if command_exists(rows, command):
            report["already_present"].append(event)
            continue
        # In dry-run mode the event is still reported as "added" (the plan),
        # but the in-memory config is left without the new row.
        report["added"].append(event)
        if apply:
            rows.append(hook_entry(command))

    if not (apply and report["added"]):
        return report

    repo_scoped = scope == "repo"
    if repo_scoped:
        report["tracking"] = ensure_repo_hook_config_untracked(repo, path)
    if path.exists():
        stamp = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        backup = path.with_name(f"{path.name}.agent-learning-bak-{stamp}")
        _write_text_no_follow(backup, path.read_text(encoding="utf-8"))
        report["backup"] = str(backup)
    write_json(path, data)
    if repo_scoped:
        report["gitignore"] = ensure_gitignored(repo, path)
    return report


def nested(payload: dict[str, Any], *keys: str) -> Any:
    """Follow `keys` through nested dicts and return the value found.

    Returns None as soon as a non-dict is encountered before all keys are
    consumed, or when a key is absent. With no keys, `payload` itself is
    returned.
    """
    current: Any = payload
    for key in keys:
        if isinstance(current, dict):
            current = current.get(key)
        else:
            return None
    return current


def _manifest_expected_root(config: dict[str, Any], repo: pathlib.Path) -> pathlib.Path:
    """Return the ``hooks`` directory under the configured repo state directory.

    A string ``repo_state_dir`` in `config` is used directly (expanded and
    resolved). Otherwise, if ``state_dir`` or ``personal`` is set, the state
    directory is derived via `repo_state_dir(repo, state_dir, personal)`.

    Raises:
        ValueError: if none of those config keys provide a location.
    """
    configured_state = config.get("repo_state_dir")
    if isinstance(configured_state, str):
        return pathlib.Path(configured_state).expanduser().resolve() / "hooks"

    state_dir = config.get("state_dir")
    personal = config.get("personal")
    if state_dir is None and personal is None:
        raise ValueError("config missing repo_state_dir; rerun init")
    return repo_state_dir(repo, state_dir, personal) / "hooks"


def _assert_regular_executable_command(path: pathlib.Path, *, label: str) -> None:
    """Require `path` to be an existing, non-symlink, regular, executable file.

    `label` names the thing being checked in error messages. The file type is
    read with lstat, so a symlink is rejected rather than followed.

    Raises:
        ValueError: describing the first requirement that `path` fails.
    """
    try:
        file_mode = path.lstat().st_mode
    except FileNotFoundError as missing:
        raise ValueError(f"{label} is not present: {path}") from missing

    if stat.S_ISLNK(file_mode):
        raise ValueError(f"{label} must not be a symlink: {path}")
    elif not stat.S_ISREG(file_mode):
        raise ValueError(f"{label} must be a regular file: {path}")
    elif not os.access(path, os.X_OK):
        raise ValueError(f"{label} must be executable: {path}")


def _validate_configured_hook_command(config: dict[str, Any], repo: pathlib.Path) -> pathlib.Path:
    """Validate ``config["hook_command"]`` and return it as a path.

    Checks, in order: the value is a string; it is an absolute path (after
    ``~`` expansion); it is a regular, executable, non-symlink file; if the
    config names a ``hook_manifest`` whose ``hook_command`` is a string, both
    resolve to the same path; and it resolves to a location inside the
    expected ``hooks`` directory of the repo state dir.

    Returns:
        The expanded (not resolved) hook command path.

    Raises:
        ValueError: on the first check that fails.
    """
    command_value = config.get("hook_command")
    if not isinstance(command_value, str):
        raise ValueError("agent-learning hook configuration is missing a string hook_command")

    command = pathlib.Path(command_value).expanduser()
    if not command.is_absolute():
        raise ValueError(f"hook_command must be absolute: {command_value}")
    _assert_regular_executable_command(command, label="Configured hook_command")

    manifest_path = config.get("hook_manifest")
    if isinstance(manifest_path, str):
        manifest_file = pathlib.Path(manifest_path).expanduser()
        if not manifest_file.exists():
            raise ValueError(f"hook_manifest is missing: {manifest_file}")
        manifest_command = load_json(manifest_file).get("hook_command")
        # A manifest without a string hook_command imposes no constraint.
        if isinstance(manifest_command, str):
            manifest_command_path = pathlib.Path(manifest_command).expanduser().resolve()
            if command.resolve() != manifest_command_path:
                raise ValueError(
                    "Configured hook_command does not match hook_manifest.hook_command "
                    f"({manifest_command_path})"
                )

    expected_root = _manifest_expected_root(config, repo).resolve()
    resolved_command = command.resolve()
    try:
        resolved_command.relative_to(expected_root)
    except ValueError as error:
        raise ValueError(
            "Configured hook_command must be inside configured repo_state_dir/hooks: "
            f"got {command}"
        ) from error

    return command


def enriched_event(raw: dict[str, Any], repo: pathlib.Path, runtime: str, event: str) -> dict[str, Any]:
    """Return a copy of `raw` with normalized top-level fields added.

    Always set: ``event`` (first truthy of raw event/type/hook_event, else
    `event`), ``runtime`` (raw value or `runtime`), ``repo`` and ``cwd``
    (raw value or the repo path). ``tool``, ``path`` and ``command`` are
    filled from alternative key spellings, including inside a dict-valued
    ``tool_input``, only when the copy has no truthy value for them already.
    `raw` itself is not modified.
    """
    repo_text = str(repo)
    payload = dict(raw)
    payload.update(
        event=raw.get("event") or raw.get("type") or raw.get("hook_event") or event,
        runtime=raw.get("runtime") or runtime,
        repo=repo_text,
        cwd=raw.get("cwd") or repo_text,
    )

    tool_input = raw.get("tool_input")
    if not isinstance(tool_input, dict):
        tool_input = {}

    derived = {
        # NOTE(review): the nested(raw, "tool", "name") fallback is only
        # evaluated when raw["tool"] is falsy, so it looks unable to produce
        # a name — confirm whether a dict-valued "tool" was meant to be unwrapped.
        "tool": (
            raw.get("tool")
            or raw.get("tool_name")
            or raw.get("toolName")
            or nested(raw, "tool", "name")
        ),
        "path": (
            raw.get("path")
            or raw.get("file")
            or raw.get("skill_path")
            or tool_input.get("path")
            or tool_input.get("file_path")
            or tool_input.get("filepath")
        ),
        "command": raw.get("command") or tool_input.get("command") or tool_input.get("cmd"),
    }
    for field, value in derived.items():
        if value and not payload.get(field):
            payload[field] = value

    return payload


def single_arg(value: Any, fallback: str) -> str:
    """Collapse an argparse value that may be a list (``action="append"``) to one string.

    For a list, the last element wins and an empty list yields `fallback`.
    For anything else, a falsy value yields `fallback`.
    """
    if not isinstance(value, list):
        return str(value or fallback)
    if value:
        return str(value[-1])
    return fallback


def run_adapter(args: argparse.Namespace) -> int:
    """Forward one runtime hook event from stdin to the configured hook command.

    The repo config is loaded and its hook_command validated first. Stdin is
    parsed as JSON; unparseable or non-object input is replaced by a small
    labelled placeholder so the event is still recorded. The enriched payload
    is sent to the hook command on its stdin as key-sorted JSON.

    The child's stdout is forwarded only when it exits non-zero; its stderr is
    always forwarded.

    Returns:
        The hook command's exit code.

    Raises:
        ValueError: if the repo config or hook command fails validation.
    """
    repo = pathlib.Path(args.repo).expanduser().resolve()
    hook_command = _validate_configured_hook_command(agent_config(repo), repo)
    runtime = single_arg(args.runtime, "unknown")
    event = single_arg(args.event, "Unknown")

    raw: Any = {}
    raw_text = sys.stdin.read().strip()
    if raw_text:
        try:
            raw = json.loads(raw_text)
        except json.JSONDecodeError:
            raw = {"label": "non-json-runtime-hook"}
    if not isinstance(raw, dict):
        raw = {"label": "non-object-runtime-hook"}

    encoded_payload = json.dumps(enriched_event(raw, repo, runtime, event), sort_keys=True)
    completed = subprocess.run(
        [str(hook_command)],
        input=encoded_payload,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )
    failed = completed.returncode != 0
    if failed and completed.stdout:
        sys.stdout.write(completed.stdout)
    if completed.stderr:
        sys.stderr.write(completed.stderr)
    return completed.returncode


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Without ``--adapter``: plan (default) or, with ``--apply``, write hook
    config entries for each selected runtime, then print a JSON report to
    stdout. With the hidden ``--adapter`` flag: act as the hook itself and
    forward one event from stdin to the configured hook command.

    Args:
        argv: Argument list to parse; None means ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when a ValueError is reported on stderr, or the hook
        command's exit code in adapter mode.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--repo", default=".")
    parser.add_argument("--runtime", action="append", choices=sorted(RUNTIMES))
    parser.add_argument("--scope", choices=["repo", "user"], default="repo")
    parser.add_argument("--event", action="append", help="Runtime hook event to install; defaults to common session/tool events.")
    parser.add_argument("--apply", action="store_true", help="Write config changes. Without this, only print the plan.")
    parser.add_argument("--dry-run", action="store_true", help="Explicit no-op flag for readability; dry-run is the default.")
    parser.add_argument("--adapter", action="store_true", help=argparse.SUPPRESS)
    args = parser.parse_args(argv)

    repo = pathlib.Path(args.repo).expanduser().resolve()
    runtimes = args.runtime or sorted(RUNTIMES)

    try:
        if args.adapter:
            # The adapter never consults the event-sources mapping, so it is
            # not loaded here: a hook invocation must not fail (or pay the
            # parsing cost) because of an installer-only data file.
            return run_adapter(args)
        # Loaded inside the try so a missing or invalid event-sources file is
        # reported as a clean error message instead of a traceback.
        event_sources = _load_event_sources()
        agent_config(repo)
        reports = []
        for runtime in runtimes:
            runtime_events = events_for_runtime(runtime, event_sources, args.event)
            reports.append(merge_runtime_hooks(repo, runtime, args.scope, runtime_events, args.apply))
    except ValueError as error:
        print(str(error), file=sys.stderr)
        return 1

    print(
        json.dumps(
            {
                "repo": str(repo),
                "mode": "apply" if args.apply else "dry-run",
                "runtimes": reports,
            },
            indent=2,
            sort_keys=True,
        )
    )
    return 0


# Run as a script: exit with main()'s return value as the process status.
if __name__ == "__main__":
    raise SystemExit(main())
