#!/usr/bin/env python3
"""arq-connection v0 · credential-broker FOUNDATION · not yet the complete Connect-GitHub flow.

Per operator scope-honesty correction 2026-05-21:
  "arq-connection v0 = credential broker foundation · NOT yet the
   complete Connect GitHub product flow"

What v0 IS
──────────
The substrate-attested foundation for the future user-facing credential
broker. v0 can read/audit existing credentials and scaffolds the future
connect/disconnect flow with substrate evidence on every step. It is
the architectural primitive that future provider-specific OAuth/App-
install flows plug into.

What v0 is NOT (deferred)
─────────────────────────
- A complete customer-facing "Connect GitHub" product flow. The
  OAuth/App-install integration for each provider is operator-tier
  scope · lands as separate PR per provider.
- A per-user RBAC ACL (arq://body/connection/<user>/<service>). The
  v0 audit-only model means any local caller can read · per-call
  substrate audit IS the security layer until ACL lands.
- Non-keychain vault adapters · v0 ships macos_keychain only.

Scope honest naming
───────────────────
Users do not yet click "Connect <provider>" in a browser through this
primitive (for any provider). Operators/devs provision credentials
directly into the io.arqera.twin.<service>.<resource> keychain pattern;
this primitive gives them list/access/audit/revoke on top. The user-
facing OAuth UX is the next scope (separate PR per provider · operator-
approved · GitHub is just the first runtime pressure surfacing this
work · the primitive itself is provider-agnostic).

Per operator directive 2026-05-21 + north-star architecture
(arq://doc/principle/arqera-north-star-architecture-v1-2026-05-21):
  "Users should never see GitHub App IDs, PEM files, keychain commands,
   vault mechanics, or provider-specific auth details unless they are
   an admin in an advanced audit view."
This primitive is the foundation that lets future surfaces honour that
contract · it is not itself the customer-facing surface yet.

Substrate canonical address: arq://body/identity/arqera-credential-broker

Verbs (v0)
──────────
  list                     · enumerate existing ARQERA connections in
                             plain English ("Figma · connected"). Reads
                             keychain entries matching the
                             io.arqera.twin.<service>.<resource> pattern.
  access <service> <res>   · audit-emitting credential fetch · returns
                             value to caller · emits arq://act/
                             credential_accessed/<id> with worker
                             identity + value_length (NEVER the secret).
                             --audit-only verifies presence without
                             returning the value.
  connect <service>        · SCAFFOLD ONLY · prints the dev/operator
                             unblock path (security add-generic-
                             password) and emits connection_initiated.
                             DOES NOT yet run a customer OAuth/install
                             flow. Provider-specific implementations
                             land per operator approval.
  disconnect <service>     · removes vault entries for the service +
                             emits connection_revoked.

Vault adapters (pluggable · v0 ships ONE)
─────────────────────────────────────────
  macos_keychain (default · v0 only · pattern:
                   io.arqera.twin.<service>.<resource>)
  Planned · NOT BUILT: 1password · bitwarden · hashicorp_vault ·
                       k8s_secrets · aws_secrets_manager ·
                       gcp_secret_manager · azure_keyvault

Authority scope (bounded · per operator directive consolidate-not-blow-up)
──────────────────────────────────────────────────────────────────────────
  ALLOWED:  read credentials on behalf of authorised workers with
            per-call substrate audit · scaffold connect/disconnect
  DENIED:   write raw credentials (operator/admin only via vault tooling)
            customer-facing OAuth integrations (separate scope per
            provider · operator-approved)
            cross-user access · unaudited reads
"""
from __future__ import annotations

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
from datetime import datetime, timezone

# Stamped into every emitted act payload under the "policy" key (see emit_act).
POLICY_VERSION = "arq-connection-v0-2026-05-21"
# TWIN_BIN resolution order: explicit env var → PATH lookup → None.
# When None, audit emission is skipped with a stderr warning · never silently.
# Resolved once at import time; an empty TWIN_BIN env value falls through to
# the PATH lookup because of the `or`.
TWIN_BIN = os.environ.get("TWIN_BIN") or shutil.which("twin")
# Every keychain service name handled here is KEYCHAIN_PREFIX + "<service>.<resource>".
KEYCHAIN_PREFIX = "io.arqera.twin."

# Human-readable display names for known services, keyed by the service slug
# that appears in keychain names and CLI arguments.
# Operator may extend this map · display_name() falls back to Title Case
# (hyphens replaced by spaces) for services that are not listed here.
SERVICE_DISPLAY = {
    "figma": "Figma",
    "github": "GitHub",
    "github-app": "GitHub (App-mediated)",
    "google": "Google",
    "google-workspace": "Google Workspace",
    "slack": "Slack",
    "notion": "Notion",
    "stripe": "Stripe",
    "openai": "OpenAI",
    "anthropic": "Anthropic",
    "cloudflare": "Cloudflare",
    "linear": "Linear",
    "modal": "Modal",
}


def now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string carrying its UTC offset."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()


def now_compact() -> str:
    """Return the current UTC time as a compact stamp such as 2026-05-21T134501Z."""
    return f"{datetime.now(tz=timezone.utc):%Y-%m-%dT%H%M%SZ}"


# Matches a ref that already ends in a compact UTC stamp (optionally preceded
# by "-"): YYYY-MM-DD, "T", four to six digits, "Z". emit_act uses it to avoid
# appending a second timestamp to such a ref.
_REF_TIMESTAMP_SUFFIX = re.compile(r"-?\d{4}-\d{2}-\d{2}T\d{4,6}Z$")


# Process-wide latch: flipped to True after the "twin binary not found" warning
# has been printed, so emit_act prints it at most once per process.
_EMIT_WARN_LOGGED = False


def emit_act(act_type: str, ref: str, payload: dict) -> str | None:
    """Emit a signed substrate act via the twin binary and return its address.

    The ref gets a compact UTC timestamp appended unless it already ends in one.
    The payload is sent as JSON with "policy" and "issued_at" keys added.

    Emission is best-effort: this function never raises. Every way an act can
    be dropped is reported on stderr, so an audit gap is never silent:
      * twin binary not found (env override absent + no `twin` on PATH) ·
        warned once per process;
      * the twin invocation raised (spawn error, timeout, unserialisable
        payload) · warned per call;
      * twin exited non-zero · warned per call.

    Args:
        act_type: act type passed to `twin act emit act`.
        ref: binding reference for the act.
        payload: JSON-serialisable fields describing the event. Must never
            contain a secret value.

    Returns:
        The first stdout line starting with "arq://act/" (stripped), or None
        when no such line was produced or the act could not be emitted.
    """
    global _EMIT_WARN_LOGGED
    if not TWIN_BIN or not os.path.exists(TWIN_BIN):
        if not _EMIT_WARN_LOGGED:
            print(
                "arq-connection: WARN twin binary not found "
                "(set TWIN_BIN or install `twin` on PATH) · audit acts will be skipped",
                file=sys.stderr,
            )
            _EMIT_WARN_LOGGED = True
        return None
    full_ref = ref if _REF_TIMESTAMP_SUFFIX.search(ref) else f"{ref}-{now_compact()}"
    try:
        r = subprocess.run(
            [TWIN_BIN, "--use-keychain", "act", "emit", "act", act_type,
             full_ref,
             "--payload", json.dumps({**payload, "policy": POLICY_VERSION, "issued_at": now_iso()})],
            check=False, timeout=10, capture_output=True, text=True,
        )
    except Exception as exc:
        # Deliberately broad: audit emission must never crash the verb that
        # triggered it. Only the exception type is printed, because a
        # subprocess error message would echo the whole command line.
        print(
            f"arq-connection: WARN audit act {act_type} not emitted "
            f"({type(exc).__name__})",
            file=sys.stderr,
        )
        return None
    if r.returncode != 0:
        print(
            f"arq-connection: WARN twin exited {r.returncode} · "
            f"audit act {act_type} may not have been recorded",
            file=sys.stderr,
        )
    for line in (r.stdout or "").splitlines():
        if line.startswith("arq://act/"):
            return line.strip()
    return None


# ───── vault adapter · macos_keychain (only adapter in v0) ─────


def _keychain_list_services(raw: bool = False) -> list[tuple[str, str]]:
    """Enumerate io.arqera.twin.* keychain entries as (service, resource) tuples.

    macOS keychain doesn't have a clean list-by-prefix API · this runs
    `security dump-keychain` and filters the "svce" attribute lines.
    Read-only · passwords are not requested.

    Args:
        raw: False (default) — for display: rotation-archived variants are
            folded onto their base resource by stripping trailing
            `.<something>-archived` segments (e.g. `.v2-archived`,
            `.readonly-archived`). Dots inside the resource name itself are
            preserved, so `private.key.v2-archived` becomes `private.key`.
            True — for mutation paths: resources are returned exactly as
            stored, so `disconnect` targets the actual entries.

    Returns:
        A sorted, de-duplicated list of (service, resource) tuples. Entries
        with no "." after the service name are skipped. An empty list is
        returned (with a stderr warning) when the keychain cannot be dumped.
    """
    try:
        dump = subprocess.run(
            ["security", "dump-keychain"],
            # errors="replace": one undecodable byte in an unrelated keychain
            # item must not make every ARQERA entry disappear from the listing.
            capture_output=True, text=True, errors="replace", timeout=15,
        ).stdout
    except (OSError, subprocess.SubprocessError) as exc:
        # Callers cannot tell "no entries" from "could not look", so say so.
        print(
            f"arq-connection: WARN could not enumerate keychain ({type(exc).__name__})",
            file=sys.stderr,
        )
        return []

    # The prefix is spelled out in the pattern and the remainder captured
    # directly; keep it in sync with KEYCHAIN_PREFIX.
    svce_line = re.compile(r'"svce"<blob>="io\.arqera\.twin\.([^"]+)"')
    # One or more trailing dot-free segments ending in "-archived". The
    # [^.]* (rather than .*) stops the match from starting at an earlier dot
    # and swallowing part of a dotted resource name.
    archived_suffix = re.compile(r"(?:\.[^.]*-archived)+$")

    found: set[tuple[str, str]] = set()
    for line in dump.splitlines():
        m = svce_line.search(line)
        if not m:
            continue
        service, dot, resource = m.group(1).partition(".")
        if not dot:
            # No "<service>.<resource>" structure · not one of ours.
            continue
        if not raw:
            resource = archived_suffix.sub("", resource)
        found.add((service, resource))
    return sorted(found)


def _keychain_read(service: str, resource: str) -> str | None:
    """Fetch the secret stored for <service>.<resource> from the macOS keychain.

    Returns the password text with trailing newlines removed, or None when the
    `security find-generic-password` lookup exits non-zero or cannot be run.
    """
    target = "{}{}.{}".format(KEYCHAIN_PREFIX, service, resource)
    command = ["security", "find-generic-password", "-s", target, "-w"]
    try:
        proc = subprocess.run(command, capture_output=True, text=True, timeout=10)
    except Exception:
        return None
    if proc.returncode == 0:
        return proc.stdout.rstrip("\n")
    return None


def _keychain_delete(service: str, resource: str) -> bool:
    """Delete the keychain item stored for <service>.<resource>.

    Returns True only when `security delete-generic-password` exits 0; a
    non-zero exit or any failure to run the command yields False.

    NOTE(review): presumably one matching item is removed per call — confirm
    whether several accounts can share one service name, in which case a
    single call would leave the others in place.
    """
    target = "{}{}.{}".format(KEYCHAIN_PREFIX, service, resource)
    command = ["security", "delete-generic-password", "-s", target]
    try:
        proc = subprocess.run(command, capture_output=True, text=True, timeout=10)
    except Exception:
        return False
    return proc.returncode == 0


# ───── verbs ─────


def display_name(service: str) -> str:
    """Return the friendly name for a service slug.

    Known slugs come from SERVICE_DISPLAY; anything else is rendered by
    turning hyphens into spaces and title-casing the result.
    """
    fallback = service.replace("-", " ").title()
    return SERVICE_DISPLAY.get(service, fallback)


def cmd_list(args: argparse.Namespace) -> int:
    """Print the services that have credentials in the vault, one line each.

    Uses the display listing (archived variants folded onto their base
    resource). With `--verbose`, a second section lists every
    `<service>.<resource>` pair.

    Args:
        args: parsed CLI namespace; only `verbose` is read.

    Returns:
        0 in every case, including when nothing is connected.
    """
    # Group resources by service
    by_service: dict[str, list[str]] = {}
    for svc, res in _keychain_list_services():
        by_service.setdefault(svc, []).append(res)
    if not by_service:
        print("arq-connection: no ARQERA connections found")
        print("  (run `arq-connection connect <service>` to add one)")
        return 0
    # Pluralise the header the same way the per-service line pluralises
    # "resource(s)", so a single connection does not read "1 ARQERA connections".
    total = len(by_service)
    print(f"arq-connection: {total} ARQERA connection{'s' if total != 1 else ''}")
    for svc in sorted(by_service):
        count = len(by_service[svc])
        print(f"  ✓ {display_name(svc):28} · connected ({count} resource{'s' if count != 1 else ''})")
    if args.verbose:
        print("")
        print("Resources (vault keys hidden · admin-audit view only):")
        for svc in sorted(by_service):
            for res in sorted(by_service[svc]):
                print(f"  {svc}.{res}")
    return 0


def cmd_access(args: argparse.Namespace) -> int:
    """Audit-emitting credential fetch for <service>.<resource>.

    v0 has no caller check: any caller can read, and the per-call audit act is
    the security layer · future versions check caller identity against an
    arq://body/connection/<user>/<service> ACL.

    Missing credential: emits credential_access_denied, prints a hint on
    stderr, returns 1. Present credential: emits credential_accessed (carrying
    only the value's length, never the value), then either confirms presence
    (`--audit-only`) or writes the value to stdout. Returns 0.
    """
    service, resource = args.service, args.resource
    worker = args.requesting_worker
    secret = _keychain_read(service, resource)

    worker_tag = (worker or 'anon').replace('/', '_')
    binding_ref = f"{service}-{resource}-{worker_tag}"
    # Two fragments shared by both payloads; they are spliced so that the key
    # order of each emitted payload stays exactly as before.
    subject = {"service": service, "resource": resource}
    origin = {"requesting_worker": worker, "vault_adapter": "macos_keychain"}

    if secret is None:
        emit_act(
            "credential_access_denied",
            binding_ref,
            {**subject, "reason": "not_present_in_vault", **origin},
        )
        print(f"arq-connection: ✗ {service}.{resource} not in vault", file=sys.stderr)
        print(f"  hint: run `arq-connection connect {service}`", file=sys.stderr)
        return 1

    # never log the secret · just length for audit
    addr = emit_act(
        "credential_accessed",
        binding_ref,
        {**subject, **origin, "value_length": len(secret)},
    )

    if not args.audit_only:
        # Value goes to stdout · caller is expected to consume securely.
        # The audit pointer goes to stderr so it cannot mix with the secret.
        sys.stdout.write(secret)
        if not secret.endswith("\n"):
            sys.stdout.write("\n")
        if addr:
            print(f"arq-connection: audit emitted at {addr}", file=sys.stderr)
        return 0

    print(f"arq-connection: ✓ credential present · {service}.{resource}")
    if addr:
        print(f"  audit: {addr}")
    return 0


def cmd_connect(args: argparse.Namespace) -> int:
    """v0 scaffold for the connect flow · no OAuth/install flow is run.

    Emits a connection_initiated act, then prints the manual dev/operator path
    for placing a credential in the keychain. Always returns 0.
    """
    service = args.service
    emit_act(
        "connection_initiated",
        f"{service}-initiated",
        {
            "service": service,
            "vault_adapter": "macos_keychain",
            "user_action_required": True,
        },
    )
    guidance = (
        f"arq-connection: initiating connection to {display_name(service)}",
        f"  v0 scaffold · the user-consent flow for {service} has not been wired yet.",
        "  For dev/operator setup, set the credential via:",
        f"    security add-generic-password -s io.arqera.twin.{service}.<resource> -a <acct> -w '<value>'",
        "  Then run `arq-connection list` to verify it appears as connected.",
        "",
        "  Customer-facing OAuth/install flow is operator-tier scope per provider · this is the dev unblock.",
    )
    for text in guidance:
        print(text)
    return 0


def cmd_disconnect(args: argparse.Namespace) -> int:
    """Remove vault entries for a service and record the outcome as an act.

    With `--resource`, exactly that entry is targeted. Without it, every
    stored entry of the service is targeted by its exact stored name
    (raw listing), archived variants included.

    Returns 0 and emits connection_revoked when at least one entry was
    targeted and all deletions succeeded; otherwise prints the failure on
    stderr, emits connection_revoke_failed and returns 1.
    """
    service, resource = args.service, args.resource
    binding_ref = f"{service}-{resource or 'all'}"

    if resource:
        targets = [(service, resource)]
    else:
        targets = [pair for pair in _keychain_list_services(raw=True) if pair[0] == service]

    # Every target is attempted, even after a failure, so one bad entry does
    # not leave the remaining ones in place.
    failed: list[str] = []
    succeeded = 0
    for svc, res in targets:
        if _keychain_delete(svc, res):
            succeeded += 1
        else:
            failed.append(f"{svc}.{res}")
    attempted = len(targets)

    if attempted == 0 or failed:
        if attempted == 0:
            detail = " (nothing to remove)"
        else:
            detail = f" ({succeeded}/{attempted} removed · {len(failed)} failed: {', '.join(failed[:5])})"
        print("arq-connection: ✗ disconnect failed" + detail, file=sys.stderr)
        emit_act("connection_revoke_failed", binding_ref, {
            "service": service,
            "resource": resource,
            "attempted": attempted,
            "succeeded": succeeded,
            "failed_entries": failed,
        })
        return 1

    addr = emit_act("connection_revoked", binding_ref, {
        "service": service,
        "resource": resource,
        "removed_count": succeeded,
        "vault_adapter": "macos_keychain",
    })
    noun = "entries" if succeeded != 1 else "entry"
    print(f"arq-connection: ✓ {display_name(service)} disconnected ({succeeded} {noun})")
    if addr:
        print(f"  audit: {addr}")
    return 0


def main() -> int:
    """Build the CLI, parse argv, and run the handler bound to the chosen verb.

    Returns whatever the selected cmd_* handler returns, for use as the
    process exit status.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    verbs = cli.add_subparsers(dest="verb", required=True)

    # list
    list_cmd = verbs.add_parser("list", help="Enumerate ARQERA connections (plain English)")
    list_cmd.add_argument(
        "--verbose",
        action="store_true",
        help="Show resource-level detail (admin-audit only)",
    )
    list_cmd.set_defaults(func=cmd_list)

    # access
    access_cmd = verbs.add_parser("access", help="RBAC-mediated credential read · audit-emitting")
    access_cmd.add_argument("service")
    access_cmd.add_argument("resource")
    access_cmd.add_argument(
        "--requesting-worker",
        default=None,
        help="Worker identity making the request (substrate-attested in the audit act)",
    )
    access_cmd.add_argument(
        "--audit-only",
        action="store_true",
        help="Verify presence + emit audit · do not print the value",
    )
    access_cmd.set_defaults(func=cmd_access)

    # connect
    connect_cmd = verbs.add_parser("connect", help="Initiate connection · scaffolds the user-consent flow")
    connect_cmd.add_argument("service")
    connect_cmd.set_defaults(func=cmd_connect)

    # disconnect
    disconnect_cmd = verbs.add_parser("disconnect", help="Revoke connection · removes vault entry")
    disconnect_cmd.add_argument("service")
    disconnect_cmd.add_argument(
        "--resource",
        default=None,
        help="Specific resource to revoke · omit to revoke all resources for the service",
    )
    disconnect_cmd.set_defaults(func=cmd_disconnect)

    parsed = cli.parse_args()
    handler = parsed.func
    return handler(parsed)


# Script entry point: run the CLI and exit with the status main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
