#!/usr/bin/env python3
"""arq-brand-research v0 · structured brainstorm-research-decide primitive.

Per operator directive 2026-05-21:
  "arqera has taken up multiple brand identities over time (so now multiple
  pages, multiple definitions, multiple text boxes, buttons, banners,
  paddings, et al all behave differently, so we need to absolutely
  consolidate too)"

ARQERA's brand identity is a primitive bug — Apple/Amazon/Asos have one
identity that drives every surface; ARQERA has accumulated multiple. This
primitive surfaces the accumulation honestly and scaffolds the operator-
authored consolidation work.

Workflow (each verb is a step):
  audit     · Twin-driven · scan codebase for every brand-identity-affecting
              pattern (taglines · wordmarks · color usage · button variants
              · header/footer copy) · write evidence to docs/brand/audit-<date>.md
  scaffold  · Twin-driven · generate docs/brand/research-<slug>.md template
              with empty sections for operator to author:
                · ARQERA to-be (5-10yr vision)
                · Human-behaviour pillars (what behaviours brand triggers)
                · Apple/Amazon/Asos case studies (what to borrow/reject)
                · Synthesis
                · Identity proposal (wordmark · tagline · palette · voice ·
                  forbidden patterns)
  emit-identity (future) · once operator fills the proposal, populate
              arq://body/identity/arqera-brand on substrate
  verify-surface (future) · given a file/route, check it consumes identity
              body vs has hardcoded brand values

Scope (bounded · per operator directive consolidate-not-blow-up):
  - audit is READ-ONLY · just emits evidence
  - scaffold writes a template doc · operator fills content
  - no brand decisions made by Twin · only by operator
  - no surface mutations until emit-identity verb (deferred)
"""
from __future__ import annotations

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
from collections import Counter, defaultdict
from datetime import datetime, timezone
from pathlib import Path

# Version tag of this primitive; emit_act stamps it into every act payload
# under the "policy" key.
POLICY_VERSION = "arq-brand-research-v0-2026-05-21"
# Portable binary resolution: env override → PATH lookup → None.
# When None, emit_act emits a loud stderr WARN and skips · never silent.
TWIN_BIN = os.environ.get("TWIN_BIN") or shutil.which("twin")


def _find_repo_root() -> Path:
    """Locate the ARQERA repo root.

    A path relative to `__file__` is not tried first because the primitive is
    often invoked from ~/.local/bin/ (symlink-installed). Resolution order:

      1. `ARQERA_REPO_ROOT`, when set and containing a `frontend` entry.
      2. The current working directory, then each of its ancestors.
      3. Each ancestor of this script's resolved path.

    Steps 2 and 3 accept the first directory that holds both `frontend/` and
    `scripts/primitives/`.

    Returns:
        The repo root as an absolute, resolved path.

    Raises:
        SystemExit: when no candidate directory carries the repo markers.
    """
    env_root = os.environ.get("ARQERA_REPO_ROOT")
    if env_root:
        if (Path(env_root) / "frontend").exists():
            return Path(env_root).resolve()
        # An explicit override that cannot be honoured must not be dropped
        # silently: discovery may otherwise land on a different checkout than
        # the one the operator pointed at.
        print(
            f"arq-brand-research: WARN ARQERA_REPO_ROOT={env_root!r} has no frontend/ · "
            "ignoring override and searching from cwd",
            file=sys.stderr,
        )

    cwd = Path.cwd().resolve()
    script = Path(__file__).resolve()
    # cwd and its ancestors take precedence over the script's own location.
    for candidate in (cwd, *cwd.parents, *script.parents):
        if (candidate / "frontend").is_dir() and (candidate / "scripts" / "primitives").is_dir():
            return candidate
    raise SystemExit(
        "arq-brand-research: ✗ could not locate ARQERA repo root · "
        "set ARQERA_REPO_ROOT env var or run from within the repo tree"
    )


# Resolved once, at import time. _find_repo_root() raises SystemExit when no
# repo root can be located, so that failure happens before argument parsing.
REPO_ROOT = _find_repo_root()
# Output directory for the markdown documents; cmd_audit and cmd_scaffold
# create it on demand before writing.
BRAND_DOCS = REPO_ROOT / "docs" / "brand"


def now_iso() -> str:
    """Return the current UTC instant as an ISO-8601 string with offset."""
    stamp = datetime.now(tz=timezone.utc)
    return stamp.isoformat()


def now_compact() -> str:
    """Return the current UTC instant as a colon-free stamp (e.g. 2026-05-21T093000Z)."""
    stamp = datetime.now(tz=timezone.utc)
    return stamp.strftime("%Y-%m-%dT%H%M%SZ")


# Flipped to True once the missing-binary warning has been printed, so the
# warning appears at most once per process.
_EMIT_WARN_LOGGED = False


def emit_act(act_type: str, ref: str, payload: dict) -> str | None:
    """Best-effort attestation of an act through the `twin` CLI.

    Runs `twin --use-keychain act emit act <act_type> <ref>-<timestamp>
    --payload <json>` with a 10 second timeout. The JSON payload is *payload*
    plus the `policy` and `issued_at` keys.

    Args:
        act_type: act type passed to the CLI.
        ref: reference prefix; a compact UTC timestamp is appended to it.
        payload: extra key/values merged into the JSON payload.

    Returns:
        The first stdout line starting with `arq://act/` (stripped), or None
        when the binary is unavailable, the call fails, or no such line is
        printed. Never raises: every failure is reported on stderr instead, so
        a skipped attestation is visible to the operator.
    """
    global _EMIT_WARN_LOGGED
    if not TWIN_BIN or not os.path.exists(TWIN_BIN):
        if not _EMIT_WARN_LOGGED:
            print(
                "arq-brand-research: WARN twin binary not found "
                "(set TWIN_BIN or install `twin` on PATH) · audit acts will be skipped",
                file=sys.stderr,
            )
            _EMIT_WARN_LOGGED = True
        return None

    try:
        body = json.dumps({**payload, "policy": POLICY_VERSION, "issued_at": now_iso()})
        result = subprocess.run(
            [TWIN_BIN, "--use-keychain", "act", "emit", "act", act_type,
             f"{ref}-{now_compact()}",
             "--payload", body],
            check=False, timeout=10, capture_output=True, text=True,
        )
    except Exception as exc:
        # Attestation is a side channel: the caller's document is already
        # written, so report the failure and carry on rather than crash.
        print(
            f"arq-brand-research: WARN act emit failed for {act_type} · "
            f"{type(exc).__name__}: {exc}",
            file=sys.stderr,
        )
        return None

    for line in (result.stdout or "").splitlines():
        if line.startswith("arq://act/"):
            return line.strip()

    # The CLI ran but printed no act address; include the last stderr line
    # (when there is one) so the operator can see why.
    err_lines = (result.stderr or "").strip().splitlines()
    detail = f" · {err_lines[-1]}" if err_lines else ""
    print(
        f"arq-brand-research: WARN twin returned no act address for {act_type} "
        f"(exit {result.returncode}){detail}",
        file=sys.stderr,
    )
    return None


# ───── audit · catalogue accumulated brand-identity patterns ─────


def _walk_tsx(root: Path):
    """Yield every `.tsx` path beneath *root*, skipping `node_modules` subtrees.

    Only the path components below *root* are inspected, so a checkout that
    itself lives under a directory named `node_modules` is still scanned.
    """
    for path in root.rglob("*.tsx"):
        # rglob results always start with root, so relative_to cannot fail here.
        if "node_modules" in path.relative_to(root).parts:
            continue
        yield path


# Known tagline openers, matched case-insensitively. Each alternative is a seed
# phrase followed by up to 80 trailing characters (140 for "ARQERA is") that
# stop at a double quote, angle bracket or newline. The whole alternation sits
# in one capture group, so `findall` returns the full matched phrase.
TAGLINE_RE = re.compile(
    r"(A workforce that actually works[^\"<>\n]{0,80}|"
    r"Hire trusted AI workers[^\"<>\n]{0,80}|"
    r"workforce that actually[^\"<>\n]{0,80}|"
    r"Hire AI workers[^\"<>\n]{0,80}|"
    r"ARQERA is[^\"<>\n]{0,140}|"
    r"governed operational[^\"<>\n]{0,80}|"
    r"AI that sees, thinks[^\"<>\n]{0,80})",
    re.IGNORECASE,
)
# Hardcoded CSS hex colours: `#` followed by exactly 3, 4, 6 or 8 hex digits
# (the only lengths CSS defines). The `(?<!&)` guard keeps HTML numeric
# entities such as `&#8212;` from being counted as colours. The digit group is
# non-capturing so `findall` still returns the whole `#...` match.
HEX_RE = re.compile(r"(?<!&)#(?:[0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{3,4})\b")
# Matches the opening of an `hsl(var(--token...` reference, up to the end of
# the custom-property name. The closing parenthesis is not part of the match;
# cmd_audit appends one `)` when it prints each token.
HSL_TOKEN_RE = re.compile(r"hsl\(var\(--[\w-]+")
# Matches a `<button ...` opening tag whose className string (quoted with ", '
# or `) contains a rounded-*, bg-* or px-<n> utility class. The single capture
# group holds only the first such class token, so `findall` yields that token
# rather than the whole className value. `[^>]*` stops at the first `>`, so a
# `>` appearing in the tag before className prevents a match.
BUTTON_PATTERN_RE = re.compile(
    r"<button\b[^>]*className=[\"\'`][^\"\'`]*?(rounded-[a-z0-9]+|bg-[a-z0-9-]+|px-\d+)[^\"\'`]*?[\"\'`]",
    re.IGNORECASE | re.DOTALL,
)


def _scan_brand_patterns(root: Path) -> dict:
    """Tally brand-identity evidence across every readable .tsx file under *root*.

    Returns a dict with the keys `files_scanned` (files actually read),
    `unreadable` (files that raised OSError on read), `taglines`,
    `tagline_files`, `hex_colors`, `hsl_tokens`, `button_variants` and
    `wordmark_files`. File paths are stored relative to REPO_ROOT.
    """
    scan: dict = {
        "files_scanned": 0,
        "unreadable": 0,
        "taglines": Counter(),
        "tagline_files": defaultdict(set),
        "hex_colors": Counter(),
        "hsl_tokens": Counter(),
        "button_variants": Counter(),
        "wordmark_files": [],
    }
    for path in _walk_tsx(root):
        try:
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            scan["unreadable"] += 1
            continue
        # Count only after a successful read, so the reported total is the
        # number of files whose content really contributed evidence.
        scan["files_scanned"] += 1
        rel = str(path.relative_to(REPO_ROOT))
        for hit in TAGLINE_RE.findall(text):
            phrase = hit.strip().rstrip(".,;:")[:120]
            scan["taglines"][phrase] += 1
            scan["tagline_files"][phrase].add(rel)
        scan["hex_colors"].update(h.lower() for h in HEX_RE.findall(text))
        scan["hsl_tokens"].update(HSL_TOKEN_RE.findall(text))
        scan["button_variants"].update(BUTTON_PATTERN_RE.findall(text))
        if "ARQERALogo" in text or 'aria-label="ARQERA Home"' in text or "wordmark" in text.lower():
            scan["wordmark_files"].append(rel)
    return scan


def _render_audit_markdown(scan: dict) -> str:
    """Render the dict produced by `_scan_brand_patterns` as the audit markdown."""
    taglines = scan["taglines"]
    tagline_files = scan["tagline_files"]
    hex_colors = scan["hex_colors"]
    hsl_tokens = scan["hsl_tokens"]
    button_variants = scan["button_variants"]
    # Order-preserving de-duplication of the wordmark file list.
    wordmarks = list(dict.fromkeys(scan["wordmark_files"]))
    file_count = scan["files_scanned"]
    max_files = 8  # per-tagline cap on listed files

    lines: list[str] = [
        f"# ARQERA Brand-Identity Audit · {now_iso()}\n",
        "Generated by `scripts/primitives/arq-brand-research audit`.\n",
        "Purpose: surface every brand-identity-affecting pattern that has accumulated, ",
        "so operator-authored consolidation can target the actual fragmentation.\n\n",
        f"**Files scanned**: {file_count} .tsx files under `frontend/`\n",
        f"**Wordmark-touching files**: {len(wordmarks)}\n",
        f"**Distinct tagline variants found**: {len(taglines)}\n",
        f"**Distinct hardcoded hex colors**: {len(hex_colors)}\n",
        f"**Distinct hsl token usages**: {len(hsl_tokens)}\n",
        f"**Distinct button class-pattern matches**: {len(button_variants)}\n\n",
    ]

    lines.append("## Tagline variants · `Retires` candidates\n\n")
    if not taglines:
        lines.append("None found by pattern.\n\n")
    for phrase, n in taglines.most_common():
        lines.append(f"### `{phrase[:100]}` ({n} occurrences)\n")
        files = sorted(tagline_files[phrase])
        lines.extend(f"- {fp}\n" for fp in files[:max_files])
        if len(files) > max_files:
            # Make the truncation visible instead of silently dropping files.
            lines.append(f"- … and {len(files) - max_files} more\n")
        lines.append("\n")

    lines.append("## Color usage · `Unifies` candidates\n\n")
    lines.append("- Hardcoded hex (top 20):\n")
    lines.extend(f"  - `{h}` × {n}\n" for h, n in hex_colors.most_common(20))
    lines.append("\n- HSL CSS-token usage (top 20):\n")
    # HSL_TOKEN_RE stops before the closing parenthesis; add it back for display.
    lines.extend(f"  - `{t})` × {n}\n" for t, n in hsl_tokens.most_common(20))
    lines.append("\n")

    lines.append("## Wordmark / logo touch points\n\n")
    lines.extend(f"- {fp}\n" for fp in wordmarks)
    lines.append("\n")

    lines.append("## Button class-pattern variants · `Unifies` candidates (top 12)\n\n")
    lines.extend(f"- `{cls[:80]}` × {n}\n" for cls, n in button_variants.most_common(12))
    lines.append("\n")

    lines.append("## Next steps · operator-authored\n\n")
    lines.append("Run `arq-brand-research scaffold --name <slug>` to generate the research template; ")
    lines.append("fill the Apple/Amazon/Asos case studies and synthesis sections; then "
                 "`arq-brand-research emit-identity` (future verb) will populate "
                 "`arq://body/identity/arqera-brand` with the decided values.\n")
    return "".join(lines)


def cmd_audit(args: argparse.Namespace) -> int:
    """Scan `frontend/` for brand-identity patterns and write an audit document.

    Writes `docs/brand/audit-<timestamp>.md` under the repo root, prints a
    one-line summary to stdout, then attempts to attest the run via emit_act.
    Files that cannot be read are excluded from every count and reported once
    on stderr.

    Args:
        args: parsed CLI namespace (no fields are read by this verb).

    Returns:
        0 on success, 2 when `frontend/` does not exist under the repo root.
    """
    root = REPO_ROOT / "frontend"
    if not root.exists():
        print(f"arq-brand-research: ✗ frontend root not found at {root}", file=sys.stderr)
        return 2

    BRAND_DOCS.mkdir(parents=True, exist_ok=True)

    scan = _scan_brand_patterns(root)
    unreadable = scan["unreadable"]
    if unreadable:
        print(
            f"arq-brand-research: WARN {unreadable} .tsx file(s) could not be read "
            "and were excluded from the audit",
            file=sys.stderr,
        )

    file_count = scan["files_scanned"]
    taglines = scan["taglines"]
    hex_colors = scan["hex_colors"]
    hsl_tokens = scan["hsl_tokens"]
    button_variants = scan["button_variants"]
    wordmark_total = len(set(scan["wordmark_files"]))

    out_path = BRAND_DOCS / f"audit-{now_compact()}.md"
    out_path.write_text(_render_audit_markdown(scan), encoding="utf-8")
    print(f"arq-brand-research: ✓ audit written to {out_path.relative_to(REPO_ROOT)}")
    print(f"  files_scanned={file_count} · taglines={len(taglines)} · hex={len(hex_colors)} · hsl={len(hsl_tokens)} · wordmark_files={wordmark_total} · buttons={len(button_variants)}")
    addr = emit_act("brand_audit_completed", out_path.stem, {
        "doc_path": str(out_path.relative_to(REPO_ROOT)),
        "files_scanned": file_count,
        "tagline_variants": len(taglines),
        "hex_colors": len(hex_colors),
        "hsl_tokens": len(hsl_tokens),
        "wordmark_files": wordmark_total,
        "button_variants": len(button_variants),
    })
    if addr:
        print(f"  attested: {addr}")
    return 0


# ───── scaffold · operator-fill brand-research template ─────


# Markdown skeleton written by cmd_scaffold. It is rendered with str.format, so
# {name} and {ts} are the only placeholders and any literal brace added to the
# text must be doubled. NOTE(review): the "Primitive policy" line is hard-coded
# and currently duplicates the value of POLICY_VERSION; keep the two in sync.
SCAFFOLD_TEMPLATE = """# Brand Research · {name}

> Operator-authored brainstorm-research-decide for ARQERA's identity.
> Twin generated this template via `arq-brand-research scaffold`.
> Operator fills every section · Twin populates `arq://body/identity/arqera-brand`
> only after this doc has decided values.

Generated: {ts}
Primitive policy: arq-brand-research-v0-2026-05-21

---

## 1. ARQERA to-be (5-10yr horizon)

> What is ARQERA in 5-10 years? Operator must answer concretely · not aspirations,
> the actual category-defining shape. What does the world look like with ARQERA in it?
> What does it look like without?

_(fill)_

## 2. Human-behaviour pillars

> What new behaviours does ARQERA trigger in customers (personal + business)?
> Not features. Behaviours. "I no longer ___" / "I now ___ within seconds" / etc.
> 3-5 pillars max.

_(fill)_

## 3. Case studies · what to borrow · what to reject

### 3a. Apple

> What did Apple do that built its identity? What is ARQERA's equivalent?
> What is uniquely Apple that ARQERA must NOT copy?

_(fill)_

### 3b. Amazon

> Same shape · Amazon's identity machinery · what ARQERA borrows · what ARQERA rejects.

_(fill)_

### 3c. Asos

> Same shape · Asos · marketplace identity · discovery + desire.

_(fill)_

## 4. Synthesis

> What is uniquely ARQERA's identity that none of the case studies can replicate?
> The 1-3 sentences that name ARQERA's brand spine.

_(fill)_

## 5. Identity proposal · values that populate `arq://body/identity/arqera-brand`

### 5a. Legal name
_(fill)_

### 5b. Wordmark treatment
- form: _(wordmark-only / wordmark + symbol / etc)_
- split-color: _(none / [ARQ in foreground, ERA in primary] / etc)_
- typeface: _(custom / system-grotesk / etc)_
- letter-spacing: _(value)_

### 5c. Tagline canonical (one sentence · final)
_(fill)_

### 5d. Primary palette · 3-5 colors max
_(fill · hsl or hex per color)_

### 5e. Secondary palette · accents only
_(fill)_

### 5f. Voice attributes (5-7 adjectives)
_(fill)_

### 5g. Forbidden patterns
> What the brand must NEVER do. Specific.
_(fill)_

## 6. Surface consolidation plan

> Once the identity is decided, list which surfaces need to be retrofitted to consume
> `arq://body/identity/arqera-brand` instead of their current hardcoded values.
> Twin will execute this list in bounded PRs · each citing what it retires/unifies.

_(fill after section 5)_

---

## Operator authority gate

This doc is the SOFT/MID/HARD-tier decision substrate. Until the operator signs off
on section 5 (identity proposal), `arq-brand-research emit-identity` is refused.
Operator signs by editing this line:

`OPERATOR_DECISION_SIGNED: false`  ← set to `true` to authorise emit-identity

Author: _(fill)_
Decision date: _(fill)_
"""


def cmd_scaffold(args: argparse.Namespace) -> int:
    """Write the operator-fill research template to `docs/brand/research-<slug>.md`.

    The slug is `args.name` stripped, lower-cased, with spaces turned into
    hyphens; it may contain only `a-z`, `0-9`, `_` and `-`. An existing
    document is left untouched unless `args.overwrite` is set. After writing,
    the run is attested via emit_act when possible.

    Returns:
        0 on success, 1 when the document already exists and overwrite was not
        requested, 2 when the slug is invalid.
    """
    BRAND_DOCS.mkdir(parents=True, exist_ok=True)

    slug = args.name.strip().lower().replace(" ", "-")
    if re.match(r"^[a-z0-9_\-]+$", slug) is None:
        print(f"arq-brand-research: ✗ --name must be kebab-case · got {slug!r}", file=sys.stderr)
        return 2

    target = BRAND_DOCS / f"research-{slug}.md"
    shown = target.relative_to(REPO_ROOT)  # repo-relative path used in messages
    if target.exists() and not args.overwrite:
        print(f"arq-brand-research: ✗ {shown} already exists · use --overwrite to replace", file=sys.stderr)
        return 1

    document = SCAFFOLD_TEMPLATE.format(name=slug, ts=now_iso())
    target.write_text(document, encoding="utf-8")
    print(f"arq-brand-research: ✓ scaffold written to {shown}")

    act_payload = {
        "doc_path": str(shown),
        "operator_action_required": "fill sections 1-5 · then set OPERATOR_DECISION_SIGNED to true",
    }
    attested = emit_act("brand_research_scaffolded", slug, act_payload)
    if attested:
        print(f"  attested: {attested}")
    return 0


def main(argv: list[str] | None = None) -> int:
    """Parse the command line and dispatch to the selected verb.

    Args:
        argv: argument list to parse, excluding the program name. When None
            (the default) argparse reads `sys.argv[1:]`, so existing callers
            are unaffected; passing a list allows programmatic invocation.

    Returns:
        The exit code returned by the verb's handler.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    sub = parser.add_subparsers(dest="verb", required=True)

    p_audit = sub.add_parser("audit", help="Scan codebase · catalogue accumulated brand-identity patterns")
    p_audit.set_defaults(func=cmd_audit)

    p_scaf = sub.add_parser("scaffold", help="Generate operator-fill brand-research template")
    p_scaf.add_argument("--name", required=True, help="kebab-case slug for the research doc")
    p_scaf.add_argument("--overwrite", action="store_true")
    p_scaf.set_defaults(func=cmd_scaffold)

    args = parser.parse_args(argv)
    return args.func(args)


if __name__ == "__main__":
    raise SystemExit(main())
