#!/usr/bin/env bash
set -euo pipefail

# Roll — AI Agent Convention Manager
# Single source of truth for how all AI coding agents behave.

VERSION="3.0.0"
# ROLL_HOME may be pre-set by the caller; otherwise it lives under $HOME.
: "${ROLL_HOME:=${HOME}/.roll}"
ROLL_CONFIG="${ROLL_HOME}/config.yaml"
ROLL_GLOBAL="${ROLL_HOME}/conventions/global"
ROLL_TEMPLATES="${ROLL_HOME}/conventions/templates"

# Locate the package root. The script may be reached through a chain of
# symlinks (~/.local/bin/roll, npm global bin), so follow every link first.
_source="${BASH_SOURCE[0]:-$0}"
until [[ ! -L "$_source" ]]; do
  _dir="$(cd "$(dirname "$_source")" && pwd)"
  _source="$(readlink "$_source")"
  # A relative link target is relative to the directory holding the link.
  if [[ "$_source" != /* ]]; then
    _source="$_dir/$_source"
  fi
done
SCRIPT_DIR="$(cd "$(dirname "$_source")" && pwd)"
# A pre-set ROLL_PKG_DIR wins (tests point it at an isolated dir so the
# curl-update atomic swap can never touch the real install tree).
: "${ROLL_PKG_DIR:=$(dirname "$SCRIPT_DIR")}"
ROLL_PKG_CONVENTIONS="${ROLL_PKG_DIR}/conventions"

# Optional helper libraries shipped with the package. Each one is sourced
# only when its file is present, so a partial install still starts.

# US-I18N-001: i18n engine (locale resolution + message catalog).
# shellcheck source=../lib/i18n.sh
if [[ -f "${ROLL_PKG_DIR}/lib/i18n.sh" ]]; then
  source "${ROLL_PKG_DIR}/lib/i18n.sh"
fi

# US-WATCH-001: upstream compatibility watch helpers.
# shellcheck source=../lib/watch.sh
if [[ -f "${ROLL_PKG_DIR}/lib/watch.sh" ]]; then
  source "${ROLL_PKG_DIR}/lib/watch.sh"
fi

# US-CTX-001: context-feed budget — bounds the material (chiefly story feature
# files) injected into the inner agent's prompt; summarizes/chunks over-budget
# material with an explicit notice instead of silently truncating or hard-stuffing.
# shellcheck source=../lib/context_feed_budget.sh
if [[ -f "${ROLL_PKG_DIR}/lib/context_feed_budget.sh" ]]; then
  source "${ROLL_PKG_DIR}/lib/context_feed_budget.sh"
fi

# ANSI colour codes used by the log helpers. A non-empty NO_COLOR blanks
# every code so output stays plain text.
if [[ -z "${NO_COLOR:-}" ]]; then
  RED=$'\033[0;31m'
  GREEN=$'\033[0;32m'
  YELLOW=$'\033[0;33m'
  CYAN=$'\033[0;36m'
  BOLD=$'\033[1m'
  NC=$'\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  CYAN=''
  BOLD=''
  NC=''
fi

# Log helpers: each prints "[roll] <args...>" with a coloured tag. The
# message goes through `echo -e`, so backslash escapes in it are interpreted.
# info/ok/warn write to stdout; err writes to stderr.
info() {
  local tag="${CYAN}[roll]${NC}"
  echo -e "${tag} $*"
}

ok() {
  local tag="${GREEN}[roll]${NC}"
  echo -e "${tag} $*"
}

warn() {
  local tag="${YELLOW}[roll]${NC}"
  echo -e "${tag} $*"
}

err() {
  local tag="${RED}[roll]${NC}"
  echo -e "${tag} $*" 1>&2
}

# Tracks merge actions across a single init run; reset before each batch
_ROLL_MERGE_SUMMARY=()

# Print the physical (symlink-free) absolute path of directory $1.
# Returns 1 without output when $1 is not a directory.
canonical_dir() {
  local target="$1"
  if [[ ! -d "$target" ]]; then
    return 1
  fi
  # Subshell keeps the caller's working directory untouched.
  (
    cd "$target" >/dev/null 2>&1 && pwd -P
  )
}

# Return a human-readable name for an AI tool dir.
# Handles nested paths like ~/.openclaw/workspace → "openclaw", ~/.pi/agent → "pi".
# Print the canonical agent name for an AI tool config dir.
#   $1  config dir path (e.g. ~/.claude, ~/.pi/agent, ~/.openclaw/workspace)
# The leading dot of the basename is dropped. Nested layouts whose last
# component is `agent` or `workspace` resolve to the parent directory's name.
ai_tool_name() {
  local dir="$1"
  local bn
  bn="$(basename "$dir")"
  bn="${bn#.}"
  # Nested-dir layouts collapse to their parent agent name.
  case "$bn" in
    agent|workspace)
      bn="$(basename "$(dirname "$dir")")"
      bn="${bn#.}"
      ;;
  esac
  case "$bn" in
    # Antigravity (agy) reuses ~/.gemini/ from the deprecated Gemini CLI for
    # its config dir, so a literal `gemini` basename now identifies agy.
    gemini) bn="agy" ;;
    # FIX-126: Kimi upstream renamed its CLI to kimi-code and its config dir
    # to ~/.kimi-code/; map both old and new basenames to the canonical
    # "kimi" agent identifier so downstream argv / config / sync paths stay
    # uniform across the upgrade.
    kimi-code) bn="kimi" ;;
  esac
  printf '%s\n' "$bn"
}

# Print $1 converted to lower case.
# printf (not echo) so option-like input such as "-n" or "-E" is emitted
# verbatim instead of being consumed as an echo flag.
lower_name() {
  printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]'
}


# FIX-128: agent → binary-name(s) lookup. First binary found wins. Used by
# _agent_installed_by_name to enforce "CLI must exist on PATH" detection
# instead of the old "config dir exists" check (which Roll's own convention
# sync would fake — see FIX-128).
# Print the space-separated CLI binary names for agent $1, in lookup order.
# Returns 1 (no output) for an agent with no binary entry.
_agent_bin_names() {
  local agent="$1"
  local bins
  case "$agent" in
    claude)
      bins="claude"
      ;;
    codex|openai)
      # openai is a Roll alias for codex
      bins="codex"
      ;;
    agy|gemini)
      # gemini reuses ~/.gemini for agy
      bins="agy gemini"
      ;;
    kimi)
      # FIX-126
      bins="kimi-code kimi-cli kimi"
      ;;
    deepseek|qwen|pi)
      # binary name equals the agent name
      bins="$agent"
      ;;
    *)
      return 1
      ;;
  esac
  echo "$bins"
}

# Antigravity is the product; `agy` is its CLI binary; `~/.gemini/` is the
# legacy Google-Gemini-CLI config dir it reuses. Accept all three spellings on
# input and collapse them to the canonical `agy` token, so every downstream
# consumer (dispatch, routes, peer/status, usage) keys on one name.
# Print the canonical token for agent name $1: `antigravity` and `gemini`
# both become `agy`; every other name is printed unchanged.
_canonical_agent_name() {
  local given="$1"
  if [[ "$given" == "antigravity" || "$given" == "gemini" ]]; then
    echo "agy"
  else
    echo "$given"
  fi
}

# Human-facing label for `roll agent list` / `roll agent`. Most agents show
# their bare token; agy shows the product name with the binary in parens so
# both the recognizable name and the typed command stay visible.
# Print the label shown to the operator for agent $1. Any spelling that
# canonicalizes to `agy` prints "antigravity (agy)"; all other names are
# printed exactly as given.
_agent_display_name() {
  local canon
  canon="$(_canonical_agent_name "$1")"
  if [[ "$canon" == "agy" ]]; then
    echo "antigravity (agy)"
  else
    echo "$1"
  fi
}

# FIX-128: detect whether an AI agent (by canonical name) is actually
# usable on this machine. For CLI-only agents this means "binary on PATH";
# GUI / bundled-binary agents keep their special-case paths. Falls back
# to dir-existence only for unknown agents the operator has registered
# manually (forward-compatible with future additions).
# Is agent $1 installed on this machine? Returns 0 when yes, 1 when no.
#   $1  agent name
#   $2  optional config dir, consulted only for agents with no binary entry
# trae/opencode/cursor/openclaw are detected by fixed paths under $HOME;
# agents listed by _agent_bin_names need one of their binaries on PATH.
_agent_installed_by_name() {
  local agent="$1"
  local dir="${2:-}"

  if [[ "$agent" == "trae" ]]; then
    if [[ -d "$HOME/Library/Application Support/Trae" ]] || [[ -d "$HOME/.config/Trae" ]]; then
      return 0
    fi
    return 1
  elif [[ "$agent" == "opencode" ]]; then
    if [[ -x "$HOME/.opencode/bin/opencode" ]]; then
      return 0
    fi
    return 1
  elif [[ "$agent" == "cursor" ]]; then
    # cursor ships a GUI + an optional CLI; either path counts as "installed".
    if command -v cursor >/dev/null 2>&1 || [[ -d "$HOME/.cursor" ]]; then
      return 0
    fi
    return 1
  elif [[ "$agent" == "openclaw" ]]; then
    if [[ -d "$HOME/.openclaw/workspace" ]]; then
      return 0
    fi
    return 1
  fi

  local candidates
  if candidates=$(_agent_bin_names "$agent" 2>/dev/null); then
    local exe
    # Intentional word-splitting: the helper prints a space-separated list.
    for exe in $candidates; do
      if command -v "$exe" >/dev/null 2>&1; then
        return 0
      fi
    done
    return 1
  fi

  # Unknown agent — fall back to dir presence so user-added entries still work.
  [[ -n "$dir" && -d "$dir" ]]
}

# US-AGENT-020: is NAME a known agent in this machine's registry? "Known"
# means it has a binary-name entry (_agent_bin_names) OR is one of the
# special-cased non-PATH agents (_agent_installed_by_name's case arms). This
# is a name-validity check, NOT an installed check — agents.yaml may name an
# agent the operator has not installed yet. deepseek is intentionally NOT
# listed here as a routable agent (it is a model pi loads), so it is treated
# as unknown for routing purposes. Returns 0 when known, 1 otherwise.
# Name-validity check for agent $1 (after canonicalization): returns 0 when
# the name is one of the fixed-path agents or has a binary-name entry, 1
# otherwise. deepseek is always reported as unknown.
_agent_is_known() {
  local canon
  canon="$(_canonical_agent_name "$1")"
  if [[ "$canon" == "deepseek" ]]; then
    return 1
  fi
  if [[ "$canon" == "trae" || "$canon" == "opencode" \
     || "$canon" == "cursor" || "$canon" == "openclaw" ]]; then
    return 0
  fi
  _agent_bin_names "$canon" >/dev/null 2>&1
}

# US-AGENT-020: schema-v3 agents.yaml loader.
#
# Layout (per-machine, never committed — see .roll/.gitignore):
#   schema: v3
#   easy:     { agent: kimi }
#   default:  { agent: claude }
#   hard:     { agent: claude }
#   fallback: { agent: pi }
#
# `_agents_config_slot SLOT [PATH]` reads the `agent:` value for SLOT
# (easy|default|hard|fallback) and prints the agent name on stdout.
#   - File missing / slot missing / empty value → prints nothing, exit 1
#     (caller is responsible for its own fallback — typically the `default`
#     slot, then a registered/installed agent).
#   - Value names an unknown agent → WARN on stderr, still print the value
#     and exit 0 so the operator sees a typo rather than a silent drop.
#
# bash 3.2 safe: no declare -A, no mapfile/readarray, no ${var^^}/${var,,}.
# Print the path of the agents config to use and return 0, or return 1 when
# none exists. An existing file named by ROLL_AGENTS_CONFIG wins; otherwise
# .roll/agents.yaml relative to the current directory is tried.
_agents_config_path() {
  local override="${ROLL_AGENTS_CONFIG:-}"
  local candidate
  for candidate in "$override" ".roll/agents.yaml"; do
    if [ -n "$candidate" ] && [ -f "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

# US-AGENT-020 helper: extract the value of an `agent:` key from a single
# line, only when `agent:` appears at a token boundary (line start, or right
# after `{`, `,`, or whitespace) — so `no_agent:` / `sub_agent:` / `agent_x:`
# do not false-match. On success sets _AGENTS_AGENT_VALUE to the raw value
# (still un-trimmed) and returns 0; returns 1 when the line has no agent key.
# Extract the raw text following an `agent:` key on line $1.
# The key must sit at the start of the line or directly after a space, `,`
# or `{`. On a match the untrimmed remainder is stored in the global
# _AGENTS_AGENT_VALUE and 0 is returned; otherwise the global is left alone
# and 1 is returned.
_agents_line_agent_value() {
  # Tabs are turned into spaces up front so the space-boundary pattern also
  # covers a tab-separated key without a literal tab inside a case pattern.
  local tab=$'\t'
  local text="${1//$tab/ }"
  case "$text" in
    "agent:"*)
      _AGENTS_AGENT_VALUE="${text#agent:}"
      ;;
    *[\ ,\{]"agent:"*)
      _AGENTS_AGENT_VALUE="${text#*[ ,{]agent:}"
      ;;
    *)
      return 1
      ;;
  esac
  return 0
}

# Read the `agent:` value of one complexity slot from an agents.yaml file.
#   $1  slot name (easy|default|hard|fallback) — required
#   $2  config path; when empty, resolved through _agents_config_path
# Prints the agent name and returns 0. Returns 1 (no output) when the slot
# name is missing, no config file is found, the slot is absent, or its value
# is empty. An agent that _agent_is_known rejects is still printed, with a
# warning on stderr.
# bash 3.2 safe: no declare -A, no mapfile/readarray, no ${var^^}/${var,,}.
_agents_config_slot() {
  local slot="${1:-}"
  local path="${2:-}"
  if [ -z "$slot" ]; then
    echo "_agents_config_slot: slot name required" >&2
    return 1
  fi
  if [ -z "$path" ]; then
    path="$(_agents_config_path)" || return 1
  fi
  [ -f "$path" ] || return 1

  # Find the slot's top-level block and read its `agent:` value. The block
  # spans from `^<slot>:` to the next top-level key (a line starting with a
  # non-space char). agent: may be inline (`easy: { agent: kimi }`) or nested
  # on a following indented line (`easy:\n  agent: kimi`). The slot key and the
  # `agent:` key are matched at token boundaries so `easy_mode:` / `no_agent:`
  # / `sub_agent:` do not false-match.
  local line body in_block="" agent="" found=""
  while IFS= read -r line || [ -n "$line" ]; do
    # Strip a trailing CR so DOS/CRLF-saved configs still parse.
    line="${line%$'\r'}"
    # Full-line comments carry no data: they must neither end the block (a
    # column-0 comment) nor supply a value (a commented-out `# agent: x`).
    body="${line#"${line%%[![:space:]]*}"}"
    case "$body" in
      \#*) continue ;;
    esac
    case "$line" in
      "$slot":|"$slot":" "*|"$slot":\{*)
        in_block=1
        # Inline flow form: easy: { agent: kimi }
        if _agents_line_agent_value "$line"; then
          agent="$_AGENTS_AGENT_VALUE"; found=1
        fi
        ;;
      [!\ ]*)
        # A new top-level key (no leading space) ends the slot block.
        if [ -n "$in_block" ]; then
          break
        fi
        ;;
      *)
        if [ -n "$in_block" ] && [ -z "$found" ]; then
          if _agents_line_agent_value "$line"; then
            agent="$_AGENTS_AGENT_VALUE"; found=1
          fi
        fi
        ;;
    esac
    if [ -n "$found" ]; then
      break
    fi
  done < "$path"

  [ -n "$in_block" ] || return 1

  # Drop an inline comment, then cut at the first `,` or `}` so sibling keys
  # of a flow map (`{ agent: kimi, model: x }`) do not leak into the value.
  agent="${agent%%\#*}"
  agent="${agent%%,*}"
  agent="${agent%%\}*}"
  # Remove any remaining flow-map brace and quotes.
  agent="${agent//\{/}"
  agent="${agent//\"/}"
  agent="${agent//\'/}"
  # Trim leading/trailing whitespace (bash 3.2 safe).
  agent="${agent#"${agent%%[![:space:]]*}"}"
  agent="${agent%"${agent##*[![:space:]]}"}"

  [ -n "$agent" ] || return 1

  if ! _agent_is_known "$agent"; then
    warn "agents.yaml: slot '${slot}' names unknown agent '${agent}' (not in this machine's registry)" >&2
  fi
  printf '%s\n' "$agent"
}

# US-AGENT-026: write the agent for one complexity slot into agents.yaml.
# `_agents_config_set_slot SLOT AGENT [PATH]` rewrites the slot's `agent:`
# value in place (inline flow form `slot: { agent: <name> }`), preserving every
# other line — including comments and unrelated slots. When the file or the
# slot is missing, the slot line is created (the file is seeded with a
# `schema: v3` header). The write is atomic: render to a temp file, then mv
# over the target. Returns 1 on a missing slot name / agent / unwritable path.
# Write the agent for one complexity slot into an agents.yaml file.
#   $1  slot name — required
#   $2  agent name — required
#   $3  config path; when empty, _agents_config_path is consulted and
#       .roll/agents.yaml is used as the last resort
# The slot is rewritten as the inline form `slot: { agent: <name> }`; every
# other line (comments, unrelated slots) is copied through. A missing file is
# created with a `schema: v3` header; a missing slot is appended. The new
# content is rendered into a temp file beside the target and then moved over
# it. Returns 0 on success; 1 when an argument is missing, the directory or
# temp file cannot be created, or the render/move fails (the temp file is
# removed in that case).
_agents_config_set_slot() {
  local slot="${1:-}" agent="${2:-}" path="${3:-}"
  if [ -z "$slot" ] || [ -z "$agent" ]; then
    echo "_agents_config_set_slot: slot and agent required" >&2
    return 1
  fi
  if [ -z "$path" ]; then
    path="$(_agents_config_path 2>/dev/null || true)"
    [ -n "$path" ] || path=".roll/agents.yaml"
  fi

  local dir; dir="$(dirname "$path")"
  [ -d "$dir" ] || mkdir -p "$dir" || return 1

  local tmp; tmp="$(mktemp "${path}.XXXXXX")" || return 1
  local new_line="${slot}: { agent: ${agent} }"

  if [ ! -f "$path" ]; then
    # Fresh file: seed a v3 header, then the slot line.
    if printf 'schema: v3\n%s\n' "$new_line" > "$tmp" && mv "$tmp" "$path"; then
      return 0
    fi
    rm -f "$tmp"
    return 1
  fi

  # Rewrite into the temp file. Track whether we are inside the target slot's
  # nested block so its old `agent:` line is dropped once the header has been
  # replaced by the inline form.
  local line raw body in_block="" found=""
  if ! {
    while IFS= read -r line || [ -n "$line" ]; do
      # Match against a CR-stripped copy; pass-through lines keep their bytes.
      raw="${line%$'\r'}"
      # Full-line comments are always preserved and never end the block.
      body="${raw#"${raw%%[![:space:]]*}"}"
      case "$body" in
        \#*)
          printf '%s\n' "$line"
          continue
          ;;
      esac
      case "$raw" in
        "$slot":|"$slot":" "*|"$slot":\{*)
          # Slot header — emit the canonical inline form and enter the block.
          printf '%s\n' "$new_line"
          found=1
          # If the header itself carried the inline map, the slot is done.
          case "$raw" in
            *\{*) in_block="" ;;
            *)    in_block=1 ;;
          esac
          continue
          ;;
        [!\ ]*)
          # Any other top-level key ends the slot block.
          in_block=""
          ;;
      esac
      # Inside the rewritten nested block: drop the old `agent:` line(s);
      # sibling keys are kept untouched.
      if [ -n "$in_block" ] && _agents_line_agent_value "$raw"; then
        continue
      fi
      printf '%s\n' "$line"
    done < "$path"
    if [ -z "$found" ]; then
      # Slot was absent — append it.
      printf '%s\n' "$new_line"
    fi
  } > "$tmp"; then
    rm -f "$tmp"
    return 1
  fi

  if ! mv "$tmp" "$path"; then
    rm -f "$tmp"
    return 1
  fi
  return 0
}

# US-AGENT-028: one-shot migrate a legacy v1 agent-routes.yaml into a v3
# agents.yaml (four complexity slots). Mirrors _loop_migrate_legacy_paths: it
# is idempotent (skips when the v3 file already exists), writes the product to
# a temp file and atomically mv's it over the target so a running loop only
# sees the new config on its next cycle.
#
# `_agents_migrate_v1_to_v3 [V1_PATH] [V3_PATH] [LOCAL_YAML]`
#   V1_PATH    legacy source (default .roll/agent-routes.yaml)
#   V3_PATH    destination   (default .roll/agents.yaml)
#   LOCAL_YAML .roll/local.yaml whose top-level `agent:` is the last-resort
#              default-slot fallback (default .roll/local.yaml)
#
# Mapping (AC US-AGENT-028):
#   - v1 `history.cold_start_default` → `default` slot (and `fallback` slot)
#   - v1 `agents.*` capability ranges → easy / hard by est_min.max: the agent
#     with the smallest max lands in `easy`, the largest in `hard`
#   - ghost agent `deepseek` is dropped (it is a pi-loaded model, not an agent)
#   - when cold_start_default is absent, `.roll/local.yaml`'s `agent:` seeds the
#     `default` slot; any still-empty slot falls back to the default slot agent
#
# Returns 0 on success or no-op; 1 only when the temp render itself fails.
_agents_migrate_v1_to_v3() {
  # Positional arguments with their defaults: legacy source, v3 destination,
  # and the local.yaml consulted for a last-resort default agent.
  local v1="${1:-.roll/agent-routes.yaml}"
  local v3="${2:-.roll/agents.yaml}"
  local local_yaml="${3:-.roll/local.yaml}"

  # Idempotent: never clobber an existing v3 config.
  [ -f "$v3" ] && return 0
  # Nothing to migrate from.
  [ -f "$v1" ] || return 0
  # The renderer below is a Python script; without python3 the migration is
  # skipped and reported as a no-op (0), not as an error.
  command -v python3 >/dev/null 2>&1 || return 0

  local dir; dir="$(dirname "$v3")"
  [ -d "$dir" ] || mkdir -p "$dir" || return 1

  # The temp file is created beside the destination (same directory), and the
  # rendered YAML is only moved over the destination once it is complete.
  local tmp; tmp="$(mktemp "${v3}.XXXXXX")" || return 1

  # The two input paths reach the script through environment variables. The
  # heredoc delimiter is quoted ('PY'), so the shell passes the script text
  # through literally with no expansion. The script's stdout is the new
  # agents.yaml content.
  if ! ROLL_MIG_V1="$v1" ROLL_MIG_LOCAL="$local_yaml" python3 - > "$tmp" <<'PY'
import os, sys
try:
    import yaml
except ImportError:
    sys.exit(3)

v1_path = os.environ["ROLL_MIG_V1"]
local_path = os.environ.get("ROLL_MIG_LOCAL", "")

try:
    v1 = yaml.safe_load(open(v1_path)) or {}
except Exception:
    sys.exit(3)
if not isinstance(v1, dict):
    sys.exit(3)

GHOST = {"deepseek"}

agents = v1.get("agents") or {}
if not isinstance(agents, dict):
    agents = {}

history = v1.get("history") or {}
cold = history.get("cold_start_default") if isinstance(history, dict) else None

# Last-resort default from .roll/local.yaml `agent:`.
local_agent = None
if local_path and os.path.exists(local_path):
    try:
        loc = yaml.safe_load(open(local_path)) or {}
        if isinstance(loc, dict) and isinstance(loc.get("agent"), str):
            local_agent = loc.get("agent")
    except Exception:
        local_agent = None

def real(name):
    return isinstance(name, str) and name and name not in GHOST

# Rank routable (non-ghost) agents by their est_min.max capability ceiling so
# the smallest-window agent maps to easy and the largest to hard.
ranked = []
for name, cfg in agents.items():
    if not real(name):
        continue
    mx = None
    if isinstance(cfg, dict):
        em = cfg.get("est_min")
        if isinstance(em, dict) and isinstance(em.get("max"), (int, float)):
            mx = em.get("max")
    ranked.append((name, mx if mx is not None else 0))
ranked.sort(key=lambda t: (t[1], t[0]))

default_agent = cold if real(cold) else local_agent
if not real(default_agent) and ranked:
    # Median-ish: pick the agent whose window straddles the default band.
    default_agent = ranked[len(ranked) // 2][0]

easy_agent = ranked[0][0] if ranked else default_agent
hard_agent = ranked[-1][0] if ranked else default_agent

# Any unresolved slot falls back to the default-slot agent.
def pick(a):
    return a if real(a) else default_agent

slots = {
    "easy": pick(easy_agent),
    "default": pick(default_agent),
    "hard": pick(hard_agent),
    "fallback": pick(default_agent),
}

out = ["schema: v3"]
for slot in ("easy", "default", "hard", "fallback"):
    agent = slots[slot]
    if real(agent):
        out.append("%s: { agent: %s }" % (slot, agent))
    else:
        out.append("%s: { agent: }" % slot)
sys.stdout.write("\n".join(out) + "\n")
PY
  then
    # The script failed (it exits 3 when PyYAML is not importable, when the v1
    # file cannot be loaded, or when it is not a mapping). Discard the partial
    # output and leave the destination untouched.
    # NOTE(review): this path returns 0, whereas the function header says a
    # failed render returns 1 — confirm which one callers rely on.
    rm -f "$tmp"
    return 0
  fi

  # Publish the rendered config over the destination path.
  mv "$tmp" "$v3"
  return 0
}

# ──────────────────────────────────────────────────────────────
# US-AGENT-021: local agent detection + cheap online probe (cached).
#
# `_agents_installed`  → newline list of agents actually installed on this
#                        machine (binary on PATH / special-case dir). deepseek
#                        is intentionally excluded — it is a model pi loads,
#                        not a routable agent.
# `_agent_available N` → is agent N usable right now? ① on PATH ② a one-shot
#                        sub-second probe (--version) that catches auth/network
#                        failure. Prints "online"/"offline" and exits 0/1.
#                        Result is cached per-machine (project-local), default
#                        30 min, configurable via ROLL_AGENT_PROBE_TTL (seconds).
#
# bash 3.2 safe: no declare -A, no mapfile/readarray, no ${var^^}/${var,,}.
# ──────────────────────────────────────────────────────────────

# Candidate routable agents, in the same order _first_installed_agent scans.
# deepseek is deliberately absent (it is a pi-loaded model, not an agent).
_AGENT_REGISTRY_NAMES="claude codex kimi qwen agy pi cursor opencode trae openclaw"

# Print, one per line and in registry order, every name from
# _AGENT_REGISTRY_NAMES that _agent_installed_by_name accepts. Prints
# nothing when none is installed; always returns 0.
# Used by the cascade menu (US-AGENT-026) and `use` validation (US-AGENT-027).
_agents_installed() {
  local candidate
  # Intentional word-splitting of the space-separated registry string.
  for candidate in $_AGENT_REGISTRY_NAMES; do
    _agent_installed_by_name "$candidate" || continue
    printf '%s\n' "$candidate"
  done
  return 0
}

# Directory holding per-machine availability cache. Project-local so two
# checkouts on one machine do not share (and it never leaks into roll-meta).
# Honors ROLL_AGENT_CACHE_DIR (tests / sandboxes) first, then .roll/cache/.
# Print the directory that holds the availability cache: the value of
# ROLL_AGENT_CACHE_DIR when it is non-empty, else the project-relative
# .roll/cache/agent-availability.
_agent_cache_dir() {
  printf '%s\n' "${ROLL_AGENT_CACHE_DIR:-.roll/cache/agent-availability}"
}

# Cache TTL in seconds (default 30 min). ROLL_AGENT_PROBE_TTL overrides.
# Print the cache TTL in seconds. ROLL_AGENT_PROBE_TTL is used when it is a
# non-empty string of digits; anything else yields the 1800 default.
_agent_probe_ttl() {
  local secs="${ROLL_AGENT_PROBE_TTL:-1800}"
  if [[ -z "$secs" || "$secs" == *[!0-9]* ]]; then
    secs=1800
  fi
  printf '%s\n' "$secs"
}

# One-shot, sub-second probe: does agent NAME's CLI respond to --version
# without an auth/network error? Returns 0 (online) / 1 (offline). Overridable
# for tests via ROLL_AGENT_PROBE_HOOK (a command receiving the agent name;
# its exit code is the verdict). Real probe: run the first known binary with
# --version under a short timeout; non-zero exit ⇒ offline (auth/network/crash).
# Probe agent $1 (canonicalized first). Returns 0 for online, non-zero for
# offline.
#   - ROLL_AGENT_PROBE_HOOK set: run it with the agent name; its exit code
#     is the verdict.
#   - Otherwise run the first binary from _agent_bin_names found on PATH with
#     --version (under `timeout 5` when `timeout` exists) and return its
#     status.
#   - No binary to run: return 0.
_agent_probe() {
  local name; name="$(_canonical_agent_name "$1")"
  if [ -n "${ROLL_AGENT_PROBE_HOOK:-}" ]; then
    "$ROLL_AGENT_PROBE_HOOK" "$name"
    return $?
  fi

  local candidates chosen=""
  if candidates=$(_agent_bin_names "$name" 2>/dev/null); then
    local exe
    # Intentional word-splitting: the helper prints a space-separated list.
    for exe in $candidates; do
      if command -v "$exe" >/dev/null 2>&1; then
        chosen="$exe"
        break
      fi
    done
  fi

  # No PATH binary to probe (special-case/dir agents) → treat presence as
  # online; PATH detection in _agent_available already gated this path.
  if [ -z "$chosen" ]; then
    return 0
  fi

  # `timeout` is GNU-only; fall back to a bare call when absent (macOS) —
  # the --version path returns near-instantly anyway.
  if command -v timeout >/dev/null 2>&1; then
    timeout 5 "$chosen" --version >/dev/null 2>&1
  else
    "$chosen" --version >/dev/null 2>&1
  fi
}

# Is agent NAME usable right now? Prints "online"/"offline", exits 0/1.
# Reads a fresh cache entry when present; otherwise probes and caches the
# result. ROLL_AGENT_NO_CACHE=1 forces a re-probe (still writes the cache).
# Report whether agent $1 is usable right now: prints "online" or "offline"
# and returns 0 or 1 to match.
# A cache entry younger than the TTL is trusted as-is unless
# ROLL_AGENT_NO_CACHE is non-empty. Otherwise the agent must pass both
# _agent_installed_by_name and _agent_probe; the verdict is then written
# back to the cache (best-effort) as `checked_at=` / `status=` lines.
_agent_available() {
  local name; name="$(_canonical_agent_name "$1")"
  if [ -z "$name" ]; then
    printf 'offline\n'
    return 1
  fi

  local store; store="$(_agent_cache_dir)"
  local entry="${store}/${name}"
  local max_age; max_age="$(_agent_probe_ttl)"
  local stamp; stamp="$(date +%s)"

  # Cache hit within TTL → trust it (unless caller forced a re-probe).
  if [ -z "${ROLL_AGENT_NO_CACHE:-}" ] && [ -f "$entry" ]; then
    local seen_at="" seen_status="" row
    while IFS= read -r row || [ -n "$row" ]; do
      if [[ "$row" == checked_at=* ]]; then
        seen_at="${row#checked_at=}"
      elif [[ "$row" == status=* ]]; then
        seen_status="${row#status=}"
      fi
    done < "$entry"
    # A timestamp that is not purely digits is treated as missing.
    if [[ -z "$seen_at" || "$seen_at" == *[!0-9]* ]]; then
      seen_at=""
    fi
    if [ -n "$seen_at" ] && [ -n "$seen_status" ]; then
      local elapsed=$(( stamp - seen_at ))
      if [ "$elapsed" -ge 0 ] && [ "$elapsed" -lt "$max_age" ]; then
        printf '%s\n' "$seen_status"
        if [ "$seen_status" = "online" ]; then
          return 0
        fi
        return 1
      fi
    fi
  fi

  # Cache miss / expired / forced → ① PATH ② probe.
  local verdict="offline"
  if _agent_installed_by_name "$name"; then
    if _agent_probe "$name"; then
      verdict="online"
    fi
  fi

  # Persist the verdict: write a scratch file, then move it into place.
  # Any failure just drops the scratch file.
  mkdir -p "$store" 2>/dev/null || true
  if [ -d "$store" ]; then
    local scratch="${entry}.tmp.$$"
    if {
      printf 'checked_at=%s\n' "$stamp"
      printf 'status=%s\n' "$verdict"
    } > "$scratch" 2>/dev/null && mv "$scratch" "$entry" 2>/dev/null; then
      :
    else
      rm -f "$scratch" 2>/dev/null
    fi
  fi

  printf '%s\n' "$verdict"
  if [ "$verdict" = "online" ]; then
    return 0
  fi
  return 1
}

# Check if an AI tool is actually installed (back-compat shim around
# _agent_installed_by_name; preserves the dir-path-based signature used
# throughout bin/roll).
# Dir-path front end to _agent_installed_by_name.
#   $1  AI tool config dir (e.g. ~/.claude, ~/.pi/agent)
# Derives the agent name from the path — basename without its leading dot,
# the parent's name for `agent`/`workspace` leaves, `gemini` → `agy`,
# `kimi-code` → `kimi` — and returns that helper's status.
_is_ai_installed() {
  local ai_dir="$1"
  local tool
  tool="$(basename "$ai_dir" | sed 's/^\.//')"
  # Nested-dir layouts collapse to their parent agent name.
  if [[ "$tool" == "agent" || "$tool" == "workspace" ]]; then
    tool="$(basename "$(dirname "$ai_dir")" | sed 's/^\.//')"
  fi
  # Same alias normalization as ai_tool_name, so detection routes to the
  # canonical agent record.
  case "$tool" in
    gemini)    tool="agy" ;;
    kimi-code) tool="kimi" ;;
  esac
  _agent_installed_by_name "$tool" "$ai_dir"
}

# ─── Spinner: TTY-aware status display for long-running steps (US-REL-003) ───
# _spin_setup [off]
#   Opens FD 3 for spinner output. Without args: FD 3 → stdout if interactive,
#   else stderr (best-effort visibility for the operator). With "off": FD 3
#   → /dev/null, silencing all _spin output (useful for scripts that drive
#   release.sh in non-interactive contexts but still want exit codes).
# Open FD 3, the channel _spin writes its status lines to.
#   $1 = "off"  → FD 3 goes to /dev/null (spinner silenced)
#   otherwise   → FD 3 duplicates stdout when stdout is a terminal, else
#                 stderr, so a redirected stdout never receives spinner text
_spin_setup() {
  case "${1:-}" in
    off)
      exec 3>/dev/null
      return 0
      ;;
  esac
  if [ ! -t 1 ]; then
    # Non-interactive (stdout redirected to a file or pipe): use stderr.
    exec 3>&2
  else
    # Interactive: spinner shares stdout with normal program output.
    exec 3>&1
  fi
}

# _spin <label> <cmd> [args...]
#   Runs cmd with a status indicator written to FD 3 only. cmd's own
#   stdout (FD 1) and stderr (FD 2) pass through untouched, so caller-side
#   `>file` and `2>file` redirections on the _spin call behave normally.
#
#   FD 3 is a TTY (or ROLL_SPIN_FORCE_TTY=1):
#     ⠋ label [Ns]  (refreshed ~10/s, line-cleared with \r\033[2K)
#     final: ✓ label (Ns)   on success
#            ✗ label (rc=N, Ns) on failure
#
#   FD 3 not a TTY (CI, pipes, `2>&1 | tee`):
#     » label...   on start
#     done label (Ns)            on success
#     fail label (rc=N, Ns)      on failure
#
#   Returns wrapped cmd's exit code (transparent under `set -e` callers
#   when used in `if`/`&&`/`||` contexts).
# Run a command with a status indicator written to FD 3.
#   $1     label shown next to the indicator
#   $2...  command and arguments to run (may be omitted)
# Returns the wrapped command's exit code. Status output is best-effort:
# every write to FD 3 silences stderr *before* redirecting to FD 3, so a
# closed or unopened FD 3 (for example _spin used without _spin_setup) never
# prints "Bad file descriptor" noise.
_spin() {
  local _label="$1"
  shift

  local _spin_tty=0
  if [ "${ROLL_SPIN_FORCE_TTY:-}" = "1" ] || [ -t 3 ]; then
    _spin_tty=1
  fi

  local _spin_start=$SECONDS
  local _spin_pid=""

  if [ "$_spin_tty" = "1" ]; then
    # Bash 3.2-safe: indexed array of braille frames.
    local _spin_frames
    _spin_frames=( $'\xe2\xa0\x8b' $'\xe2\xa0\x99' $'\xe2\xa0\xb9' $'\xe2\xa0\xb8' $'\xe2\xa0\xbc' $'\xe2\xa0\xb4' $'\xe2\xa0\xa6' $'\xe2\xa0\xa7' $'\xe2\xa0\x87' $'\xe2\xa0\x8f' )
    # Print initial frame at t=0 so users see something immediately.
    printf '\r\033[2K%s %s [0s]' "${_spin_frames[0]}" "$_label" 2>/dev/null >&3 || true
    # Only animate when FD 3 is a real TTY — when force-on for tests, skip
    # the background animator so the test gets deterministic output (the
    # initial frame, the cleared final frame).
    if [ -t 3 ]; then
      (
        set +e +u 2>/dev/null
        local _i=1
        local _t0=$_spin_start
        while :; do
          sleep 0.1
          local _e=$(( SECONDS - _t0 ))
          printf '\r\033[2K%s %s [%ds]' "${_spin_frames[$_i]}" "$_label" "$_e" 2>/dev/null >&3 || true
          _i=$(( (_i + 1) % 10 ))
        done
      ) &
      _spin_pid=$!
      # Kill spinner on Ctrl-C / SIGTERM / EXIT. Save no prior trap because
      # release.sh and its callers install no traps of their own; if a
      # future caller does, _spin's scope is short and traps are restored
      # to default after reaping.
      # shellcheck disable=SC2064
      trap "kill ${_spin_pid} 2>/dev/null; wait ${_spin_pid} 2>/dev/null; trap - INT TERM EXIT; exit 130" INT TERM
      # shellcheck disable=SC2064
      trap "kill ${_spin_pid} 2>/dev/null; wait ${_spin_pid} 2>/dev/null" EXIT
    fi
  else
    printf '» %s...\n' "$_label" 2>/dev/null >&3 || true
  fi

  # Run the wrapped command; capture its status without tripping `set -e`.
  local _spin_rc=0
  if [ "$#" -gt 0 ]; then
    "$@" || _spin_rc=$?
  fi

  # Stop and reap the animator (if one was started) and drop the traps.
  if [ -n "$_spin_pid" ]; then
    kill "$_spin_pid" 2>/dev/null || true
    wait "$_spin_pid" 2>/dev/null || true
    trap - INT TERM EXIT
  fi

  local _spin_elapsed=$(( SECONDS - _spin_start ))

  if [ "$_spin_tty" = "1" ]; then
    if [ "$_spin_rc" -eq 0 ]; then
      printf '\r\033[2K✓ %s (%ds)\n' "$_label" "$_spin_elapsed" 2>/dev/null >&3 || true
    else
      printf '\r\033[2K✗ %s (rc=%d, %ds)\n' "$_label" "$_spin_rc" "$_spin_elapsed" 2>/dev/null >&3 || true
    fi
  else
    if [ "$_spin_rc" -eq 0 ]; then
      printf 'done %s (%ds)\n' "$_label" "$_spin_elapsed" 2>/dev/null >&3 || true
    else
      printf 'fail %s (rc=%d, %ds)\n' "$_label" "$_spin_rc" "$_spin_elapsed" 2>/dev/null >&3 || true
    fi
  fi

  return "$_spin_rc"
}

# ─── Helper: read config value ───────────────────────────────────────────────
# Read a nested `parent.child` value (one indent level) from a yaml file.
# Mirrors the awk block readers used for loop_schedule. Prints the raw value
# (inline comment + surrounding whitespace stripped) on stdout; empty when the
# parent block or the child key is absent. US-LOOP-033.
# Print the value of `child` nested one indent level under top-level key
# `parent` in yaml file $1.
#   $1 file   $2 parent key   $3 child key
# The value has its inline comment and surrounding whitespace removed.
# Prints nothing when the file, the parent block, or the child key is absent.
_yaml_read_nested() {
  local file="$1" parent="$2" child="$3"
  if [[ ! -f "$file" ]]; then
    return 0
  fi
  awk -v parent_re="^${parent}:" -v child_re="^[[:space:]]+${child}:" '
    {
      # The parent line opens the block; its own content is not inspected.
      if ($0 ~ parent_re) { inside = 1; next }
      if (!inside) next
      # Any line starting in column 0 closes the block.
      if ($0 ~ /^[^[:space:]]/) exit
      if ($0 ~ child_re) {
        sub(/^[[:space:]]*[^:]*:[[:space:]]*/, "")
        sub(/[[:space:]]*#.*$/, "")
        sub(/[[:space:]]*$/, "")
        print
        exit
      }
    }
  ' "$file" 2>/dev/null
}

# Print a value from the global config file ($ROLL_CONFIG).
#   $1  key: a flat `key`, or `parent.child` for a value nested one level
#       (US-LOOP-033, read through _yaml_read_nested)
#   $2  default printed when the key is absent or empty (optional)
# A leading `~` in the printed value is replaced with $HOME. Always
# returns 0.
config_get() {
  local key="$1"
  local default="${2:-}"
  local val=""

  if [[ "$key" == *.* ]]; then
    val="$(_yaml_read_nested "$ROLL_CONFIG" "${key%%.*}" "${key#*.}")" || true
  elif [[ -f "$ROLL_CONFIG" ]]; then
    # `|| true`: with `set -e -o pipefail` a key that grep does not find (or a
    # grep cut short by `head`) would otherwise fail this assignment and abort
    # the whole script whenever config_get is called outside `$(...)`.
    val="$(grep -E "^${key}:" "$ROLL_CONFIG" 2>/dev/null \
      | head -1 \
      | sed -e 's/^[^:]*:[[:space:]]*//' \
            -e 's/[[:space:]]*#.*$//' \
            -e 's/[[:space:]]*$//')" || true
  fi

  if [[ -z "$val" ]]; then
    val="$default"
  fi
  printf '%s\n' "${val/#\~/$HOME}"
}

# ─── Helpers: read ai_* entries from config ──────────────────────────────────
# Returns one "dir|config_file|convention_src" line per ai_* key in config.yaml
# Print one "dir|config_file|convention_src" line per ai_* key found in
# $ROLL_CONFIG, with a leading `~` replaced by $HOME.
# Key names may contain lowercase letters, digits and underscores, so
# multi-word keys such as `ai_kimi_code` are included. Prints nothing and
# returns 0 when the config file is missing or unreadable.
_get_ai_tools() {
  [[ -r "$ROLL_CONFIG" ]] || return 0
  local line entry
  # Regex kept in a variable so it is parsed identically on every bash version.
  local key_re='^ai_[a-z0-9_]+:[[:space:]]*(.*)$'
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ "$line" =~ $key_re ]] || continue
    entry="${BASH_REMATCH[1]}"
    printf '%s\n' "${entry/#\~/$HOME}"
  done < "$ROLL_CONFIG"
  return 0
}

# Iterate all configured AI tools, calling: callback entry ai_dir ai_config ai_src [extra_args...]
# Invoke a callback once per configured AI tool entry from _get_ai_tools.
#   $1     callback name
#   $2...  extra arguments appended to every callback invocation
# The callback is called as: callback entry ai_dir ai_config ai_src [extra...]
# Locals carry the _feach_ prefix so they are unlikely to shadow a variable
# the callback reads through dynamic scoping.
_for_each_ai_tool() {
  local _feach_cb="$1"; shift
  # Declared local so the loop variable does not leak into the global scope.
  local _feach_entry
  while IFS= read -r _feach_entry; do
    "$_feach_cb" "$_feach_entry" \
      "$(_ai_dir "$_feach_entry")" \
      "$(_ai_config "$_feach_entry")" \
      "$(_ai_src "$_feach_entry")" \
      "$@"
  done < <(_get_ai_tools)
}

# Add any ai_* keys from the default set that are missing from the user's config.
# Non-destructive: never removes or modifies existing entries.
# Add any ai_* keys from the default set that are missing from $ROLL_CONFIG.
# Non-destructive: existing lines are never removed or modified.
# Missing entries are inserted, in default order, just before the first line
# that starts with "# User preferences"; without such a line they are
# appended. A last line without a trailing newline is preserved. The file is
# updated with `cp` so the existing config file is overwritten in place.
# Returns 1 when the config file does not exist, the temp file cannot be
# created, or the write-back fails; 0 otherwise.
_ensure_config_entries() {
  [[ -f "$ROLL_CONFIG" ]] || return 1

  local -a default_keys=(
    "ai_claude:~/.claude|CLAUDE.md|CLAUDE.md"
    "ai_agy:~/.gemini|GEMINI.md|GEMINI.md"
    "ai_kimi:~/.kimi|AGENTS.md|AGENTS.md"
    "ai_kimi_code:~/.kimi-code|AGENTS.md|AGENTS.md"
    "ai_codex:~/.codex|AGENTS.md|AGENTS.md"
    "ai_cursor:~/.cursor|.cursor-rules|.cursor-rules"
    "ai_trae:~/.trae|user_rules.md|project_rules.md"
    "ai_opencode:~/.config/opencode|AGENTS.md|AGENTS.md"
    "ai_openclaw:~/.openclaw/workspace|AGENTS.md|AGENTS.md"
    "ai_pi:~/.pi/agent|AGENTS.md|AGENTS.md"
    "ai_deepseek:~/.deepseek|AGENTS.md|AGENTS.md"
    "ai_qwen:~/.qwen|AGENTS.md|AGENTS.md"
  )

  # Pass 1: collect the entries whose key has no line in the config yet.
  local -a missing=()
  local added=0
  local entry key
  for entry in "${default_keys[@]}"; do
    key="${entry%%:*}"
    if ! grep -qE "^${key}:" "$ROLL_CONFIG" 2>/dev/null; then
      missing+=("${key}: ${entry#*:}")
      added=$((added + 1))
      warn "$(msg shared.added_missing_config_entry "$key")"
    fi
  done

  # Nothing to add. Returning here also avoids expanding an empty array,
  # which is an error under `set -u` on older bash versions.
  if [[ $added -eq 0 ]]; then
    return 0
  fi

  # Pass 2: render the updated file once. The marker is matched by prefix,
  # and `|| [[ -n "$line" ]]` keeps a final line that lacks a newline.
  local tmp
  tmp="$(mktemp)" || return 1
  local line inserted=0
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ $inserted -eq 0 && "$line" == "# User preferences"* ]]; then
      printf '%s\n' "${missing[@]}"
      inserted=1
    fi
    printf '%s\n' "$line"
  done < "$ROLL_CONFIG" > "$tmp"
  if [[ $inserted -eq 0 ]]; then
    printf '%s\n' "${missing[@]}" >> "$tmp"
  fi

  if ! cp "$tmp" "$ROLL_CONFIG"; then
    rm -f "$tmp"
    return 1
  fi
  rm -f "$tmp"
  ok "$(msg shared.config_updated_with_new_entries "$added")"
  return 0
}

# Extract fields from a "<dir>|<config>|<src>" entry
# Field accessors for a "<dir>|<config>|<src>" entry: each prints one
# pipe-delimited field of $1.
_ai_dir() {
  local entry="$1"
  echo "$entry" | cut -d '|' -f 1
}

_ai_config() {
  local entry="$1"
  echo "$entry" | cut -d '|' -f 2
}

_ai_src() {
  local entry="$1"
  echo "$entry" | cut -d '|' -f 3
}

# ─── Helper: safe copy with overwrite prompt ─────────────────────────────────
# Copy a file, creating the destination directory as needed.
#   $1 src    source file; a missing source is a silent no-op
#   $2 dst    destination path
#   $3 force  "true" overwrites without asking (default: false)
# An existing destination that is identical to the source is left untouched.
# When it differs and stdin is a terminal the user is asked (Y/n/d, default
# Yes); when stdin is not a terminal the file is overwritten without a prompt.
safe_copy() {
  local src="$1"
  local dst="$2"
  local force="${3:-false}"

  if [[ ! -f "$src" ]]; then
    return
  fi

  local dst_dir
  dst_dir="$(dirname "$dst")"
  mkdir -p "$dst_dir"

  # Display form of $dst with a leading $HOME shown as "~". Built once and
  # always passed quoted, so a path containing spaces stays a single msg
  # argument and the result does not depend on how the running bash treats
  # an unquoted "~" inside a ${var/pat/~} replacement.
  local shown="$dst"
  case "$dst" in
    "$HOME"/*) shown="~${dst#"$HOME"}" ;;
  esac

  if [[ -f "$dst" ]] && [[ "$force" != "true" ]]; then
    if diff -q "$src" "$dst" &>/dev/null; then
      return  # identical, skip silently
    fi
    # Non-interactive (stdin is not a terminal): overwrite without asking.
    # _run_setup_step redirects stdin to /dev/null and suppresses
    # stdout/stderr, so a prompt here would never be seen.
    if [[ ! -t 0 ]]; then
      cp "$src" "$dst"
      ok "$(msg shared.wrote "$shown")"
      return
    fi
    echo ""
    warn "$(msg shared.file_exists_and_differs "$shown")"
    echo -e "  ${BOLD}Overwrite?${NC} [Y/n/d(iff)] "
    # EOF on stdin counts as the default answer (Yes).
    local answer answer2
    read -r answer || answer="Y"
    case "$answer" in
      d|D|diff)
        diff --color=auto "$dst" "$src" || true
        echo ""
        echo -e "  ${BOLD}Overwrite?${NC} [Y/n] "
        read -r answer2 || answer2="Y"
        if [[ "$answer2" =~ ^[Nn]$ ]]; then
          info "$(msg shared.skipped "$shown")"
          return
        fi
        ;;
      n|N)
        info "$(msg shared.skipped "$shown")"
        return
        ;;
      *) ;;  # empty answer or 'y' / 'Y' → overwrite (default Yes)
    esac
  fi

  cp "$src" "$dst"
  ok "$(msg shared.wrote_2 "$shown")"
}

# ─── Internal: prune files in $1 that no longer exist in $2 ──────────────────
# Used to clean up stale files left behind when a previous version had them
# but the current package no longer ships them.
#   $1 installed_dir  directory to clean (a missing directory is a no-op)
#   $2 source_dir     directory whose file names are the ones to keep
#   $3 label          noun used in the log message (default: "file")
# Only regular files directly inside $1 are considered, dotfiles included.
_prune_dir() {
  local installed_dir="$1"
  local source_dir="$2"
  local label="${3:-file}"
  [[ -d "$installed_dir" ]] || return 0

  local installed_f installed_fname
  for installed_f in "$installed_dir"/* "$installed_dir"/.*; do
    # -f also rejects an unmatched literal glob and the "." / ".." entries.
    [[ -f "$installed_f" ]] || continue
    installed_fname="${installed_f##*/}"
    if [[ ! -f "$source_dir/$installed_fname" ]]; then
      rm -f -- "$installed_f"
      # Every argument is quoted: callers pass labels such as "skill file",
      # which must stay one msg argument.
      info "$(msg shared.removed_stale "$label" "${installed_dir##*/}" "$installed_fname")"
    fi
  done
}

# ─── Internal: pull skills from repo → ~/.roll/skills ──────────────────────
# Mirrors $ROLL_PKG_DIR/skills/<name>/ into $ROLL_HOME/skills/<name>/.
# Only regular, non-hidden files directly inside each skill directory are
# copied. Files and whole skills that the package no longer ships are removed
# from $ROLL_HOME/skills.
# Returns 1 (after logging via err) when the package has no skills/ directory.
_pull_skills() {
  local pkg_skills="$ROLL_PKG_DIR/skills"
  local home_skills="$ROLL_HOME/skills"

  if [[ ! -d "$pkg_skills" ]]; then
    err "$(msg shared.skills_source_not_found_at_skills "$ROLL_PKG_DIR")"
    return 1
  fi

  mkdir -p "$home_skills"

  # Loop variables are local so they do not leak into the caller's scope.
  local skill_dir skill_name f
  # Copy/update skills from repo → ~/.roll/skills/
  for skill_dir in "$pkg_skills"/*/; do
    [[ -d "$skill_dir" ]] || continue
    skill_name="$(basename "$skill_dir")"
    mkdir -p "$home_skills/$skill_name"
    for f in "$skill_dir"*; do
      if [[ -f "$f" ]]; then
        cp "$f" "$home_skills/$skill_name/${f##*/}"
      fi
    done
    # File-level prune (dir-level prune below catches whole-skill removals)
    _prune_dir "$home_skills/$skill_name" "$skill_dir" "skill file"
  done

  # Prune skills that no longer exist in repo.
  # ~/.roll/skills/ is roll's controlled namespace — safe to clean up.
  local installed_dir installed_name
  for installed_dir in "$home_skills"/*/; do
    [[ -d "$installed_dir" ]] || continue
    installed_name="$(basename "$installed_dir")"
    if [[ ! -d "$pkg_skills/$installed_name" ]]; then
      rm -rf -- "$installed_dir"
      info "$(msg shared.removed_stale_skill "$installed_name")"
    fi
  done
}

# ─── Internal: pull conventions from repo → ~/.roll/conventions ────────────
# Copies the package's global conventions and per-project-type templates
# (regular files, dotfiles included) into $ROLL_GLOBAL and $ROLL_TEMPLATES via
# safe_copy, then prunes files the package no longer ships.
#   $1 force  forwarded to safe_copy (default: false)
# Returns 1 (after logging via err) when $ROLL_PKG_CONVENTIONS is missing.
_pull_conventions() {
  local force="${1:-false}"

  if [[ ! -d "$ROLL_PKG_CONVENTIONS" ]]; then
    err "$(msg shared.convention_source_not_found_at "$ROLL_PKG_CONVENTIONS")"
    return 1
  fi

  mkdir -p "$ROLL_GLOBAL"
  mkdir -p "$ROLL_TEMPLATES"/{fullstack,frontend-only,backend-service,cli}

  # Loop variables are local so they do not leak into the caller's scope.
  local f tpl_dir tpl_name

  info "$(msg shared.copying_global_conventions)"
  # Visible files first, then dotfiles. The -f test filters out "." / ".."
  # and any unmatched literal glob.
  for f in "$ROLL_PKG_CONVENTIONS"/global/* "$ROLL_PKG_CONVENTIONS"/global/.*; do
    if [[ -f "$f" ]]; then
      safe_copy "$f" "$ROLL_GLOBAL/${f##*/}" "$force"
    fi
  done
  # Prune stale files in ~/.roll/conventions/global/
  _prune_dir "$ROLL_GLOBAL" "$ROLL_PKG_CONVENTIONS/global" "convention"

  info "$(msg shared.copying_project_templates)"
  for tpl_dir in "$ROLL_PKG_CONVENTIONS"/templates/*/; do
    # Skip the literal pattern left behind when no template directory exists.
    [[ -d "$tpl_dir" ]] || continue
    tpl_name="$(basename "$tpl_dir")"
    for f in "$tpl_dir"* "$tpl_dir".*; do
      if [[ -f "$f" ]]; then
        safe_copy "$f" "$ROLL_TEMPLATES/$tpl_name/${f##*/}" "$force"
      fi
    done
    # Prune stale files in this template dir
    _prune_dir "$ROLL_TEMPLATES/$tpl_name" "$tpl_dir" "template file"
  done
}

# ─── Internal: install local cache from repo source ───────────────────────────
# Pulls conventions and skills into $ROLL_HOME, then makes sure a usable
# $ROLL_CONFIG exists:
#   - a config without any ai_* entry is backed up to config.yaml.bak and
#     recreated from the default template;
#   - a missing config is created from the default template, and its
#     primary_agent is switched to the first detected agent when that is not
#     claude;
#   - finally _ensure_config_entries tops up any missing ai_* keys.
#   $1 force  forwarded to _pull_conventions (default: false)
# Exits the script with status 1 when $ROLL_PKG_CONVENTIONS is missing.
_install_local() {
  local force="${1:-false}"

  if [[ ! -d "$ROLL_PKG_CONVENTIONS" ]]; then
    err "$(msg shared.convention_source_not_found_at_2 "$ROLL_PKG_CONVENTIONS")"
    err "$(msg shared.run_this_from_the_roll_repo)"
    exit 1
  fi

  _pull_conventions "$force"
  _pull_skills

  # Recreate config if it has no ai_* entries (covers legacy sync_* format and blank/broken configs)
  if [[ -f "$ROLL_CONFIG" ]] && ! grep -qE "^ai_[a-z]+:" "$ROLL_CONFIG" 2>/dev/null; then
    warn "$(msg shared.config_has_no_ai_entries_recreating)"
    cp "$ROLL_CONFIG" "${ROLL_CONFIG}.bak"
    info "$(msg shared.backup_saved_roll_config_yaml_bak)"
    rm "$ROLL_CONFIG"
  fi

  # Create config if it doesn't exist. The ai_* list mirrors the default set
  # in _ensure_config_entries (ai_qwen included) so a fresh install does not
  # immediately report a "missing" entry. The heredoc delimiter is quoted, so
  # the template is written literally (no expansion).
  if [[ ! -f "$ROLL_CONFIG" ]]; then
    info "$(msg shared.creating_default_config)"
    cat > "$ROLL_CONFIG" << 'YAML'
# Roll Configuration
# Edit this file, then run `roll setup` to apply.

# AI tools — each entry controls both convention sync and skill linking
# Format: <name>: <dir>|<config_file>|<convention_src>
ai_claude: ~/.claude|CLAUDE.md|CLAUDE.md
ai_agy: ~/.gemini|GEMINI.md|GEMINI.md
ai_kimi: ~/.kimi|AGENTS.md|AGENTS.md
ai_kimi_code: ~/.kimi-code|AGENTS.md|AGENTS.md
ai_codex: ~/.codex|AGENTS.md|AGENTS.md
ai_cursor: ~/.cursor|.cursor-rules|.cursor-rules
ai_trae: ~/.trae|user_rules.md|project_rules.md
ai_opencode: ~/.config/opencode|AGENTS.md|AGENTS.md
ai_openclaw: ~/.openclaw/workspace|AGENTS.md|AGENTS.md
ai_pi: ~/.pi/agent|AGENTS.md|AGENTS.md
ai_deepseek: ~/.deepseek|AGENTS.md|AGENTS.md
ai_qwen: ~/.qwen|AGENTS.md|AGENTS.md

# User preferences
default_language: zh
default_project_type: fullstack
editor: ${EDITOR:-vim}

# Loop schedule (24h format, machine local timezone)
# Minute fields auto-derive from project path hash when omitted — avoids contention across projects.
# active_start/active_end moved to per-project .roll/local.yaml loop_schedule block (default 0/24).
# loop_minute: 5        # omit to auto-derive from project hash
loop_dream_hour: 3
# loop_dream_minute: 10 # omit to auto-derive
primary_agent: claude
YAML
    ok "$(msg shared.created_roll_config_yaml)"

    # FIX-128: the heredoc template hardcodes `primary_agent: claude` for
    # the first-time case. Replace it with the first agent that actually
    # has its CLI on PATH so users without Claude installed don't get a
    # silently-broken default. If nothing detected, leave `claude` so the
    # user still has a clear handle to fix manually.
    local _detected_primary
    _detected_primary="$(_first_installed_agent || true)"
    if [[ -n "$_detected_primary" && "$_detected_primary" != "claude" ]]; then
      _replace_primary_agent "$_detected_primary"
      info "$(msg shared.primary_agent_auto_detected "$_detected_primary" 2>/dev/null \
             || echo "primary_agent → $_detected_primary (auto-detected from installed CLIs)")"
    fi
  fi

  # Ensure all expected ai_* keys exist (handles upgrades where new tools were added)
  _ensure_config_entries
}

# FIX-128: print the name of the first agent that _agent_installed_by_name
# accepts, trying candidates in the fixed order listed below.
# Outputs:  the agent name on stdout; nothing when no candidate matches
# Returns:  0 when an agent was printed, 1 otherwise (callers that only want
#           the text append `|| true`)
_first_installed_agent() {
  local candidate
  local -a scan_order=(
    claude codex kimi deepseek qwen agy pi cursor opencode trae openclaw
  )
  for candidate in "${scan_order[@]}"; do
    _agent_installed_by_name "$candidate" || continue
    echo "$candidate"
    return 0
  done
  return 1
}

# FIX-128: rewrite every `primary_agent:` line in $ROLL_CONFIG to the given
# value; all other lines are preserved.
#   $1 new  new primary agent name (an empty value is a no-op)
# The result is copied back over the existing file rather than moved into
# place, so a symlinked config stays a symlink and the file keeps its mode.
# The temp file is removed on both the success and the failure path.
# Returns: 0 on success or no-op; otherwise the status of the failed awk/cp.
_replace_primary_agent() {
  local new="$1"
  [[ -f "$ROLL_CONFIG" && -n "$new" ]] || return 0
  local tmp rc
  tmp="$(mktemp)"
  # The value travels through the environment (not `awk -v`) so awk does not
  # interpret backslash escapes in it.
  if ROLL_NEW_PRIMARY="$new" awk '
      /^primary_agent:/ { print "primary_agent: " ENVIRON["ROLL_NEW_PRIMARY"]; next }
      { print }
    ' "$ROLL_CONFIG" > "$tmp" && cp "$tmp" "$ROLL_CONFIG"; then
    rc=0
  else
    rc=$?
  fi
  rm -f "$tmp"
  return "$rc"
}

# ─── Internal: create or repair per-skill symlinks (non-destructive) ─────────
# For every tool listed by _get_ai_tools, makes <tool dir>/skills/<skill> a
# symlink to the matching directory under $ROLL_HOME/skills.
#   - Tools other than ~/.claude are skipped when they are neither detected by
#     _is_ai_installed nor have an existing config directory.
#   - Nothing is written when the tool dir or its skills dir resolves into the
#     package checkout ($ROLL_PKG_DIR).
#   - A whole-directory skills symlink is kept when it resolves to
#     $ROLL_HOME/skills, removed when dangling, and left alone otherwise.
#   - Real files or directories at a skill path are never touched.
#   $1 force  accepted from callers; not consulted by the logic below
_link_skills() {
  local force="${1:-false}"
  local roll_skills_real pkg_skills_real
  roll_skills_real="$(canonical_dir "$ROLL_HOME/skills" 2>/dev/null || true)"
  pkg_skills_real="$(canonical_dir "$ROLL_PKG_DIR/skills" 2>/dev/null || true)"

  # All loop state is local so nothing leaks into the caller's scope.
  local entry link skill_dir
  local ai_dir ai_name ai_dir_real skills_dir skills_real
  local skills_target target_shown link_target
  local skill_name skill_link current_target
  local linked repaired pruned

  while IFS= read -r entry; do
    ai_dir="$(_ai_dir "$entry")"
    # FIX-128: detection is now binary-on-PATH, but skill linking keeps
    # the same Claude-always-syncs semantics as convention syncing and
    # tolerates pre-existing config dirs (an agent the user is mid-
    # upgrade or installed via nvm/asdf still has its convention dir;
    # we don't want to silently stop linking skills there).
    if [[ "$ai_dir" != "$HOME/.claude" ]] \
       && ! _is_ai_installed "$ai_dir" \
       && [[ ! -d "$ai_dir" ]]; then
      continue
    fi
    mkdir -p "$ai_dir"

    ai_name="$(ai_tool_name "$ai_dir")"
    ai_dir_real="$(canonical_dir "$ai_dir" 2>/dev/null || true)"
    skills_dir="$ai_dir/skills"

    if [[ -n "$ai_dir_real" && \
          ( "$ai_dir_real" == "$ROLL_PKG_DIR" || "$ai_dir_real" == "$ROLL_PKG_DIR"/* ) ]]; then
      warn "$(msg shared.skipped_resolves_to_repo_refusing_to "$ai_name")"
      continue
    fi

    # Guard: resolve ALL symlink chains — block writing anywhere inside the repo
    skills_real="$(canonical_dir "$skills_dir" 2>/dev/null || true)"
    if [[ -n "$skills_real" && -n "$pkg_skills_real" && \
          ( "$skills_real" == "$pkg_skills_real" || "$skills_real" == "$pkg_skills_real"/* ) ]]; then
      warn "$(msg shared.skipped_resolves_to_repo "$ai_name" "$(lower_name "$ai_name")")"
      continue
    fi

    # Handle whole-dir symlink (legacy or user-created)
    if [[ -L "$skills_dir" ]]; then
      skills_target="$(readlink "$skills_dir")"
      if [[ -n "$skills_real" && "$skills_real" == "$roll_skills_real" ]]; then
        continue  # Whole-dir symlink to ~/.roll/skills — still functional
      fi
      # Display form of the link target with a leading $HOME shown as "~";
      # passed quoted so a target containing spaces stays one msg argument.
      target_shown="$skills_target"
      case "$skills_target" in
        "$HOME"/*) target_shown="~${skills_target#"$HOME"}" ;;
      esac
      # Dangling whole-dir symlink — remove and recreate as per-skill links
      if [[ -z "$skills_real" ]]; then
        info "$(msg shared.removing_legacy_symlink_skills "$ai_name" "$target_shown")"
        rm "$skills_dir"
      else
        warn "$(msg shared.skipped_skills_unknown_symlink_target "$ai_name" "$target_shown")"
        continue
      fi
    fi

    mkdir -p "$skills_dir"
    # Re-check after creation: the new path must still not resolve into the repo.
    skills_real="$(canonical_dir "$skills_dir" 2>/dev/null || true)"
    if [[ -n "$skills_real" && -n "$pkg_skills_real" && \
          ( "$skills_real" == "$pkg_skills_real" || "$skills_real" == "$pkg_skills_real"/* ) ]]; then
      warn "$(msg shared.skipped_skills_created_path_resolves_to "$ai_name")"
      continue
    fi
    linked=0 repaired=0 pruned=0

    # Prune stale roll-* symlinks pointing to skills no longer in ~/.roll/skills/
    for link in "$skills_dir"/roll-*; do
      [[ -L "$link" ]] || continue
      link_target="$(readlink "$link")"
      # Only remove symlinks we own (pointing into our skills dir)
      if [[ "$link_target" == "$ROLL_HOME/skills/"* ]] && [[ ! -d "$link" ]]; then
        rm "$link"
        pruned=$((pruned + 1))
      fi
    done

    for skill_dir in "$ROLL_HOME/skills"/*/; do
      [[ -d "$skill_dir" ]] || continue
      skill_name="$(basename "$skill_dir")"
      skill_link="$skills_dir/$skill_name"

      if [[ -L "$skill_link" ]]; then
        current_target="$(readlink "$skill_link")"
        if [[ "$current_target" != "$skill_dir" ]]; then
          # macOS ln -sf follows symlinks-to-dirs and creates inside instead of
          # replacing — explicitly remove first to guarantee replacement.
          rm -f "$skill_link" && ln -s "$skill_dir" "$skill_link"
          repaired=$((repaired + 1))
        fi
        # correct symlink: skip silently
      elif [[ ! -e "$skill_link" ]]; then
        ln -s "$skill_dir" "$skill_link"
        linked=$((linked + 1))
      fi
      # real file/dir at that path: skip — never touch user content
    done
    if [[ $((linked + repaired + pruned)) -gt 0 ]]; then
      ok "$(msg shared.skills_linked_in_skills_new_repaired "$ai_name" "$linked" "$repaired" "$pruned")"
    fi
  done < <(_get_ai_tools)
}

# ─── Internal: sync conventions via @include — never overwrites user files ─────
# Writes the convention content to <tool dir>/roll.md and makes sure the
# tool's main config file references it with an "@roll.md" line.
#   $1 src       convention source file (a missing file is a no-op)
#   $2 main_dst  the tool's main config file, e.g. ~/.claude/CLAUDE.md
#   $3 force     "true" rewrites roll.md even when it is already identical
# Existing content of the main config is never replaced; the include line is
# only created or appended.
_sync_convention_for_tool() {
  local src="$1"
  local main_dst="$2"
  local force="$3"

  [[ -f "$src" ]] || return 0
  local dst_dir
  dst_dir="$(dirname "$main_dst")"

  # Only proceed if Claude (always), the tool is installed (binary-on-PATH
  # per FIX-128), or the convention dir already exists (mid-upgrade /
  # nvm-installed binaries that aren't on this shell's PATH still get
  # their convention refresh).
  if [[ "$dst_dir" != "$HOME/.claude" ]] \
     && ! _is_ai_installed "$dst_dir" \
     && [[ ! -d "$dst_dir" ]]; then
    return 0
  fi
  mkdir -p "$dst_dir"

  local wk_file="$dst_dir/roll.md"

  # Display forms with a leading $HOME shown as "~". They are always passed
  # quoted, so a path containing spaces stays a single msg argument.
  local wk_shown="$wk_file" main_shown="$main_dst"
  case "$wk_file" in
    "$HOME"/*) wk_shown="~${wk_file#"$HOME"}" ;;
  esac
  case "$main_dst" in
    "$HOME"/*) main_shown="~${main_dst#"$HOME"}" ;;
  esac

  # Write/update roll.md — this is our file, always safe to overwrite
  if [[ "$force" == "true" ]] || ! diff -q "$src" "$wk_file" >/dev/null 2>&1; then
    cp "$src" "$wk_file"
    ok "$(msg shared.wrote_3 "$wk_shown")"
  fi

  # Append @roll.md include to main config — never overwrite existing content
  if [[ ! -f "$main_dst" ]]; then
    printf '@roll.md\n' > "$main_dst"
    ok "$(msg shared.created "$main_shown")"
  elif ! grep -qF "@roll.md" "$main_dst" 2>/dev/null; then
    # Leading newline keeps the include on its own line even when the
    # existing file does not end with one.
    printf '\n@roll.md\n' >> "$main_dst"
    ok "$(msg shared.appended_roll_md_to "$main_shown")"
  else
    ok "$(msg shared.already_included "$main_shown")"
  fi
}

# Per-tool callback for _for_each_ai_tool: syncs one tool's convention file.
#   $1 raw config entry (part of the callback signature, unused here)
#   $2 tool directory   $3 config file name   $4 convention source name
#   $5 force flag, forwarded unchanged
_sync_one_tool() {
  local tool_dir="$2" config_name="$3" convention_name="$4" force_flag="$5"
  _sync_convention_for_tool \
    "${ROLL_GLOBAL}/${convention_name}" \
    "${tool_dir}/${config_name}" \
    "${force_flag}"
}

# Sync conventions for every configured AI tool by running _sync_one_tool
# through _for_each_ai_tool.
#   $1 force  forwarded to each per-tool sync (default: false)
_sync_conventions() {
  local force_flag="${1:-false}"
  _for_each_ai_tool _sync_one_tool "${force_flag}"
}

# ─── Internal: sync skills (pull + link) ──────────────────────────────────────
# Two phases, each announced via info: refresh $ROLL_HOME/skills from the
# package (_pull_skills), then (re)create the per-tool symlinks (_link_skills).
#   $1 force  forwarded to _link_skills (default: false)
_sync_skills() {
  local force_flag="${1:-false}"

  # Phase 1: pull
  info "$(msg shared.updating_skills)"
  _pull_skills
  ok "$(msg shared.skills_updated_in_roll_skills)"

  # Phase 2: link
  info "$(msg shared.creating_skill_symlinks_for_ai_tools)"
  _link_skills "${force_flag}"
}

# ═══════════════════════════════════════════════════════════════════════════════
# COMMAND: setup [--force]
# Initialize ~/.roll/ and sync everything to AI tools in one step
# ═══════════════════════════════════════════════════════════════════════════════
# Make sure tmux is available (US-AUTO-026 promoted it from a soft to a
# required dependency for visible loop runs).
#   - already on PATH          → nothing to do
#   - macOS with Homebrew      → try `brew install tmux`
#   - macOS without Homebrew,
#     or any other OS          → warn that it must be installed manually
# Always returns 0, even when tmux could not be installed, so the rest of
# `roll setup` proceeds.
_ensure_tmux() {
  command -v tmux >/dev/null 2>&1 && return 0

  local platform
  platform="$(uname)"

  if [[ "$platform" != "Darwin" ]]; then
    warn "$(msg shared.tmux_required_install_via_your_package)"
    return 0
  fi

  if ! command -v brew >/dev/null 2>&1; then
    warn "$(msg shared.tmux_required_but_brew_not_available)"
    return 0
  fi

  info "$(msg shared.tmux_not_found_installing_via_brew)"
  if brew install tmux >/dev/null 2>&1; then
    ok "$(msg shared.tmux_installed_tmux)"
  else
    warn "$(msg shared.brew_install_tmux_failed_install_manually)"
  fi
  return 0
}

# FIX-075 / FIX-079: print a sorted, content-based fingerprint of the watched
# directories so a caller can compare "before" and "after" a setup step.
#   $1  colon-separated list of directories; missing ones are skipped
# Per directory the fingerprint contains:
#   - one `cksum` line per regular file (content-based, so re-copying
#     identical content does not register as a change),
#   - one "L <path> -> <target>" line per symlink (steps that only create
#     links),
#   - one line per directory path (steps that only create directories).
_setup_snapshot() {
  local watch_list="$1"
  local -a watch_dirs
  IFS=':' read -r -a watch_dirs <<<"$watch_list"

  local dir link_path
  for dir in "${watch_dirs[@]}"; do
    if [[ ! -d "$dir" ]]; then
      continue
    fi
    # regular files → checksum lines
    find "$dir" -type f -print0 2>/dev/null | xargs -0 cksum 2>/dev/null
    # symlinks → path plus link target
    while IFS= read -r link_path; do
      printf 'L %s -> %s\n' "$link_path" "$(readlink "$link_path")"
    done < <(find "$dir" -type l 2>/dev/null)
    # directories → bare paths
    find "$dir" -type d -print 2>/dev/null
  done | sort
}

# FIX-075 (documents _run_setup_step, which is defined after
# _ensure_hooks_path below): run a setup step and report
# changed/unchanged/failed via the global _ROLL_SETUP_STATE. The caller passes
# the watch dir(s) plus the command + args. stdout/stderr of the inner command
# are suppressed (same as the previous pattern in cmd_setup) to keep the v2 UI
# render the only user-visible output.
# US-INFRA-008: point git's core.hooksPath at `hooks` so the TCR pre-commit
# gate is not silently bypassed in new clones, worktrees, or automated
# environments.
#   $1  repository path (default: $PWD)
# Idempotent: a value that is already set to something other than the git
# default (.git/hooks) is left alone. Outside a git repository this is a
# silent no-op. Always returns 0.
_ensure_hooks_path() {
  local repo_path="${1:-$PWD}"

  # Must be a git repo
  if ! git -C "$repo_path" rev-parse --git-dir >/dev/null 2>&1; then
    return 0
  fi

  local configured
  configured="$(git -C "$repo_path" config core.hooksPath 2>/dev/null || echo "")"

  # Only set when unset or pointing at the git default (.git/hooks)
  case "$configured" in
    ""|".git/hooks")
      git -C "$repo_path" config core.hooksPath hooks 2>/dev/null || true
      ;;
  esac
  return 0
}

# Run one setup step and classify its outcome in the global _ROLL_SETUP_STATE:
#   "failed"     the command returned non-zero
#   "unchanged"  it succeeded and the _setup_snapshot of the watch dirs is the
#                same before and after
#   "changed"    it succeeded and the snapshot differs
#   $1    watch list, passed to _setup_snapshot
#   $2..  command and arguments; run with stdin from /dev/null and all of its
#         output discarded
_run_setup_step() {
  local watch="$1"
  shift

  local snap_before snap_after
  snap_before=$(_setup_snapshot "$watch")

  if ! "$@" </dev/null >/dev/null 2>&1; then
    _ROLL_SETUP_STATE="failed"
    return 0
  fi

  snap_after=$(_setup_snapshot "$watch")
  case "$snap_after" in
    "$snap_before") _ROLL_SETUP_STATE="unchanged" ;;
    *)              _ROLL_SETUP_STATE="changed" ;;
  esac
}

# `roll setup [--force|-f]`: run the global install steps in a fixed order,
# record one status marker per step, and render the result through
# _emit_setup_v2_ui. Any argument other than --force/-f logs an error and
# exits with status 1.
cmd_setup() {
  local force=false
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --force|-f) force=true; shift ;;
      # NOTE(review): the offending argument ($1) is not passed to msg here —
      # confirm the catalog entry does not expect it.
      *) err "$(msg setup.unknown_argument_1)"; exit 1 ;;
    esac
  done

  # P1-3.4 (v3 branch): skills/ is a git submodule — populate it for git clones
  # whose skills/ is still empty. No-op for npm/curl installs (no .gitmodules).
  if [[ -d "$ROLL_PKG_DIR/.git" ]] && [[ -f "$ROLL_PKG_DIR/.gitmodules" ]] \
     && [[ -z "$(ls -A "$ROLL_PKG_DIR/skills" 2>/dev/null)" ]]; then
    ( cd "$ROLL_PKG_DIR" && git submodule update --init --recursive --quiet ) || true
  fi

  # Capture per-step outcomes for the v2 UI render at the end.
  # Each element is "<marker>|<label>".
  local steps_buf=()
  _record() { steps_buf+=("$1|$2"); }

  # Map snapshot-detected state to v2 UI marker. -f rewrites "changed" to
  # "forced" so the user can tell a forced reinstall apart from a fresh
  # install — both produce diff'd files, only -f was explicitly requested.
  # Reads $force from the enclosing cmd_setup call.
  _state_to_marker() {
    local s="$1"
    case "$s" in
      changed)   [[ "$force" == "true" ]] && echo forced || echo ok ;;
      unchanged) echo skip ;;
      failed)    echo fail ;;
      *)         echo fail ;;
    esac
  }

  # Colon-separated watch list handed to _run_setup_step for the steps that
  # write into the AI tools' directories.
  local _ai_dirs="$HOME/.claude:$HOME/.gemini:$HOME/.kimi:$HOME/.kimi-code:$HOME/.codex:$HOME/.cursor:$HOME/.trae:$HOME/.config/opencode:$HOME/.openclaw:$HOME/.pi:$HOME/.deepseek:$HOME/.qwen"

  _run_setup_step "$ROLL_HOME" _install_local "$force"
  _record "$(_state_to_marker "$_ROLL_SETUP_STATE")" "Install templates & conventions to ~/.roll"

  _run_setup_step "$_ai_dirs" _sync_conventions "$force"
  _record "$(_state_to_marker "$_ROLL_SETUP_STATE")" "Sync conventions to AI tools"

  # NOTE(review): the label names ~/.claude only, while the step is watched
  # across every directory in $_ai_dirs — confirm the wording is intended.
  _run_setup_step "$_ai_dirs" _sync_skills "$force"
  _record "$(_state_to_marker "$_ROLL_SETUP_STATE")" "Install skills to ~/.claude"

  _run_setup_step "$ROLL_HOME/.peer-state" _peer_ensure_state_dir
  _record "$(_state_to_marker "$_ROLL_SETUP_STATE")" "Initialize peer-review state directory"

  # US-INFRA-008: configure git hooks path so TCR pre-commit gate works in this repo
  _run_setup_step "$PWD" _ensure_hooks_path
  _record "$(_state_to_marker "$_ROLL_SETUP_STATE")" "Configure git hooks path"

  # tmux is checked directly rather than through _run_setup_step: the step
  # only counts as ok when tmux is actually on PATH afterwards.
  if command -v tmux >/dev/null 2>&1; then
    _record skip "Ensure tmux is installed (already present)"
  else
    if _ensure_tmux >/dev/null 2>&1 && command -v tmux >/dev/null 2>&1; then
      _record ok "Ensure tmux is installed"
    else
      _record fail "Ensure tmux is installed"
    fi
  fi

  # FIX-078: launchd plist installation was removed from setup — a plist is a
  # per-project resource, while setup is the global install phase and should
  # not leave a disabled placeholder behind for the cwd. When needed, cmd_init
  # and _loop_on each call _install_launchd_plists themselves.

  _emit_setup_v2_ui "${steps_buf[@]}"
}

# FIX-073: Render the cmd_setup v2 UI from per-step outcomes captured above.
# FIX-075: footer composition depends on how many steps actually changed —
#   all unchanged → "no changes"; some forced (~) → "re-installed (forced)";
#   any failed → "Setup incomplete"; otherwise → "X items refreshed".
#   $@  one "<status>|<label>" string per step, in display order
# The inline Python program turns those arguments into a JSON payload
# (header, numbered steps, footer) on stdout, which is piped into
# lib/roll-setup.py.
_emit_setup_v2_ui() {
  # Ask the renderer for plain output when NO_COLOR is set or stdout is not a
  # terminal.
  local color_flag=""
  if [[ -n "${NO_COLOR:-}" ]] || ! [ -t 1 ]; then
    color_flag="--no-color"
  fi

  # $color_flag is left unquoted on purpose: when empty it must expand to no
  # argument at all rather than to an empty string.
  python3 - "$@" <<'PY' \
    | python3 "${ROLL_PKG_DIR}/lib/roll-setup.py" $color_flag
import json, sys
entries = sys.argv[1:]
steps = []
for i, entry in enumerate(entries, start=1):
    status, _sep, label = entry.partition("|")
    steps.append({"num": i, "label": label, "status": status})

n_failed   = sum(1 for s in steps if s["status"] == "fail")
n_forced   = sum(1 for s in steps if s["status"] == "forced")
n_changed  = sum(1 for s in steps if s["status"] == "ok")

if n_failed:
    footer_status = "fail"
    label = "Setup incomplete"
    hint = None
elif n_forced:
    footer_status = "ok"
    label = f"Setup re-installed (forced — {n_forced} item{'s' if n_forced != 1 else ''})"
    hint = "run roll init inside a project"
elif n_changed == 0:
    footer_status = "ok"
    label = "Setup complete (no changes)"
    hint = "everything already up to date"
else:
    footer_status = "ok"
    label = f"Setup complete ({n_changed} item{'s' if n_changed != 1 else ''} refreshed)"
    hint = "run roll init inside a project"

payload = {
    "header_label": "SETUP",
    "subtitle":     "初始化",
    "steps":        steps,
    "footer": {
        "status": footer_status,
        "label":  label,
        "hint":   hint,
    },
}
print(json.dumps(payload))
PY
}

# ─── PR pipeline hint ────────────────────────────────────────────────────────
# US-AUTO-035: print the one-time branch-protection command that flips repo
# from path A (CI gate only) to path C (CI + AI review double gate). Reading
# this hint is opt-in; the command is destructive (changes branch protection)
# so it is never run automatically.
# Takes no arguments; writes the bilingual (English / Chinese) hint to stdout.
# The heredoc delimiter is quoted, so the text is printed literally.
# NOTE(review): GitHub's REST reference documents PATCH on
# .../protection/required_pull_request_reviews rather than on .../protection
# itself — confirm the printed `gh api` command against the current API docs.
_print_pr_pipeline_hint() {
  cat <<'HINT'

  Optional — enable AI review as a hard merge gate (path C).
  可选 —— 启用 AI 评审作为合并双门（路径 C）。

  Run once per repo (requires admin token), then claude-code-review.yml
  approvals become a required merge gate alongside CI:
  每个仓库执行一次（需要管理员 token），之后 claude-code-review.yml 的
  approve 将与 CI 一起成为合并必经的双门：

      gh api -X PATCH repos/<owner>/<repo>/branches/main/protection \
        -f required_pull_request_reviews.required_approving_review_count=1

  Escape hatch: add [skip-ai-review] to a PR body, or include
  SKIP_AI_REVIEW in any commit message, to bypass AI review for that PR.
  紧急通道：在 PR body 加 [skip-ai-review]，或在任一 commit message
  里包含 SKIP_AI_REVIEW，可对该 PR 绕过 AI 评审。

HINT
}

# ─── US-SKILL-016: skills catalog (single source of truth) ───────────────────
# guide/skills.md is a GENERATED catalog projected from skills/*/SKILL.md
# frontmatter (name + description). Adding/removing a skill regenerates the
# catalog — no hand-editing. `roll skills check` (and `roll doctor`) verify the
# repo copy matches a fresh scan so manual edits can't drift back in.
#
# bash 3.2 compatible: no declare -A / mapfile / ${var^^}. Frontmatter is a
# fixed `name:`/`description:` two-field block delimited by `---` lines; we read
# only the first frontmatter block of each SKILL.md.

# Extract a single scalar frontmatter field ($2) from a SKILL.md file ($1).
# Returns the value with surrounding single/double quotes stripped. Only the
# first `---`-delimited block is scanned. Handles YAML block scalars
# (`field: |` / `field: >`): subsequent more-indented lines are folded into a
# single space-separated string. Empty string if the field is absent.
#
# Notes on the awk program below:
#   - A file whose first line is not exactly `---` has no frontmatter and
#     yields no output.
#   - Each frontmatter line is split at its FIRST ":"; the key must equal the
#     requested field exactly after trimming blanks.
#   - Both the literal (`|`) and folded (`>`) block styles, with or without a
#     chomping indicator, are flattened the same way; blank lines inside the
#     block are dropped.
#   - Only the first occurrence of the field is reported.
_skill_frontmatter_field() {
  local file="$1" field="$2"
  # The field name is passed with -v rather than interpolated into the program.
  awk -v field="$field" '
    NR == 1 && $0 != "---" { exit }
    NR == 1 { infm = 1; next }

    # Once we are collecting a block scalar, gather indented continuation lines.
    collecting {
      # A non-indented line (or the closing ---, ignoring trailing space) ends
      # the block.
      if ($0 ~ /^[^ \t]/ || $0 ~ /^---[ \t]*$/) {
        print val
        printed = 1
        exit
      }
      line = $0
      sub(/^[ \t]+/, "", line)
      sub(/[ \t]+$/, "", line)
      if (line == "") next
      val = (val == "" ? line : val " " line)
      next
    }

    infm && $0 == "---" { exit }
    infm {
      idx = index($0, ":")
      if (idx == 0) next
      key = substr($0, 1, idx - 1)
      v = substr($0, idx + 1)
      gsub(/^[ \t]+|[ \t]+$/, "", key)
      if (key != field) next
      sub(/^[ \t]+/, "", v)
      sub(/[ \t]+$/, "", v)
      # Block scalar indicator → fold following indented lines.
      if (v == "|" || v == ">" || v == "|-" || v == ">-" || v == "|+" || v == ">+") {
        collecting = 1
        val = ""
        next
      }
      # Strip a single matching pair of surrounding quotes.
      if (v ~ /^".*"$/ || v ~ /^'\''.*'\''$/) {
        v = substr(v, 2, length(v) - 2)
      }
      print v
      exit
    }

    # EOF reached mid-block-scalar (no closing --- / non-indented line): still
    # emit what we collected so a file that ends inside the block is not lost.
    END {
      if (collecting && !printed) print val
    }
  ' "$file"
}

# Emit the generated catalog markdown to stdout.
#   $1  skills directory to scan (default: $ROLL_PKG_DIR/skills)
# One table row is written per sub-directory that contains a SKILL.md; the
# name falls back to the directory name when the frontmatter has none.
# Rows are ordered by directory name in byte (LC_ALL=C) order. Plain glob
# expansion is sorted by the current locale's collation, which differs between
# machines, so the order is pinned explicitly to keep the output — and
# therefore `roll skills check` — deterministic everywhere.
_skills_catalog_generate() {
  local skills_dir="${1:-$ROLL_PKG_DIR/skills}"
  echo "# Roll Skill Catalog"
  echo ""
  echo "> GENERATED by \`roll skills generate\` — do not edit by hand."
  echo "> 由 \`roll skills generate\` 生成 — 请勿手工编辑。"
  echo ">"
  echo "> Source of truth: each skill's \`skills/<name>/SKILL.md\` frontmatter."
  echo "> 事实源：各 skill 的 \`skills/<name>/SKILL.md\` frontmatter。"
  echo ""
  echo "| Skill | Description |"
  echo "|-------|-------------|"
  local skill_dir name desc
  while IFS= read -r skill_dir; do
    # Also filters the literal pattern left behind by an unmatched glob.
    [[ -d "$skill_dir" ]] || continue
    [[ -f "$skill_dir/SKILL.md" ]] || continue
    name="$(_skill_frontmatter_field "$skill_dir/SKILL.md" name)"
    [[ -n "$name" ]] || name="${skill_dir##*/}"
    desc="$(_skill_frontmatter_field "$skill_dir/SKILL.md" description)"
    # Escape pipe chars so a description with `|` doesn't break the table.
    desc="${desc//|/\\|}"
    printf '| `%s` | %s |\n' "$name" "$desc"
  done < <(
    # Trailing slashes are stripped before sorting so "a" sorts ahead of "a-b".
    printf '%s\n' "$skills_dir"/*/ | sed 's:/*$::' | LC_ALL=C sort
  )
}

# Print the path of the committed catalog file, guide/skills.md under
# $ROLL_PKG_DIR, without a trailing newline.
_skills_catalog_path() {
  local catalog_rel="guide/skills.md"
  printf '%s' "${ROLL_PKG_DIR}/${catalog_rel}"
}

# `roll skills <sub>`: maintain the generated skills catalog.
#   generate | gen   rewrite the catalog file from a fresh scan
#   check            compare the catalog file with a fresh scan; returns 1
#                    and prints a unified diff when they differ or when the
#                    file is missing
#   (none) | help | -h | --help   print usage
# Any other subcommand logs an error, prints usage, and returns 1.
cmd_skills() {
  local sub="${1:-}"
  shift || true

  case "$sub" in
    generate|gen)
      local out_path
      out_path="$(_skills_catalog_path)"
      _skills_catalog_generate > "$out_path"
      info "$(msg skills.generated "$out_path")"
      ;;
    check)
      local target
      target="$(_skills_catalog_path)"
      [[ -f "$target" ]] || {
        err "$(msg skills.check_missing "$target")"
        return 1
      }
      # Generate ONCE into a temp file so the verdict and the printed diff
      # are based on the same content, even if skills/ changes in between.
      local fresh drifted=0
      fresh="$(mktemp)"
      _skills_catalog_generate > "$fresh"
      diff -u "$target" "$fresh" >/dev/null 2>&1 || drifted=1
      if [[ "$drifted" -eq 1 ]]; then
        err "$(msg skills.check_drift "$target")"
        diff -u "$target" "$fresh" || true
      fi
      rm -f "$fresh"
      if [[ "$drifted" -eq 1 ]]; then
        return 1
      fi
      info "$(msg skills.check_ok)"
      return 0
      ;;
    ""|help|-h|--help)
      echo "$(msg skills.usage)"
      ;;
    *)
      err "$(msg skills.unknown_sub "$sub")"
      echo "$(msg skills.usage)"
      return 1
      ;;
  esac
}

# ─── Doctor ──────────────────────────────────────────────────────────────────
# `roll doctor` is the single home for "things you could tune". It runs its
# report sections in a fixed order: agent detection, PR review extras
# (US-PR-004), skills catalog drift, and stale launchd agents.
cmd_doctor() {
  local _doctor_step
  for _doctor_step in \
    _doctor_agent_section \
    _doctor_pr_section \
    _doctor_skills_catalog_section \
    _doctor_launchd_stale_section
  do
    "$_doctor_step"
  done
}

# US-SKILL-016: drift guard for `roll doctor`. Prints a bilingual heading and
# then one status line: "ok" when the committed catalog equals a fresh scan,
# "drift" when it differs or the file is missing. This is only a nudge — it
# always returns 0; `roll skills check` is the authoritative check.
# Silent no-op when the package has no skills/ directory.
_doctor_skills_catalog_section() {
  if [[ ! -d "$ROLL_PKG_DIR/skills" ]]; then
    return 0
  fi

  local target heading_lang verdict
  target="$(_skills_catalog_path)"

  echo ""
  for heading_lang in en zh; do
    echo "$(ROLL_LANG_RESOLVED="$heading_lang" msg skills.doctor_heading)"
  done

  if [[ -f "$target" ]] && diff -u "$target" <(_skills_catalog_generate) >/dev/null 2>&1; then
    verdict="$(msg skills.doctor_ok)"
  else
    verdict="$(msg skills.doctor_drift)"
  fi
  echo "  $verdict"
  return 0
}

# FIX-128: list every ai_* entry from config, tag each with binary-on-PATH
# status and config-dir existence so the user can see at a glance which
# agents are actually usable vs only have Roll-maintained dirs.
# The agent named by `primary_agent:` (first such line, if any) gets an extra
# label. The ai_kimi_code entry is not listed separately. Silent no-op when
# $ROLL_CONFIG does not exist. Always returns 0.
_doctor_agent_section() {
  [[ -f "$ROLL_CONFIG" ]] || return 0
  echo ""
  echo "$(ROLL_LANG_RESOLVED=en msg doctor.agent_detection)"
  echo "$(ROLL_LANG_RESOLVED=zh msg doctor.agent_detection)"
  echo ""
  local _key _value _name _dir _installed _dir_exists _is_primary _tag
  # A single awk call that succeeds whether or not the key is present. With
  # `set -e` and `pipefail` active, a `grep | sed` pipeline here would abort
  # the whole script for a config that has no primary_agent line.
  _is_primary="$(awk '
    /^primary_agent:/ { sub(/^primary_agent: */, ""); print; exit }
  ' "$ROLL_CONFIG" 2>/dev/null || true)"
  # `|| [[ -n "$_key" ]]` keeps a final line that lacks a trailing newline.
  while IFS=: read -r _key _value || [[ -n "$_key" ]]; do
    [[ "$_key" =~ ^ai_ ]] || continue
    _name="${_key#ai_}"
    [[ "$_name" == "kimi_code" ]] && continue  # dedupe
    # The value looks like " <dir>|<config>|<src>": keep the dir, drop the
    # single leading blank, expand a leading "~".
    _dir="${_value%%|*}"
    _dir="${_dir# }"
    _dir="${_dir/#\~/$HOME}"
    if _agent_installed_by_name "$_name" "$_dir"; then
      _installed="$(msg doctor.agent_installed)"
    else
      _installed="$(msg doctor.agent_missing)"
    fi
    if [[ -d "$_dir" ]]; then
      _dir_exists="$(msg doctor.agent_dir_exists)"
    else
      _dir_exists="$(msg doctor.agent_dir_missing)"
    fi
    _tag=""
    if [[ -n "$_is_primary" && "$_name" == "$_is_primary" ]]; then
      _tag="  ($(msg doctor.agent_primary_label))"
    fi
    printf "  %-10s  %-14s  %s%s\n" "$_name" "$_installed" "$_dir_exists" "$_tag"
  done < "$ROLL_CONFIG"
  return 0
}

# FIX-097: scan ${_LAUNCHD_DIR}/com.roll.*.plist for entries whose
# WorkingDirectory no longer exists on disk. These are the ghost agents left
# behind when a user manually reproduces a bug under /private/tmp/ or
# /var/folders/ — the auto-sandbox redirects plist writes but launchctl
# bootstrap (before this fix) registered them anyway. Print labels +
# cleanup hint; never auto-delete (host launchctl state is user-owned).
_doctor_launchd_stale_section() {
  # Only runs where `uname` reports Darwin and the LaunchAgents dir exists.
  if [[ "$(uname)" != "Darwin" ]]; then
    return 0
  fi
  local agents_dir="${_LAUNCHD_DIR:-${HOME}/Library/LaunchAgents}"
  if [[ ! -d "$agents_dir" ]]; then
    return 0
  fi

  local heading_shown=0 entry job workdir
  for entry in "$agents_dir"/com.roll.*.plist; do
    # An unmatched glob stays literal; skip it.
    if [[ ! -e "$entry" ]]; then
      continue
    fi
    # Take the <string> value on the line right after the WorkingDirectory key.
    workdir=$(awk '
      /<key>WorkingDirectory<\/key>/ { getline; gsub(/.*<string>|<\/string>.*/, ""); print; exit }
    ' "$entry" 2>/dev/null)
    # Entries with no WorkingDirectory, or one that still exists, are fine.
    if [[ -z "$workdir" || -d "$workdir" ]]; then
      continue
    fi
    # The section heading is printed lazily, before the first stale entry.
    if [[ "$heading_shown" -eq 0 ]]; then
      echo ""
      echo "$(msg doctor.stale_plists)"
      echo ""
      heading_shown=1
    fi
    job=$(basename "$entry" .plist)
    echo "  ⚠ ${job}"
    echo "    WorkingDirectory missing: ${workdir}"
    # Hint only; nothing is deleted here.
    echo "    $(msg doctor.stale_plists_cleanup): launchctl bootout gui/$(id -u)/${job}; rm '${entry}'"
  done
  return 0
}

_doctor_pr_section() {
  # Prints the PR-review section of doctor: the branch-protection verdict
  # and the event-workflow verdict. Silent outside a git work tree.
  if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    return 0
  fi

  # Heading in both locales, framed by blank lines.
  local heading_locale
  echo ""
  for heading_locale in en zh; do
    echo "$(ROLL_LANG_RESOLVED="$heading_locale" msg doctor.pr_review_extras)"
  done
  echo ""

  local gate workflow
  gate="$(_doctor_branch_protection_state)"
  workflow="$(_doctor_event_workflow_state)"

  # Anything other than enabled/disabled is treated as unknown; both
  # non-enabled outcomes are followed by the pipeline hint.
  if [[ "$gate" == "enabled" ]]; then
    echo "  $(msg doctor.pr_double_gate_enabled)"
  elif [[ "$gate" == "disabled" ]]; then
    echo "  $(msg doctor.pr_double_gate_disabled)"
    _print_pr_pipeline_hint
  else
    echo "  $(msg doctor.pr_double_gate_unknown)"
    _print_pr_pipeline_hint
  fi

  if [[ "$workflow" == "present" ]]; then
    echo "  $(msg doctor.pr_event_enabled)"
  else
    echo "  $(msg doctor.pr_event_disabled)"
    _print_pr_event_hint
  fi
}

# Returns one of: enabled | disabled | unknown
_doctor_branch_protection_state() {
  # Echoes the review-protection state of the repo's `main` branch:
  #   unknown  — gh is not available, the repo slug cannot be resolved, or
  #              the protection query printed nothing
  #   enabled  — required approving review count is >= 1
  #   disabled — anything else
  command -v gh >/dev/null 2>&1 || { echo unknown; return; }

  local slug
  # `|| true`: a gh failure is handled by the emptiness check below instead
  # of tripping errexit at the assignment.
  slug="$(gh repo view --json owner,name --jq '.owner.login + "/" + .name' 2>/dev/null)" || true
  [[ -n "$slug" ]] || { echo unknown; return; }

  local required
  required="$(gh api "repos/${slug}/branches/main/protection" \
    --jq '.required_pull_request_reviews.required_approving_review_count // 0' \
    2>/dev/null)" || true

  if [[ -z "$required" ]]; then
    echo unknown
  # Only a plain decimal integer is ever compared. Feeding raw API output to
  # (( )) would evaluate it as an arithmetic expression (stderr noise on an
  # error body, and `a[$(cmd)]`-style command execution). Non-numeric output
  # therefore falls through to `disabled`, never into arithmetic. `10#` keeps
  # values like "08" from being read as octal.
  elif [[ "$required" =~ ^[0-9]+$ ]] && (( 10#$required >= 1 )); then
    echo enabled
  else
    echo disabled
  fi
}

# Returns one of: present | absent
_doctor_event_workflow_state() {
  # The check is relative to the current working directory.
  local state="absent"
  if [[ -f ".github/workflows/pr-review-event.yml" ]]; then
    state="present"
  fi
  echo "$state"
}

_print_pr_event_hint() {
  # Emits the whole hint in one printf: three two-line message groups
  # (each a key plus its `_zh` companion), the copy command between the
  # second and third, and blank lines as separators.
  printf '%s\n' \
    "" \
    "  $(msg doctor.pr_event_optional)" \
    "  $(msg doctor.pr_event_optional_zh)" \
    "" \
    "  $(msg doctor.pr_event_without)" \
    "  $(msg doctor.pr_event_without_zh)" \
    "" \
    "      cp templates/workflows/pr-review-event.yml .github/workflows/" \
    "" \
    "  $(msg doctor.pr_event_secret)" \
    "  $(msg doctor.pr_event_secret_zh)" \
    ""
}

# ═══════════════════════════════════════════════════════════════════════════════
# COMMAND: update
# Thin wrapper: upgrade the installed package (via npm, or via a release
# tarball when the install method is curl), then re-sync via setup.
# For npm installs this is equivalent to: npm install -g @seanyao/roll@latest && roll setup
# ═══════════════════════════════════════════════════════════════════════════════
# ─── Helper: resolve latest version tag from GitHub releases ───────────────────
_resolve_remote_version() {
  # A pinned $ROLL_VERSION short-circuits the network lookup entirely.
  local pinned="${ROLL_VERSION:-}"
  if [[ -n "$pinned" ]]; then
    echo "$pinned"
    return 0
  fi

  # Ask the GitHub releases API and pull the tag_name value out of the JSON.
  # `|| true` keeps a failed lookup from aborting here; the empty result is
  # reported below.
  local releases_api="https://api.github.com/repos/seanyao/roll/releases/latest"
  local latest
  latest=$(curl -fsSL \
    -H "Accept: application/vnd.github+json" \
    "$releases_api" \
    | grep '"tag_name"' | sed 's/.*"tag_name"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' 2>/dev/null) || true

  if [[ -n "$latest" ]]; then
    echo "$latest"
    return 0
  fi

  err "Failed to resolve latest version from GitHub."
  echo "You can pin a version with ROLL_VERSION=vX.Y.Z" >&2
  return 1
}

# ─── Helper: download release tarball and atomically swap into place ───────────
_download_and_install_curl() {
  # Download the release tarball for a tag and swap it in for $ROLL_PKG_DIR.
  #   $1 — release tag (used verbatim in the archive URL)
  # Returns 0 once the new tree is in place. Returns 1 on any failure, in
  # which case the previous install is left at (or moved back to)
  # $ROLL_PKG_DIR.
  #
  # cmd_update calls this inside `if ! …`, where `set -e` is ignored, so
  # every step that matters is checked explicitly here.
  local tag="$1"
  local url="https://github.com/seanyao/roll/archive/refs/tags/${tag}.tar.gz"
  local tmp_dir
  if ! tmp_dir="$(mktemp -d)"; then
    err "Failed to create a temporary directory."
    return 1
  fi

  info "[roll] Downloading roll $tag ..."
  if ! curl -fsSL "$url" -o "${tmp_dir}/roll.tar.gz"; then
    rm -rf "$tmp_dir"
    err "$(msg update.curl_download_failed)"
    return 1
  fi

  info "[roll] Extracting ..."
  mkdir -p "${tmp_dir}/extract"
  if ! tar -xzf "${tmp_dir}/roll.tar.gz" --strip-components=1 -C "${tmp_dir}/extract"; then
    rm -rf "$tmp_dir"
    err "$(msg update.curl_extract_failed)"
    return 1
  fi

  # Never swap in a tree that lacks the entry point: that would replace a
  # working install with a broken one.
  if [[ ! -f "${tmp_dir}/extract/bin/roll" ]]; then
    rm -rf "$tmp_dir"
    err "$(msg update.curl_extract_failed)"
    return 1
  fi

  # Preserve install-method marker (not in tarball)
  if [[ -f "$ROLL_PKG_DIR/.install-method" ]]; then
    cp "$ROLL_PKG_DIR/.install-method" "${tmp_dir}/extract/.install-method" 2>/dev/null || true
  fi

  # Swap: move old aside → move new in → drop the backup. The backup is
  # only deleted after the new tree has landed; if the second move fails the
  # old tree is moved back.
  local backup
  backup="${ROLL_PKG_DIR}.bak.$$"
  if ! mv "$ROLL_PKG_DIR" "$backup"; then
    rm -rf "$tmp_dir"
    err "Failed to move the current install aside; nothing was changed."
    return 1
  fi
  if ! mv "${tmp_dir}/extract" "$ROLL_PKG_DIR"; then
    # Clear any partial copy, then restore the previous install.
    rm -rf "$ROLL_PKG_DIR"
    if mv "$backup" "$ROLL_PKG_DIR"; then
      err "Failed to install the new version; the previous install was restored."
    else
      err "Failed to install the new version; the previous install is kept at ${backup}"
    fi
    rm -rf "$tmp_dir"
    return 1
  fi
  rm -rf "$backup"
  rm -rf "$tmp_dir"

  chmod +x "$ROLL_PKG_DIR/bin/roll"
}

_check_installed_version_or_retry() {
  # Compares the VERSION= line of the globally installed bin/roll with the
  # version npm reports for the package. On mismatch: warn, clean the npm
  # cache, reinstall once, and warn again if it still does not match.
  local expected npm_root installed_bin installed
  expected="$(npm view @seanyao/roll version 2>/dev/null || true)"
  npm_root="$(npm root -g 2>/dev/null || true)"
  installed_bin="${npm_root}/@seanyao/roll/bin/roll"
  installed="$(grep "^VERSION=" "$installed_bin" 2>/dev/null | sed 's/VERSION="\([^"]*\)"/\1/' || true)"

  # Without both values there is nothing to compare.
  if [[ -z "$expected" || -z "$installed" ]]; then
    return 0
  fi
  if [[ "$installed" == "$expected" ]]; then
    return 0
  fi

  warn "$(msg update.version_mismatch "$installed" "$expected")"
  # Best-effort retry; failures here are deliberately ignored.
  npm cache clean --force &>/dev/null || true
  npm install -g @seanyao/roll@latest &>/dev/null || true

  local after
  after="$(grep "^VERSION=" "$installed_bin" 2>/dev/null | sed 's/VERSION="\([^"]*\)"/\1/' || true)"
  if [[ -z "$after" || "$after" == "$expected" ]]; then
    return 0
  fi
  warn "$(msg update.still_mismatch "$after")"
}

cmd_update() {
  info "$(msg update.current_version "$VERSION")"

  # The marker file decides the upgrade path; anything but "curl" means npm.
  local marker="$ROLL_PKG_DIR/.install-method"
  local install_method="npm"
  if [[ -f "$marker" ]]; then
    install_method="$(cat "$marker" 2>/dev/null || echo "npm")"
  fi

  case "$install_method" in
    curl)
      info "$(msg update.upgrading_via_curl)"
      echo ""

      local tag
      tag="$(_resolve_remote_version)"
      [[ -n "$tag" ]] || exit 1

      _download_and_install_curl "$tag" || exit 1

      # Report the version found in the freshly installed bin/roll.
      local new_version
      new_version="$(grep "^VERSION=" "$ROLL_PKG_DIR/bin/roll" 2>/dev/null | sed 's/VERSION="\([^"]*\)"/\1/' || true)"
      if [[ -n "$new_version" ]]; then
        info "$(msg update.new_version "$new_version")"
      fi
      ;;
    *)
      info "$(msg update.upgrading_via_npm)"
      echo ""

      if ! npm install -g @seanyao/roll@latest; then
        err "$(msg update.npm_install_failed_check_network_proxy)"
        exit 1
      fi

      _check_installed_version_or_retry
      ;;
  esac

  # FIX-166: the version on disk just changed, so the cached update-check
  # result is stale — drop it before it nags about the old "latest".
  _invalidate_update_cache

  echo ""
  info "$(msg update.re_syncing_to_ai_tools)"
  echo ""
  cmd_setup

  echo ""
  _show_changelog
}

# ─── Helper: merge global AGENTS.md into project (no type prompt) ────────────
# Fresh project: copies global AGENTS.md.
# Existing AGENTS.md: appends any ## sections missing from global.
_merge_global_to_project() {
  # Bring $ROLL_GLOBAL/AGENTS.md into <project_dir>/AGENTS.md.
  #   $1 — project directory
  # Fresh project: writes the preamble and every "## " section, minus the
  # frontend section for cli / backend-service / unknown project types.
  # Existing file: appends each "## " section whose heading text is not
  # already present. The outcome is recorded in _ROLL_MERGE_SUMMARY.
  local project_dir="$1"
  local src="$ROLL_GLOBAL/AGENTS.md"
  local dst="$project_dir/AGENTS.md"

  [[ -f "$src" ]] || { warn "Global AGENTS.md not found at ${src/#$HOME/~}"; return; }

  # Detect project type — controls which sections are included
  local project_type skip_frontend=false
  project_type="$(scan_project_type_from_files "$project_dir")"
  case "$project_type" in
    cli|backend-service|unknown) skip_frontend=true ;;
  esac

  # Loop variable is local so it cannot clobber a caller's `line`.
  local line

  if [[ ! -f "$dst" ]]; then
    # Fresh create: write sections filtered by project type
    local fc_h="" fc_b="" fc_pre=true fc_want=true
    # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline;
    # without it the last line of the source would be silently dropped.
    while IFS= read -r line || [[ -n "$line" ]]; do
      if [[ "$line" =~ ^##\  ]]; then
        # A new heading closes the previous section: flush it if wanted.
        if [[ -n "$fc_h" && "$fc_want" == "true" ]]; then
          printf '%s\n%s' "$fc_h" "$fc_b" >> "$dst"
        fi
        fc_h="$line"; fc_b=""; fc_pre=false
        fc_want=true
        [[ "$skip_frontend" == "true" && "$fc_h" == "## 7. Frontend Default Stack" ]] && fc_want=false
      elif [[ "$fc_pre" == "true" ]]; then
        # Everything before the first "## " heading is copied through as-is.
        printf '%s\n' "$line" >> "$dst"
      else
        fc_b+="$line"$'\n'
      fi
    done < "$src"
    if [[ -n "$fc_h" && "$fc_want" == "true" ]]; then
      printf '%s\n%s' "$fc_h" "$fc_b" >> "$dst"
    fi
    ok "Created: AGENTS.md"
    _ROLL_MERGE_SUMMARY+=("created|AGENTS.md")
    return
  fi

  # Section-merge: append any ## sections from global missing in project
  local added=0 cur_h="" cur_b="" skip_sec
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ ^##\  ]]; then
      if [[ -n "$cur_h" ]] && ! grep -qF "$cur_h" "$dst" 2>/dev/null; then
        skip_sec=false
        [[ "$skip_frontend" == "true" && "$cur_h" == "## 7. Frontend Default Stack" ]] && skip_sec=true
        if [[ "$skip_sec" == "false" ]]; then
          printf '\n%s\n%s' "$cur_h" "$cur_b" >> "$dst"
          added=$((added + 1))
        fi
      fi
      cur_h="$line"; cur_b=""
    elif [[ -n "$cur_h" ]]; then
      cur_b+="$line"$'\n'
    fi
  done < "$src"
  # The last section has no following heading to trigger its flush.
  if [[ -n "$cur_h" ]] && ! grep -qF "$cur_h" "$dst" 2>/dev/null; then
    skip_sec=false
    [[ "$skip_frontend" == "true" && "$cur_h" == "## 7. Frontend Default Stack" ]] && skip_sec=true
    if [[ "$skip_sec" == "false" ]]; then
      printf '\n%s\n%s' "$cur_h" "$cur_b" >> "$dst"
      added=$((added + 1))
    fi
  fi

  if [[ $added -gt 0 ]]; then
    ok "Merged: AGENTS.md ($added new sections)"
    _ROLL_MERGE_SUMMARY+=("merged|AGENTS.md")
  else
    _ROLL_MERGE_SUMMARY+=("unchanged|AGENTS.md")
  fi
}

_merge_claude_to_project() {
  # Seed or extend <project_dir>/.claude/CLAUDE.md from the template for the
  # detected project type.
  #   $1 — project directory
  # No template for the type → no-op (returns 0). Missing file → template
  # is copied. Existing file → each "## " section whose heading text is
  # absent gets appended. The outcome is recorded in _ROLL_MERGE_SUMMARY.
  local project_dir="$1"
  local project_type
  project_type="$(scan_project_type_from_files "$project_dir")"

  local tpl_file="$ROLL_TEMPLATES/$project_type/CLAUDE.md"
  [[ -f "$tpl_file" ]] || return 0  # No template for this project type

  local claude_dir="$project_dir/.claude"
  local out_file="$claude_dir/CLAUDE.md"

  mkdir -p "$claude_dir"

  if [[ ! -f "$out_file" ]]; then
    cp "$tpl_file" "$out_file"
    ok "Created: .claude/CLAUDE.md"
    _ROLL_MERGE_SUMMARY+=("created|.claude/CLAUDE.md")
    return
  fi

  # Append any ## sections from template missing in project file.
  # `line` is local so it cannot clobber a caller's variable, and
  # `|| [[ -n "$line" ]]` keeps a final template line that has no trailing
  # newline (it would otherwise be dropped from the last section's body).
  local added=0 cur_h="" cur_b="" line
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ ^##\  ]]; then
      if [[ -n "$cur_h" ]] && ! grep -qF "$cur_h" "$out_file" 2>/dev/null; then
        printf '\n%s\n%s' "$cur_h" "$cur_b" >> "$out_file"
        added=$((added + 1))
      fi
      cur_h="$line"; cur_b=""
    elif [[ -n "$cur_h" ]]; then
      cur_b+="$line"$'\n'
    fi
  done < "$tpl_file"
  # The last section has no following heading to trigger its flush.
  if [[ -n "$cur_h" ]] && ! grep -qF "$cur_h" "$out_file" 2>/dev/null; then
    printf '\n%s\n%s' "$cur_h" "$cur_b" >> "$out_file"
    added=$((added + 1))
  fi

  if [[ $added -gt 0 ]]; then
    ok "Merged: .claude/CLAUDE.md ($added new sections)"
    _ROLL_MERGE_SUMMARY+=("merged|.claude/CLAUDE.md")
  else
    _ROLL_MERGE_SUMMARY+=("unchanged|.claude/CLAUDE.md")
  fi
}

# ═══════════════════════════════════════════════════════════════════════════════
# COMMAND: init
# Initialize or re-merge a project. Always operates on the current directory.
# Fresh project: creates AGENTS.md + .roll/backlog.md + .roll/features/
# Existing AGENTS.md: re-merges global conventions (section-level, non-destructive)
# ═══════════════════════════════════════════════════════════════════════════════
cmd_init() {
  # Flag dispatch.
  case "${1:-}" in
    --apply)
      # US-ONBOARD-009: hand the remaining arguments to _init_apply, which
      # consumes the onboard-plan.yaml produced by $roll-onboard.
      if [[ ! -d "$ROLL_TEMPLATES" ]]; then
        err "$(msg init.no_templates_found_run_roll_setup)"
        exit 1
      fi
      shift
      _init_apply "$@"
      return $?
      ;;
    -*)
      err "$(msg init.unknown_flag_1)"
      exit 1
      ;;
  esac

  if [[ ! -d "$ROLL_TEMPLATES" ]]; then
    err "$(msg init.no_templates_found_run_roll_setup_2)"
    exit 1
  fi

  local project_dir has_agents=false
  project_dir="$(pwd)"
  _ROLL_MERGE_SUMMARY=()

  if [[ -f "$project_dir/AGENTS.md" ]]; then
    has_agents=true
  elif _init_is_legacy_project "$project_dir"; then
    # US-ONBOARD-006: an existing codebase without AGENTS.md is routed
    # through the onboard guide rather than scaffolded blindly.
    _init_legacy_onboard_guide "$project_dir"
    return 0
  fi

  # FIX-073: the steps run silently; what they did is collected in
  # _ROLL_MERGE_SUMMARY and rendered by the v2 UI at the end.
  {
    _merge_global_to_project "$project_dir"
    _merge_claude_to_project "$project_dir"
    _write_backlog "$project_dir/.roll/backlog.md"
    _ensure_features_dir "$project_dir/.roll/features"
    _write_features_md "$project_dir/.roll/features.md"
    # US-AGENT-003: seed agent-routes.yaml from a template; the template
    # name can be overridden, e.g.
    #   ROLL_AGENT_ROUTES_TEMPLATE=minimal roll init
    # A seeding failure is tolerated.
    _init_seed_agent_routes "${ROLL_AGENT_ROUTES_TEMPLATE:-default}" "$project_dir" || true
    # US-ONBOARD-019: stamp the project as Roll-onboarded.
    _write_version_stamp "$project_dir"
  } >/dev/null

  # A failed convention sync is reported in the UI, not treated as fatal.
  local sync_status="fail"
  if _sync_conventions >/dev/null 2>&1; then
    sync_status="ok"
  fi

  _install_launchd_plists "$project_dir" >/dev/null 2>&1 || true

  _emit_init_v2_ui "$project_dir" "$has_agents" "$sync_status"
}

# FIX-073: Build a real-data JSON payload from _ROLL_MERGE_SUMMARY and pipe it
# to lib/roll-init.py for v2 UI rendering. Replaces the previous demo-only
# render path.
_emit_init_v2_ui() {
  # Render the init result through lib/roll-init.py.
  #   $1 — project dir (shown as project_path)
  #   $2 — "true" when AGENTS.md already existed (switches labels to REINIT)
  #   $3 — sync status for the "Link skills" step, default "ok"
  # Reads the global _ROLL_MERGE_SUMMARY ("action|file" entries).
  local project_dir="$1"
  local has_agents="$2"
  local sync_status="${3:-ok}"

  local header_label="INIT" subtitle="项目初始化" footer_label="Initialized"
  if [[ "$has_agents" == "true" ]]; then
    header_label="REINIT"
    subtitle="重新合并约定"
    footer_label="Re-merged"
  fi

  # Colour is disabled when NO_COLOR is set or stdout is not a terminal.
  local -a color_args=()
  if [[ -n "${NO_COLOR:-}" ]] || ! [ -t 1 ]; then
    color_args=(--no-color)
  fi

  # `${arr[@]+"${arr[@]}"}` expands to nothing for an empty/unset array.
  # A bare "${arr[@]}" on an empty array is an "unbound variable" error
  # under `set -u` on bash older than 4.4, and the summary can be empty
  # (e.g. no global AGENTS.md and no CLAUDE.md template).
  python3 - "$project_dir" "$header_label" "$subtitle" "$footer_label" "$sync_status" \
    ${_ROLL_MERGE_SUMMARY[@]+"${_ROLL_MERGE_SUMMARY[@]}"} <<'PY' \
    | python3 "${ROLL_PKG_DIR}/lib/roll-init.py" ${color_args[@]+"${color_args[@]}"}
import json, sys
project_dir, header_label, subtitle, footer_label, sync_status, *summary = sys.argv[1:]
STATUS = {"created":"ok","merged":"ok","unchanged":"skip","overwritten":"ok","kept":"skip"}
OP = {"created":"+","merged":"~","unchanged":"·","overwritten":"~","kept":"·"}
by_file = {}
for entry in summary:
    action, _sep, fname = entry.partition("|")
    if fname:
        by_file[fname] = action

def step(num, label, fname):
    act = by_file.get(fname)
    if not act:
        return {"num": num, "label": label, "status": "skip",
                "note": "not modified"}
    return {"num": num, "label": label, "status": STATUS.get(act, "ok"),
            "files": [[OP.get(act, "·"), fname]]}

steps = [
    {"num": 1, "label": "Detect project type", "status": "ok"},
    step(2, "Create AGENTS.md",           "AGENTS.md"),
    step(3, "Create .roll/backlog.md",    ".roll/backlog.md"),
    step(4, "Create .roll/features/",     ".roll/features/"),
    step(5, "Merge existing CLAUDE.md",   ".claude/CLAUDE.md"),
    {"num": 6, "label": "Link skills to AI clients", "status": sync_status},
]
footer_status = "fail" if any(s["status"] == "fail" for s in steps) else "ok"
payload = {
    "header_label": header_label,
    "subtitle":     subtitle,
    "project_path": project_dir,
    "steps":        steps,
    "footer":       {"status": footer_status,
                     "label": footer_label if footer_status == "ok" else "Init incomplete"},
    "next": [
        ["Edit .roll/backlog.md",   "open the backlog and add your first US"],
        ["Run roll loop now",       "execute one cycle manually to test the flow"],
        ["Enable loop scheduling",  "roll loop on  — let it run hourly"],
    ],
}
print(json.dumps(payload))
PY
}

# US-ONBOARD-006: Legacy detection
# A project is "Legacy" if it has substantive code but no AGENTS.md to anchor
# Roll conventions. US-ONBOARD-012 widened the recogniser to cover non-canonical
# layouts (WeChat mini-program, Python flat, Terraform, etc.) — any of:
#   1. Classic layout: src/app/lib/pkg/cmd contains ≥10 non-empty files.
#   2. A manifest of any common ecosystem at the project root.
#   3. Git history exists (at least one commit on HEAD).
# Any one of these signals alone is enough; the AGENTS.md check happens earlier.
_init_is_legacy_project() {
  # Returns 0 as soon as any legacy signal is found under $1, else 1.
  local project_dir="$1"

  # Signal 1 — a classic source directory holding at least 10 non-empty files.
  local sub file_count
  for sub in src app lib pkg cmd; do
    [[ -d "$project_dir/$sub" ]] || continue
    file_count=$(find "$project_dir/$sub" -type f -not -empty 2>/dev/null | wc -l | tr -d ' ')
    if [[ "$file_count" -ge 10 ]]; then
      return 0
    fi
  done

  # Signal 2 — an ecosystem manifest at the project root.
  local -a manifests=(
    package.json pyproject.toml requirements.txt setup.py setup.cfg Pipfile
    go.mod Cargo.toml Gemfile pom.xml build.gradle build.gradle.kts
    Makefile Dockerfile docker-compose.yml docker-compose.yaml
    app.json project.config.json
    mix.exs composer.json deno.json deno.jsonc
  )
  local candidate
  for candidate in "${manifests[@]}"; do
    if [[ -f "$project_dir/$candidate" ]]; then
      return 0
    fi
  done
  # Terraform has no single manifest name: any *.tf at the root counts.
  if compgen -G "$project_dir/*.tf" >/dev/null 2>&1; then
    return 0
  fi

  # Signal 3 — .git exists (as directory or file) and HEAD resolves to a commit.
  if [[ -d "$project_dir/.git" || -f "$project_dir/.git" ]] \
      && ( cd "$project_dir" && git rev-parse --verify HEAD >/dev/null 2>&1 ); then
    return 0
  fi

  return 1
}

# US-ONBOARD-006: Agent discovery + token consumption notice + onboard guidance.
# US-ONBOARD-018: also auto-launches the chosen agent in interactive mode with
# the $roll-onboard skill content pre-loaded as the initial prompt, then chains
# into `roll init --apply` when the conversation ends successfully.
_init_legacy_onboard_guide() {
  # Walks the user through onboarding a legacy project: prints why it was
  # detected, lists detected / missing agents, shows the token and privacy
  # notices, picks an agent and launches it.
  #   $1 — project directory
  # Returns 1 when no agent is installed or none could be selected;
  # otherwise the status of _run_onboard_agent.
  local project_dir="$1"
  local count_summary
  count_summary=$(_init_legacy_file_summary "$project_dir")

  # Quoted: the summary contains spaces and must reach msg as ONE argument
  # (unquoted it is word-split and glob-expanded).
  info "$(msg init.detected_legacy_project "${count_summary}")"
  echo ""

  # Discover installed agents (writes to globals: _ONBOARD_INSTALLED, _ONBOARD_MISSING).
  _onboard_discover_agents

  echo "$(msg init.onboarding)"
  echo "  Onboarding requires an AI agent to read your code. Detected:"
  echo ""
  local n
  # The length checks keep empty arrays from being expanded under `set -u`.
  if [[ ${#_ONBOARD_INSTALLED[@]} -gt 0 ]]; then
    for n in "${_ONBOARD_INSTALLED[@]}"; do
      printf "    %b✓%b %s   (installed)\n" "${GREEN}" "${NC}" "$n"
    done
  fi
  if [[ ${#_ONBOARD_MISSING[@]} -gt 0 ]]; then
    for n in "${_ONBOARD_MISSING[@]}"; do
      printf "    %b✗%b %s   (not found)\n" "${RED}" "${NC}" "$n"
    done
  fi

  if [[ ${#_ONBOARD_INSTALLED[@]} -eq 0 ]]; then
    echo ""
    err "No AI agent detected. Install one (e.g., 'claude', 'codex', 'kimi') and try again."
    err "$(msg init.no_ai_agent_detected_install_one)"
    return 1
  fi

  echo ""
  echo "$(msg init.the_process_will_use_your_agent)"
  echo "  Onboarding uses your agent to call models — tokens are billed to your account."
  echo ""
  echo "$(msg init.code_and_conversations_stay_in_your)"
  echo "  Your code and conversation stay in your agent — Roll never uploads anything."
  echo ""

  # US-ONBOARD-018: select an agent. Single installed → auto-pick. Multiple
  # installed → ask the user (or honour ROLL_ONBOARD_AGENT for non-interactive
  # callers and tests).
  local chosen
  chosen=$(_onboard_select_agent "${_ONBOARD_INSTALLED[@]}") || return 1
  [[ -n "$chosen" ]] || return 1

  echo ""
  info "$(msg init.launching "${chosen}")"
  echo "  Conversation ends with /exit (or Ctrl-C). On exit Roll will run apply for you."
  echo "$(msg init.use_exit_to_end_or_ctrl)"
  echo ""

  # US-ONBOARD-018: actually run the agent with the onboard prompt pre-loaded.
  _run_onboard_agent "$chosen" "$project_dir"
}

# US-ONBOARD-018: discover installed AI agents from ROLL_CONFIG.
# Populates global arrays _ONBOARD_INSTALLED and _ONBOARD_MISSING.
# Extracted from _init_legacy_onboard_guide so it can be unit-tested.
_onboard_discover_agents() {
  # Resets and fills the globals _ONBOARD_INSTALLED / _ONBOARD_MISSING from
  # the ai_* entries of $ROLL_CONFIG. Each name appears at most once per
  # list. Always returns 0.
  _ONBOARD_INSTALLED=()
  _ONBOARD_MISSING=()
  # No config means no agents to report. Without this guard the input
  # redirection below fails and aborts the run under `set -e`
  # (_doctor_agent_section guards the same way).
  [[ -f "$ROLL_CONFIG" ]] || return 0
  local _key _value _name _dir _already _existing
  # `|| [[ -n "$_key" ]]` keeps a final line that has no trailing newline.
  while IFS=: read -r _key _value || [[ -n "$_key" ]]; do
    [[ "$_key" =~ ^ai_ ]] || continue
    _name="${_key#ai_}"
    # ai_kimi_code → kimi (avoid listing the same agent twice).
    [[ "$_name" == "kimi_code" ]] && _name="kimi"
    # Value is "<dir>|<rest>": keep the dir, drop one leading space, and
    # expand a leading ~ to $HOME.
    _dir="${_value%%|*}"
    _dir="${_dir# }"
    _dir="${_dir/#\~/$HOME}"
    # FIX-128: route via _agent_installed_by_name so "installed" means the
    # CLI is actually on PATH for known agents, not just the config dir
    # that Roll's own convention sync would have created.
    _already=0
    if _agent_installed_by_name "$_name" "$_dir"; then
      # Dedupe — kimi may appear under both ai_kimi and ai_kimi_code.
      # `${arr[@]+...}` keeps `set -u` happy when the array is still empty.
      for _existing in ${_ONBOARD_INSTALLED[@]+"${_ONBOARD_INSTALLED[@]}"}; do
        if [[ "$_existing" == "$_name" ]]; then _already=1; break; fi
      done
      if [[ $_already -eq 0 ]]; then _ONBOARD_INSTALLED+=("$_name"); fi
    else
      for _existing in ${_ONBOARD_MISSING[@]+"${_ONBOARD_MISSING[@]}"}; do
        if [[ "$_existing" == "$_name" ]]; then _already=1; break; fi
      done
      if [[ $_already -eq 0 ]]; then _ONBOARD_MISSING+=("$_name"); fi
    fi
  done < "$ROLL_CONFIG"
  return 0
}

# US-ONBOARD-018: pick an agent for the onboard flow.
# - $ROLL_ONBOARD_AGENT (env) wins if set and present in the candidate list.
# - Single candidate → auto-pick (printed to stdout).
# - Multiple candidates → prompt user for a number. Stdin EOF / invalid input → return 1.
# Echoes the chosen agent name on stdout, nothing else.
_onboard_select_agent() {
  # Picks one agent out of the candidates passed as arguments and echoes its
  # name on stdout (prompts and errors go to stderr).
  # Returns 1 when there are no candidates, $ROLL_ONBOARD_AGENT names an
  # agent that is not a candidate, stdin hits EOF, or the input is not a
  # number in range.
  local -a candidates=("$@")
  [[ ${#candidates[@]} -gt 0 ]] || return 1

  # Declared once up front so neither loop can leak a global.
  local c i choice pick

  # Explicit override (tests, CI, dotfile aliases).
  if [[ -n "${ROLL_ONBOARD_AGENT:-}" ]]; then
    for c in "${candidates[@]}"; do
      if [[ "$c" == "$ROLL_ONBOARD_AGENT" ]]; then
        printf '%s\n' "$c"
        return 0
      fi
    done
    err "ROLL_ONBOARD_AGENT='${ROLL_ONBOARD_AGENT}' is not in installed agents." >&2
    return 1
  fi

  if [[ ${#candidates[@]} -eq 1 ]]; then
    printf '%s\n' "${candidates[0]}"
    return 0
  fi

  # Multi-agent: prompt the user. To stderr so stdout stays clean for the caller.
  {
    echo "$(msg init.pick_an_agent)"
    i=1
    for c in "${candidates[@]}"; do
      printf "    %d) %s\n" "$i" "$c"
      i=$((i + 1))
    done
    printf "  Enter number [1-%d]: " "${#candidates[@]}"
  } >&2

  if ! IFS= read -r choice; then
    err "$(msg init.no_input_received_aborting_onboard)" >&2
    return 1
  fi
  # Validate as 1–9 decimal digits BEFORE any arithmetic. The digit cap
  # avoids integer overflow, and `10#` makes inputs such as "08" plain
  # decimal instead of an invalid-octal arithmetic error.
  pick=0
  if [[ "$choice" =~ ^[0-9]{1,9}$ ]]; then
    pick=$((10#$choice))
  fi
  if (( pick < 1 || pick > ${#candidates[@]} )); then
    # Quoted: user input must be echoed back verbatim, never word-split or
    # glob-expanded.
    err "$(msg init.invalid_choice "${choice}")" >&2
    return 1
  fi
  printf '%s\n' "${candidates[$((pick - 1))]}"
}

# US-ONBOARD-018: compose the initial prompt for the onboard agent —
# the $roll-onboard skill body (frontmatter stripped) plus a kickoff line.
# Returns 1 if the skill file is missing.
_onboard_initial_prompt() {
  local skill_file="${ROLL_PKG_DIR}/skills/roll-onboard/SKILL.md"
  if [[ ! -f "$skill_file" ]]; then
    err "Skill file missing: ${skill_file}" >&2
    return 1
  fi
  # Kickoff line first, then a blank line, then the skill body. Tests match
  # on this exact prefix, so the wording must stay as it is.
  local kickoff="Run the \$roll-onboard skill below for this project. Follow it end-to-end and write .roll/onboard-plan.yaml when done."
  printf '%s\n\n' "$kickoff"
  _skill_content "$skill_file"
}

# US-ONBOARD-018: print retry / switch-agent guidance after a failed onboard run.
# Extracted so we can unit-test the wording without spawning subprocesses.
# $1 = chosen agent name, $2 = exit code from the agent.
_onboard_failure_hint() {
  local agent="$1" code="$2"
  # Everything in this group is sent to stderr.
  {
    echo ""
    # 130 is reported as a cancellation; any other code names the agent
    # and the code.
    case "$code" in
      130) err "$(msg init.onboard_cancelled)" ;;
      *)   err "$(msg init.onboard_agent_exited "$agent" "$code")" ;;
    esac
    echo ""
    echo "  $(msg init.onboard_next_step)"
    echo "    - $(msg init.onboard_retry)"
    echo "    - $(msg init.onboard_retry_en)"
    echo "    - $(msg init.onboard_switch)"
    echo "    - $(msg init.onboard_switch_en)"
    echo ""
  } >&2
}

# US-ONBOARD-018: launch the chosen agent in interactive mode with the onboard
# prompt pre-loaded, wait for it to exit, then branch:
#   exit 0  + .roll/onboard-plan.yaml present → chain into roll init --apply
#   exit 0  + plan missing                    → tell user to re-run if they want
#   exit !0 (incl. 130 from SIGINT)           → print retry / switch-agent hint
# $1 = chosen agent name, $2 = project dir
_run_onboard_agent() {
  # Launches the chosen agent with the onboard prompt and acts on the result.
  #   $1 — agent name, $2 — project dir
  # Returns 1 when the prompt or the agent's argv cannot be built, the
  # agent's own exit code when it is non-zero, 1 when it exits cleanly
  # without writing .roll/onboard-plan.yaml, otherwise the status of
  # _init_apply.
  local agent="$1" project_dir="$2"
  local prompt
  prompt=$(_onboard_initial_prompt) || return 1
  # _agent_argv is expected to fill the global _AGENT_ARGV used below.
  _agent_argv "$agent" interactive "$prompt" || {
    # Quoted so the agent name reaches msg as a single argument.
    err "$(msg init.agent_has_no_interactive_mode_wired "${agent}")" >&2
    return 1
  }

  # Run attached to the user's tty so the agent's REPL gets stdin/stdout/stderr.
  # `set -e` is active script-wide; suppress with `|| rc=$?` so the failure
  # branch (SIGINT 130, agent error) can be handled instead of aborting init.
  local rc=0
  "${_AGENT_ARGV[@]}" || rc=$?

  if [[ "$rc" -ne 0 ]]; then
    _onboard_failure_hint "$agent" "$rc"
    return "$rc"
  fi

  if [[ ! -f "${project_dir}/.roll/onboard-plan.yaml" ]]; then
    echo "" >&2
    err "Agent exited cleanly but did not write .roll/onboard-plan.yaml." >&2
    err "$(msg init.agent)" >&2
    echo "  Re-run \`roll init\` once you've completed the conversation." >&2
    echo "$(msg init.en_roll_init)" >&2
    return 1
  fi

  # Plan present → chain into apply automatically. The subshell keeps the
  # directory change from leaking into the caller.
  echo "" >&2
  info "$(msg init.plan_written_running_apply)"
  ( cd "$project_dir" && _init_apply )
}

# Helper: human-readable summary of why this is detected as legacy.
_init_legacy_file_summary() {
  # Echoes "no AGENTS.md, " followed by the space-joined reasons found.
  local project_dir="$1"
  local -a parts=()

  # Classic source directories with at least 10 non-empty files each.
  local sub file_count
  for sub in src app lib pkg cmd; do
    [[ -d "$project_dir/$sub" ]] || continue
    file_count=$(find "$project_dir/$sub" -type f -not -empty 2>/dev/null | wc -l | tr -d ' ')
    [[ "$file_count" -ge 10 ]] || continue
    parts+=("${file_count} files in ${sub}/")
  done

  # US-ONBOARD-012: report the first manifest found (list order decides).
  local -a manifests=(
    package.json pyproject.toml requirements.txt setup.py setup.cfg Pipfile
    go.mod Cargo.toml Gemfile pom.xml build.gradle build.gradle.kts
    Makefile Dockerfile docker-compose.yml docker-compose.yaml
    app.json project.config.json
    mix.exs composer.json deno.json deno.jsonc
  )
  local candidate
  for candidate in "${manifests[@]}"; do
    [[ -f "$project_dir/$candidate" ]] || continue
    parts+=("manifest: $candidate")
    break
  done

  if compgen -G "$project_dir/*.tf" >/dev/null 2>&1; then
    parts+=("Terraform .tf files")
  fi

  # Git history is mentioned only when no other reason was found.
  if [[ ${#parts[@]} -eq 0 ]]; then
    if [[ -d "$project_dir/.git" || -f "$project_dir/.git" ]]; then
      if ( cd "$project_dir" && git rev-parse --verify HEAD >/dev/null 2>&1 ); then
        parts+=("git history present")
      fi
    fi
  fi

  echo "no AGENTS.md, ${parts[*]}"
}

# US-ONBOARD-013: changeset recording — onboard writes a manifest of every
# side effect (files created, .gitignore entries, scope) into
# .roll/onboard-changeset.yaml so `roll offboard` has a rollback record.
# Without this, a user who wants to retire Roll from a project has to guess
# which files came from onboard vs their own work.
_onboard_changeset_path() {
  # $1 — project directory; echoes the changeset file path inside it.
  local project_dir="$1"
  printf '%s\n' "${project_dir}/.roll/onboard-changeset.yaml"
}

# Begin a fresh changeset record. Overwrites any prior file — every apply
# starts from a clean slate; offboard reads the latest record.
_onboard_changeset_begin() {
  local project_dir="$1"
  local record
  record=$(_onboard_changeset_path "$project_dir")
  mkdir -p "$(dirname "$record")"
  # Truncate and rewrite: two comment lines, the UTC timestamp, the Roll
  # version ("unknown" when _pkg_version fails), then one empty list per
  # section.
  {
    printf '%s\n' \
      '# Generated by `roll init --apply`. Used by `roll offboard` to reverse' \
      '# the changes onboard made. Do not edit by hand.'
    printf 'onboarded_at: "%s"\n' "$(date -u +%FT%TZ)"
    printf 'roll_version: "%s"\n' "$(_pkg_version 2>/dev/null || echo unknown)"
    printf '%s: []\n' \
      scope_approved \
      files_created \
      dirs_created \
      gitignore_entries_added \
      launchd_plists_installed
  } > "$record"
}

# Append a YAML list entry to a section in the changeset.
_onboard_changeset_record() {
  # Append `- "<value>"` to a list section of the project's changeset file.
  #   $1 — project dir, $2 — section name, $3 — value
  # Returns 0 when there is no changeset file (nothing to record) or the
  # entry was written; 1 when the rewrite failed (the file is then left as
  # it was).
  local project_dir="$1" section="$2" value="$3"
  local path; path=$(_onboard_changeset_path "$project_dir")
  [ -f "$path" ] || return 0

  local tmp
  tmp=$(mktemp) || return 1
  # Section and value travel through the environment rather than `awk -v`:
  # -v assignments undergo backslash-escape processing, which would turn a
  # value such as dir\name into "dir<newline>ame" and corrupt the file.
  # NOTE(review): the value is written verbatim between double quotes, with
  # no YAML escaping — confirm what `roll offboard`'s reader expects.
  if ROLL_CS_SECTION="$section" ROLL_CS_VALUE="$value" awk '
      BEGIN {
        sec = ENVIRON["ROLL_CS_SECTION"]
        entry = "  - \"" ENVIRON["ROLL_CS_VALUE"] "\""
      }
      # First entry: the empty-list form `sec: []` becomes a block list.
      $0 == sec ": []" { print sec ":"; print entry; next }
      # Later entries: remember that we are inside the section ...
      $0 == sec ":"    { print; in_sec = 1; next }
      # ... and insert right before the next top-level key.
      in_sec && /^[a-z_]+:/ { print entry; in_sec = 0 }
      { print }
      # The section was the last one in the file.
      END { if (in_sec) print entry }
    ' "$path" > "$tmp" && mv "$tmp" "$path"; then
    return 0
  fi
  # Do not leave the temp file behind on failure.
  rm -f "$tmp"
  return 1
}

# US-ONBOARD-017: insert a candidate story row into .roll/backlog.md under the
# "Epic: Initial Setup" table, idempotently. Returns 0 if a row was written, 1
# if it already existed (so the caller can keep an accurate seeded count).
# The id is a deterministic placeholder (US-SEED-NNN); the human renumbers it
# into their real scheme later.
_onboard_seed_backlog_story() {
  # Insert `| <id> | <title> | 📋 Todo |` into the backlog.
  #   $1 — backlog file, $2 — row id, $3 — story title
  # Returns 0 only when a row was actually written; 1 when the file is
  # missing, the id is already present, or the rewrite failed.
  local backlog="$1" id="$2" title="$3"
  [ -f "$backlog" ] || return 1
  # Idempotency: never append a row whose id already appears in the file.
  if grep -qF -- "| ${id} |" "$backlog"; then
    return 1
  fi
  local row="| ${id} | ${title} | 📋 Todo |"
  local tmp
  tmp=$(mktemp) || return 1
  # Insert directly after the first table separator line that follows the
  # "## Epic: Initial Setup" heading. If that anchor is absent (custom
  # backlog), append the row at end of file as a fallback.
  # The row is passed via the environment, not `awk -v`: -v assignments
  # undergo backslash-escape processing and would mangle titles containing
  # sequences like \n or \t.
  if ROLL_SEED_ROW="$row" awk '
      BEGIN { row = ENVIRON["ROLL_SEED_ROW"] }
      !inserted && $0 ~ /^\|-+\|-+\|-+\|$/ && seen_epic {
        print
        print row
        inserted = 1
        next
      }
      /^## Epic: Initial Setup$/ { seen_epic = 1 }
      { print }
      END { if (!inserted) print row }
    ' "$backlog" > "$tmp" && mv "$tmp" "$backlog"; then
    return 0
  fi
  # A failed rewrite must not be reported as a seeded row, nor leak the
  # temp file.
  rm -f "$tmp"
  return 1
}

# US-ONBOARD-017: insert a candidate FIX row into the "Bug Fixes" table,
# idempotently. Same contract as _onboard_seed_backlog_story.
_onboard_seed_backlog_fix() {
  # Insert `| <id> | <problem> | 📋 Todo |` into the backlog.
  #   $1 — backlog file, $2 — row id, $3 — problem description
  # Returns 0 only when a row was actually written; 1 when the file is
  # missing, the id is already present, or the rewrite failed.
  local backlog="$1" id="$2" problem="$3"
  [ -f "$backlog" ] || return 1
  # Idempotency: an id that already appears in the file is never re-added.
  if grep -qF -- "| ${id} |" "$backlog"; then
    return 1
  fi
  local row="| ${id} | ${problem} | 📋 Todo |"
  local tmp
  tmp=$(mktemp) || return 1
  # Insert directly after the first table separator line that follows the
  # "## Bug Fixes" heading; without that anchor the row is appended at end
  # of file.
  # The row is passed via the environment, not `awk -v`: -v assignments
  # undergo backslash-escape processing and would mangle descriptions
  # containing sequences like \n or \t.
  if ROLL_SEED_ROW="$row" awk '
      BEGIN { row = ENVIRON["ROLL_SEED_ROW"] }
      !inserted && $0 ~ /^\|-+\|-+\|-+\|$/ && seen_fix {
        print
        print row
        inserted = 1
        next
      }
      /^## Bug Fixes$/ { seen_fix = 1 }
      { print }
      END { if (!inserted) print row }
    ' "$backlog" > "$tmp" && mv "$tmp" "$backlog"; then
    return 0
  fi
  # A failed rewrite must not be reported as a seeded row, nor leak the
  # temp file.
  rm -f "$tmp"
  return 1
}

# US-ONBOARD-017: render the three analysis sections to markdown and seed
# BACKLOG/FIX candidates behind a HARD [y/N] gate (default: No).
#
# Rendering (deterministic; delegated to lib/roll-onboard-render.py) ALWAYS
# happens when the plan carries any Phase 2 section — the three md files are
# generated even if the user declines seeding. Each rendered file is registered
# in the offboard changeset's files_created so `roll offboard` removes it.
#
# Seeding is gated by _onboard_confirm_seed: the user sees a preview and must
# answer with an explicit yes; anything else cancels. Cancelling skips ONLY
# seeding; the md files stay. FIX rows from the manifest are offered under a
# second, separate confirm.
#
# Arguments:
#   $1  project directory
#   $2  path to the onboard plan
# Returns:
#   0 in every handled case (renderer absent, renderer exit 2, renderer
#   failure with a warning, or rendering/seeding finished).
_init_render_and_seed() {
  local project_dir="$1" plan="$2"
  local renderer="${ROLL_PKG_DIR}/lib/roll-onboard-render.py"
  [ -f "$renderer" ] || return 0   # renderer absent → nothing to do (no-op)

  # Run the renderer; capture its manifest. Exit 2 = no Phase 2 sections
  # (clean no-op); exit 1 = real failure (warn but don't abort the whole apply).
  # The status is captured with `|| rc=$?`: a bare assignment followed by
  # `rc=$?` never reaches the second line under `set -e`, because the failing
  # command substitution aborts the script first.
  local manifest rc=0
  manifest=$(python3 "$renderer" "$plan" "$project_dir") || rc=$?
  if [ "$rc" -eq 2 ]; then
    return 0
  fi
  if [ "$rc" -ne 0 ]; then
    warn "$(msg init.onboard_render_failed)"
    return 0
  fi

  # Parse the pipe-delimited manifest (kind|a|b; b keeps any further pipes).
  # FILE rows are registered immediately; SEED/FIX rows are buffered for the
  # gated preview.
  local -a seed_ids=() seed_titles=() fix_ids=() fix_titles=()
  local kind a b
  while IFS='|' read -r kind a b; do
    [ -z "$kind" ] && continue
    case "$kind" in
      FILE)
        _onboard_changeset_record "$project_dir" "files_created" "$a"
        ok "$(msg init.onboard_rendered "$a")"
        ;;
      SEED) seed_ids+=("$a"); seed_titles+=("$b") ;;
      FIX)  fix_ids+=("$a");  fix_titles+=("$b") ;;
    esac
  done <<< "$manifest"

  local backlog="$project_dir/.roll/backlog.md"

  # ── Story seeding gate ──────────────────────────────────────────────────
  if [ "${#seed_ids[@]}" -gt 0 ]; then
    if [ ! -f "$backlog" ]; then
      info "$(msg init.onboard_seed_no_backlog)"
    elif _onboard_confirm_seed "${#seed_ids[@]}" "story" seed_ids seed_titles; then
      local i seeded=0
      for i in "${!seed_ids[@]}"; do
        # Only rows actually written count (the seeder returns 1 for ids that
        # already exist).
        if _onboard_seed_backlog_story "$backlog" "${seed_ids[$i]}" "${seed_titles[$i]}"; then
          seeded=$((seeded + 1))
        fi
      done
      ok "$(msg init.onboard_seeded_stories "$seeded")"
    else
      info "$(msg init.onboard_seed_cancelled)"
    fi
  fi

  # ── HIGH-risk FIX seeding gate (separate confirm) ───────────────────────
  if [ "${#fix_ids[@]}" -gt 0 ]; then
    if [ ! -f "$backlog" ]; then
      info "$(msg init.onboard_seed_no_backlog)"
    elif _onboard_confirm_seed "${#fix_ids[@]}" "fix" fix_ids fix_titles; then
      local j seeded_fix=0
      for j in "${!fix_ids[@]}"; do
        if _onboard_seed_backlog_fix "$backlog" "${fix_ids[$j]}" "${fix_titles[$j]}"; then
          seeded_fix=$((seeded_fix + 1))
        fi
      done
      ok "$(msg init.onboard_seeded_fixes "$seeded_fix")"
    else
      info "$(msg init.onboard_seed_cancelled)"
    fi
  fi
}

# US-ONBOARD-017: the HARD [y/N] gate. Prints a preview list on stderr, then
# reads one line from stdin.
#
# Arguments:
#   $1  count shown in the preview message
#   $2  noun: "fix" selects the fix preview message, anything else the story one
#   $3  NAME of the caller's array of ids   (must be non-empty)
#   $4  NAME of the caller's array of titles (parallel to $3)
# Returns:
#   0 ONLY on an explicit y / Y / yes / YES.
#   1 for everything else — n, bare Enter, EOF, Ctrl-C, non-interactive stdin.
#   The default is NO.
_onboard_confirm_seed() {
  local count="$1" noun="$2"
  # Resolve the caller's arrays by name through indirect expansion instead of
  # `local -n`: namerefs need bash 4.3+, while indirect expansion also works on
  # the bash 3.2 that other parts of this file stay compatible with.
  local _ids_ref="${3}[@]" _titles_ref="${4}[@]"
  local -a _ids=("${!_ids_ref}")
  local -a _titles=("${!_titles_ref}")
  echo "" >&2
  if [ "$noun" = "fix" ]; then
    warn "$(msg init.onboard_seed_preview_fix "$count")" >&2
  else
    warn "$(msg init.onboard_seed_preview_story "$count")" >&2
  fi
  local i
  for i in "${!_ids[@]}"; do
    echo "    ${_ids[$i]}  ${_titles[$i]}" >&2
  done
  echo "" >&2

  # Non-interactive stdin (CI / piped): never block, never seed. Cancel + note.
  # ROLL_ASSUME_TTY=1 forces the interactive read path so tests can pipe the
  # answer in.
  if [ "${ROLL_ASSUME_TTY:-}" != "1" ] && [ ! -t 0 ]; then
    info "$(msg init.onboard_seed_noninteractive)" >&2
    return 1
  fi

  echo -e "  ${BOLD}$(msg init.onboard_seed_prompt)${NC} [y/N] " >&2
  # Trap INT locally so Ctrl-C cancels just this gate (returns 1) rather than
  # aborting the whole command. The handler restores the previous disposition
  # ITSELF before returning: `return` inside the handler leaves the function
  # immediately, so a restore placed after `read` would be skipped and the
  # 'return 1' handler would stay installed for the rest of the run.
  local _saved_int _restore_int reply
  _saved_int="$(trap -p INT 2>/dev/null || true)"
  _restore_int="${_saved_int:-trap - INT}"
  # shellcheck disable=SC2064  # expand the restore command now, on purpose
  trap "${_restore_int}; return 1" INT
  if ! read -r reply; then
    # EOF (e.g. </dev/null) → cancel.
    reply=""
  fi
  eval "$_restore_int"

  case "$reply" in
    y|Y|yes|YES) return 0 ;;
    *) return 1 ;;   # n / empty / anything else → cancel
  esac
}

# US-ONBOARD-009: roll init --apply
# Consume .roll/onboard-plan.yaml (produced by $roll-onboard skill) and execute
# all side effects: create .roll/ structure per scope, sync AI tools, write
# .gitignore based on user's Q7 choice.
#
# Plan validation is delegated to lib/roll-plan-validate.py to avoid bash YAML
# parsing fragility. The plan is read from the current working directory.
#
# Returns: 1 when the plan or the validator is missing or validation fails;
# otherwise the status of the final step.
_init_apply() {
  local project_dir; project_dir="$(pwd)"
  local plan="${project_dir}/.roll/onboard-plan.yaml"
  local validator="${ROLL_PKG_DIR}/lib/roll-plan-validate.py"

  if [[ ! -f "$plan" ]]; then
    err "$(msg init.no_onboard_plan_found_at_roll)"
    echo "" >&2
    echo "  Run \$roll-onboard in your AI agent first to generate the plan." >&2
    # Pass the literal text '$roll'. The previous unquoted $roll referenced a
    # variable that is never set, so under `set -u` the substitution died with
    # "unbound variable" and the guidance line was lost.
    # NOTE(review): presumably the catalog placeholder renders the "$roll"
    # prefix of "$roll-onboard" — confirm against the message catalog.
    echo "$(msg init.en_ai_agent_onboard_plan_ap '$roll')" >&2
    return 1
  fi

  if [[ ! -f "$validator" ]]; then
    err "$(msg init.plan_validator_missing "$validator")"
    return 1
  fi

  # Validate plan (schema + generated_at freshness + version)
  if ! python3 "$validator" "$plan"; then
    err "$(msg init.plan_validation_failed_see_errors_above)"
    echo "" >&2
    echo "  If the plan is stale (>24h), regenerate by running \$roll-onboard again." >&2
    return 1
  fi

  info "$(msg init.applying_onboard_plan)"
  _ROLL_MERGE_SUMMARY=()

  # US-ONBOARD-013: start a fresh changeset record so offboard can reverse.
  _onboard_changeset_begin "$project_dir"

  # Read scope from the plan. The path is handed over as argv[1] instead of
  # being pasted into the Python source, so a quote in the project path can
  # neither break nor inject into the snippet.
  local approved
  approved=$(python3 -c '
import sys, yaml
p = yaml.safe_load(open(sys.argv[1]))
print(" ".join(p.get("scope", {}).get("approved", [])))
' "$plan" 2>/dev/null || echo "")

  # Record each approved scope entry for offboard's selective rollback.
  # `read -a` splits on whitespace without subjecting the words to globbing.
  local item
  local -a approved_items=()
  read -r -a approved_items <<< "$approved" || true
  if [ "${#approved_items[@]}" -gt 0 ]; then
    for item in "${approved_items[@]}"; do
      _onboard_changeset_record "$project_dir" "scope_approved" "$item"
    done
  fi

  _merge_global_to_project "$project_dir"
  _merge_claude_to_project "$project_dir"

  # US-ONBOARD-019: stamp the project at onboard-apply time so subsequent
  # invocations recognise it as Roll-onboarded (and offboard can sweep it).
  # The stamp is only recorded when this run actually created it.
  local _stamp_existed=true
  [[ -f "$project_dir/.roll/.version" ]] || _stamp_existed=false
  _write_version_stamp "$project_dir"
  if [[ "$_stamp_existed" == "false" ]] && [[ -f "$project_dir/.roll/.version" ]]; then
    _onboard_changeset_record "$project_dir" "files_created" ".roll/.version"
  fi

  # Create .roll/ artifacts based on scope.approved
  # NOTE(review): unlike the version stamp above, the artifacts below are
  # recorded as created even when they already existed before this run —
  # confirm that offboard removing a pre-existing backlog/dir is intended.
  if [[ " $approved " == *" backlog "* ]]; then
    _write_backlog "$project_dir/.roll/backlog.md"
    _onboard_changeset_record "$project_dir" "files_created" ".roll/backlog.md"
  fi

  # US-AGENT-003: seed agent-routes.yaml. Template precedence:
  #   1. plan.agent_routes_template (set by $roll-onboard interactive flow)
  #   2. ROLL_AGENT_ROUTES_TEMPLATE env var
  #   3. "default"
  # Set to "skip" to omit seeding entirely.
  local _routes_template
  _routes_template=$(python3 -c '
import sys, yaml
p = yaml.safe_load(open(sys.argv[1])) or {}
print(p.get("agent_routes_template", "") or "")
' "$plan" 2>/dev/null || echo "")
  if [[ -z "$_routes_template" ]]; then
    _routes_template="${ROLL_AGENT_ROUTES_TEMPLATE:-default}"
  fi
  if [[ "$_routes_template" != "skip" ]]; then
    if _init_seed_agent_routes "$_routes_template" "$project_dir"; then
      _onboard_changeset_record "$project_dir" "files_created" ".roll/agent-routes.yaml"
    fi
  fi
  if [[ " $approved " == *" features "* ]]; then
    _ensure_features_dir "$project_dir/.roll/features"
    _write_features_md "$project_dir/.roll/features.md"
    _onboard_changeset_record "$project_dir" "dirs_created" ".roll/features"
    _onboard_changeset_record "$project_dir" "files_created" ".roll/features.md"
  fi
  if [[ " $approved " == *" domain "* ]]; then
    mkdir -p "$project_dir/.roll/domain"
    _onboard_changeset_record "$project_dir" "dirs_created" ".roll/domain"
  fi
  if [[ " $approved " == *" briefs "* ]]; then
    mkdir -p "$project_dir/.roll/briefs"
    _onboard_changeset_record "$project_dir" "dirs_created" ".roll/briefs"
  fi

  # US-ONBOARD-017: render the Phase 2 analysis sections to markdown (always,
  # when present) and offer BACKLOG/FIX seeding behind a HARD [y/N] gate. The
  # rendered files are registered in the changeset for offboard. Runs after the
  # scope artifacts so .roll/backlog.md and .roll/domain/ already exist.
  _init_render_and_seed "$project_dir" "$plan"

  print_merge_summary

  # Q7: .gitignore preference
  local gitignore_roll
  gitignore_roll=$(python3 -c '
import sys, yaml
p = yaml.safe_load(open(sys.argv[1]))
print("true" if p.get("privacy", {}).get("gitignore_dot_roll", False) else "false")
' "$plan" 2>/dev/null || echo "false")

  if [[ "$gitignore_roll" == "true" ]]; then
    local gi="$project_dir/.gitignore"
    if ! grep -qFx ".roll/" "$gi" 2>/dev/null; then
      # If the existing file does not end with a newline, terminate its last
      # line first so the new entry is not glued onto it.
      if [[ -s "$gi" ]] && [[ -n "$(tail -c 1 "$gi")" ]]; then
        echo "" >> "$gi"
      fi
      echo ".roll/" >> "$gi"
      _onboard_changeset_record "$project_dir" "gitignore_entries_added" ".roll/"
      ok "$(msg init.added_roll_to_gitignore)"
    fi
  fi

  echo ""
  info "$(msg init.syncing_conventions_to_ai_tools)"
  _sync_conventions
  echo ""

  ok "$(msg init.onboard_apply_complete_onboard)"
}

# Resolve one changeset path against the project root and vet it.
# Prints the absolute path on stdout and returns 0 when the entry is safe to
# delete. Returns 1 (printing nothing) when the entry is empty, contains a
# ".." component, lies outside <root>, or names <root> itself.
#   $1 = project root (absolute), $2 = recorded path (relative or absolute)
_offboard_resolve_path() {
  local root="$1" item="$2" resolved rel
  [[ -n "$item" ]] || return 1
  # A literal prefix match cannot see through "..", so refuse it outright.
  case "/$item/" in
    */../*) return 1 ;;
  esac
  case "$item" in
    /*) resolved="$item" ;;          # absolute — must already start with root
    *)  resolved="$root/$item" ;;
  esac
  case "$resolved" in
    "$root"/*) ;;
    *) return 1 ;;
  esac
  # Strip trailing "/" and "/." so aliases of the root ("", ".", "./") are
  # detected: deleting the project root itself is never a valid entry.
  rel="${resolved#"$root"}"
  while [[ "$rel" == */ || "$rel" == */. ]]; do
    rel="${rel%/}"
    rel="${rel%/.}"
  done
  [[ -n "$rel" ]] || return 1
  printf '%s\n' "$resolved"
}

# US-ONBOARD-014: roll offboard
# Reverse what `roll init --apply` (US-ONBOARD-009/013) did, using the
# changeset manifest at .roll/onboard-changeset.yaml as the rollback record.
# Safety contract:
#   1. Refuse when no changeset exists — print manual instructions instead.
#   2. Default to dry-run; only `--confirm` (or `-y`) actually deletes.
#   3. Only touch entries that are in the manifest. Anything else stays put.
#   4. Refuse to delete a file/dir whose path does not resolve under the
#      current project root, even if the changeset says so (cross-project
#      guard). Nothing is deleted when any entry fails this check.
# Returns: 0 on success / dry-run / empty changeset, 1 on any refusal.
cmd_offboard() {
  local confirm=0
  local arg
  for arg in "$@"; do
    case "$arg" in
      --confirm|-y) confirm=1 ;;
      --help|-h)
        echo "Usage: roll offboard [--confirm]"
        echo "  Preview (default) or apply (--confirm) the removal of every"
        echo "  artefact recorded in .roll/onboard-changeset.yaml."
        return 0
        ;;
      *)
        err "$(msg offboard.unknown_flag "$arg")"
        return 1
        ;;
    esac
  done

  local project_dir; project_dir="$(pwd -P)"
  local changeset; changeset=$(_onboard_changeset_path "$project_dir")

  if [[ ! -f "$changeset" ]]; then
    err "$(msg offboard.no_changeset_en)"
    err "$(msg offboard.no_changeset_zh)"
    echo "" >&2
    echo "  $(msg offboard.manual_offboard)" >&2
    echo "    $(msg offboard.manual_rm_roll)" >&2
    echo "    $(msg offboard.manual_rm_agents)" >&2
    echo "    $(msg offboard.manual_edit_gitignore)" >&2
    return 1
  fi

  # Parse changeset (Python keeps YAML semantics consistent with apply).
  # The substitution is tested directly: a separate `$?` check afterwards is
  # never reached under `set -e`, which aborts on the failed assignment.
  local parser
  if ! parser=$(python3 - "$changeset" <<'PY'
import sys, yaml
try:
    data = yaml.safe_load(open(sys.argv[1])) or {}
except Exception as e:
    print(f"PARSE_ERROR:{e}", file=sys.stderr)
    sys.exit(2)
def pr(section):
    for v in (data.get(section) or []):
        print(f"{section}\t{v}")
pr("files_created")
pr("dirs_created")
pr("gitignore_entries_added")
pr("launchd_plists_installed")
PY
  ); then
    err "$(msg offboard.failed_to_parse_changeset)"
    return 1
  fi

  local files=() dirs=() gi_entries=() plists=()
  local section value
  while IFS=$'\t' read -r section value; do
    [[ -z "$section" ]] && continue
    case "$section" in
      files_created)           files+=("$value") ;;
      dirs_created)            dirs+=("$value") ;;
      gitignore_entries_added) gi_entries+=("$value") ;;
      launchd_plists_installed) plists+=("$value") ;;
    esac
  done <<< "$parser"

  # Cross-project guard — verify every recorded path resolves under
  # project_dir. Catches the case where a user accidentally points roll
  # offboard at a directory whose changeset names paths from elsewhere.
  # The loop is guarded by a count check rather than "${arr[@]:+...}": that
  # form drops a lone empty entry, which would then reach `rm -rf` unchecked.
  local item
  local _all=()
  [ "${#files[@]}" -gt 0 ] && _all+=("${files[@]}")
  [ "${#dirs[@]}" -gt 0 ]  && _all+=("${dirs[@]}")
  if [ "${#_all[@]}" -gt 0 ]; then
    for item in "${_all[@]}"; do
      if ! _offboard_resolve_path "$project_dir" "$item" >/dev/null; then
        err "Refusing to act on '$item' — it does not resolve under $project_dir"
        err "$(msg offboard.en "$item")"
        echo "  This usually means the changeset was copied from another project." >&2
        echo "  Remove .roll/onboard-changeset.yaml manually, or rerun in the right dir." >&2
        return 1
      fi
    done
  fi
  # Plist entries are joined onto ~/Library/LaunchAgents/ below, so they must
  # be bare file names; a "/" would let an entry escape that directory.
  if [ "${#plists[@]}" -gt 0 ]; then
    for item in "${plists[@]}"; do
      case "$item" in
        ''|.|..|*/*)
          err "Refusing to act on launchd plist '$item' — expected a bare file name"
          return 1
          ;;
      esac
    done
  fi

  # Print the plan.
  echo ""
  echo -e "  ${BOLD}Offboard plan for ${project_dir}${NC}"
  echo ""
  if [[ ${#files[@]} -gt 0 ]]; then
    echo -e "  ${RED}Files to remove:${NC}"
    for item in "${files[@]}"; do echo "    rm   $item"; done
    echo ""
  fi
  if [[ ${#dirs[@]} -gt 0 ]]; then
    echo -e "  ${RED}Directories to remove:${NC}"
    for item in "${dirs[@]}"; do echo "    rmdir/r $item"; done
    echo ""
  fi
  if [[ ${#gi_entries[@]} -gt 0 ]]; then
    echo -e "  ${YELLOW}.gitignore entries to remove:${NC}"
    for item in "${gi_entries[@]}"; do echo "    -    $item"; done
    echo ""
  fi
  if [[ ${#plists[@]} -gt 0 ]]; then
    echo -e "  ${YELLOW}launchd plists to unload:${NC}"
    for item in "${plists[@]}"; do echo "    unload $item"; done
    echo ""
  fi
  if [[ ${#files[@]} -eq 0 && ${#dirs[@]} -eq 0 && ${#gi_entries[@]} -eq 0 && ${#plists[@]} -eq 0 ]]; then
    info "Changeset is empty — nothing to offboard."
    info "$(msg offboard.change_list_is_empty_nothing_to)"
    return 0
  fi

  if [[ "$confirm" -ne 1 ]]; then
    echo "  This is a dry-run. Re-run with --confirm to apply."
    echo "$(msg offboard.above_is_a_dry_run_preview)"
    return 0
  fi

  # FIX-125: cycle-context tripwire. The apply phase below runs launchctl
  # unload and rm against ${HOME}/Library/LaunchAgents/<plist>. From inside a
  # loop cycle this would mutate the host's launchd domain using another
  # project's identity. Doc-only offboards (no plists) stay allowed so cycles
  # can still call offboard for non-launchd cleanup.
  if [ "${#plists[@]}" -gt 0 ] && _loop_in_cycle; then
    err "Refusing to unload launchd plists from inside a loop cycle (FIX-125)."
    echo "  Run 'roll offboard --confirm' from a terminal outside the cycle," >&2
    echo "  or pause the loop first: 'roll loop pause'." >&2
    return 1
  fi

  # Apply. Guard every loop with a count check — `set -u` upstream makes
  # naked `"${arr[@]}"` over an empty array a hard error on older bash.
  # Deletions use the vetted absolute path, so absolute entries inside the
  # project are removed where they actually live.
  echo "$(msg offboard.applying_offboard)"
  local target
  if [ "${#files[@]}" -gt 0 ]; then
    for item in "${files[@]}"; do
      target=$(_offboard_resolve_path "$project_dir" "$item") || continue
      rm -f -- "$target" 2>/dev/null && echo "    removed file $item"
    done
  fi
  if [ "${#dirs[@]}" -gt 0 ]; then
    for item in "${dirs[@]}"; do
      target=$(_offboard_resolve_path "$project_dir" "$item") || continue
      rm -rf -- "$target" 2>/dev/null && echo "    removed dir  $item"
    done
  fi
  if [ "${#gi_entries[@]}" -gt 0 ]; then
    local gi="$project_dir/.gitignore" tmp
    for item in "${gi_entries[@]}"; do
      if [[ -f "$gi" ]] && grep -qFx -- "$item" "$gi"; then
        tmp=$(mktemp)
        # grep -v exits 1 when every line was removed; that is not an error.
        grep -vFx -- "$item" "$gi" > "$tmp" || true
        # Write back in place (not mv) so .gitignore keeps its mode; mktemp
        # files are created private to the owner.
        cat "$tmp" > "$gi"
        rm -f "$tmp"
        echo "    .gitignore -   $item"
      fi
    done
  fi
  if [ "${#plists[@]}" -gt 0 ]; then
    for item in "${plists[@]}"; do
      _launchctl_safe unload -w "$HOME/Library/LaunchAgents/$item" 2>/dev/null && echo "    unloaded     $item"
      # Best effort: a plist that cannot be removed must not abort the sweep.
      rm -f -- "$HOME/Library/LaunchAgents/$item" 2>/dev/null || true
    done
  fi
  # Finally, remove the changeset file itself.
  rm -f "$changeset"
  ok "$(msg offboard.offboard_complete_offboard)"
}

# ═══════════════════════════════════════════════════════════════════════════════
# cmd_migrate
# US-ONBOARD-003: One-shot migration from old project layout to .roll/ structure.
#
# Moves process artifacts (BACKLOG.md, PROPOSALS.md, docs/features.md, docs/features/,
# docs/briefs/, docs/dream/, docs/design/, docs/domain/) into .roll/. Also relocates user
# docs (docs/guide/ → guide/, docs/site/ → site/, docs/intro/ → site/slides/).
#
# Three-state idempotency:
#   - old-only:   execute migration via git mv (single atomic commit)
#   - new-only:   no-op with "already migrated" message
#   - both:       error with conflict list (manual resolution required)
#   - neither:    no-op with "nothing to migrate"
# ═══════════════════════════════════════════════════════════════════════════════
cmd_migrate() {
  # Entry point for `roll migrate`. Flags: --dry-run/-n previews the plan,
  # --help/-h prints usage; any other argument is rejected with status 1.
  local dry_run=false opt
  for opt in "$@"; do
    case "$opt" in
      --dry-run|-n) dry_run=true ;;
      -h|--help) _migrate_help; return 0 ;;
      *) err "$(msg migrate.unknown_arg_1 "$opt")"; return 1 ;;
    esac
  done

  # git mv preserves history, so a git repository is a hard requirement.
  git rev-parse --git-dir >/dev/null 2>&1 || {
    err "$(msg migrate.not_a_git_repository_roll_migrate)"
    return 1
  }

  # Load the canonical "src|target" plan, one pair per line (read from stdout
  # to avoid bash 4.3+ nameref).
  local -a moves=()
  while IFS= read -r _line; do
    moves+=("$_line")
  done < <(_migrate_build_moves)

  # Probe the layout: has_new = .roll exists, has_old = any old source exists.
  local has_old=false has_new=false
  local m src tgt
  if [[ -e .roll ]]; then
    has_new=true
  fi
  for m in "${moves[@]}"; do
    src="${m%%|*}"
    if [[ -e "$src" ]]; then
      has_old=true
      break
    fi
  done

  # Dispatch on the (new, old) pair; only old-only falls through to migrate.
  case "${has_new}:${has_old}" in
    true:true)
      err "$(msg migrate.both_old_and_new_structures_exist)"
      echo "" >&2
      echo "$(msg migrate.conflicting_paths)" >&2
      for m in "${moves[@]}"; do
        src="${m%%|*}"
        tgt="${m##*|}"
        if [[ -e "$src" && -e "$tgt" ]]; then
          echo "  - $src  AND  $tgt  both exist" >&2
        fi
      done
      echo "" >&2
      err "$(msg migrate.resolve_manually_then_re_run)"
      return 1
      ;;
    true:false)
      ok "$(msg migrate.already_migrated_roll_exists_no_old)"
      return 0
      ;;
    false:false)
      info "$(msg migrate.no_old_structure_detected_nothing_to)"
      return 0
      ;;
  esac

  # Old-only: keep just the pairs whose source is actually present.
  local -a active_moves=()
  for m in "${moves[@]}"; do
    src="${m%%|*}"
    if [[ -e "$src" ]]; then
      active_moves+=("$m")
    fi
  done

  if [[ ${#active_moves[@]} -eq 0 ]]; then
    warn "$(msg migrate.old_structure_markers_found_but_no)"
    return 0
  fi

  if [[ "$dry_run" == "true" ]]; then
    _migrate_preview "${active_moves[@]}"
    return 0
  fi

  # A real run produces one commit, so tracked files must match HEAD.
  git diff --quiet --ignore-submodules HEAD 2>/dev/null || {
    err "$(msg migrate.working_tree_not_clean_commit_or)"
    return 1
  }

  _migrate_execute "${active_moves[@]}"
}

# Emit the canonical migration plan on stdout, one "src|target" pair per line.
# This list is the single source of truth for what migrates where; it is
# returned via stdout (bash 3.2-compatible — no nameref).
_migrate_build_moves() {
  # Order matters: a directory rename must come before any move whose
  # target_dir is that same directory. Otherwise mkdir -p pre-creates the
  # target and git mv places the source INSIDE it instead of renaming:
  #   - docs/site → site   must precede docs/intro → site/slides
  #   - docs/guide/en → guide/en  must precede docs/practices/engineering-common-sense.md
  #
  # IMPORTANT: the LEFT side of each mapping is a literal OLD path. These must
  # NOT be sed'd during Story 5 code-ref migration — they drive the migration
  # for OTHER projects (and self-migrate idempotency).
  local -a plan=(
    'BACKLOG.md|.roll/backlog.md'
    'PROPOSALS.md|.roll/proposals.md'
    'docs/features.md|.roll/features.md'
    'docs/features|.roll/features'
    'docs/briefs|.roll/briefs'
    'docs/dream|.roll/dream'
    'docs/design|.roll/design'
    'docs/domain|.roll/domain'
    'docs/practices/loop-autorun-verification.md|.roll/verification/loop-autorun-verification.md'
    'docs/site|site'
    'docs/intro|site/slides'
    'docs/guide/en|guide/en'
    'docs/guide/zh|guide/zh'
    'docs/practices/engineering-common-sense.md|guide/en/practices/engineering-common-sense.md'
  )
  printf '%s\n' "${plan[@]}"
}

_migrate_preview() {
  # Dry-run report: print a two-column "old → new" table for every
  # "src|target" pair given as an argument, framed by two info lines.
  info "$(msg migrate.migration_preview_dry_run)"
  echo ""
  printf "  %-60s → %s\n" "$(msg migrate.old_path)" "$(msg migrate.new_path)"

  # Horizontal rule: the box-drawing dash repeated once per brace-expanded arg.
  local rule
  printf -v rule '─%.0s' {1..100}
  printf "  %s\n" "$rule"

  local pair from to
  for pair in "$@"; do
    from="${pair%%|*}"   # text before the first '|'
    to="${pair##*|}"     # text after the last '|'
    printf "  %-60s → %s\n" "$from" "$to"
  done

  echo ""
  info "$(msg migrate.run_without_dry_run_to_execute)"
}

_migrate_execute() {
  # Perform the migration: git mv every "src|target" pair given as an
  # argument, prune empty docs/ directories, then record one commit.
  # Returns 1 as soon as a git mv fails (earlier moves stay staged).
  info "$(msg migrate.migrating_paths_via_git_mv $#)"

  local moved=0
  local pair from to parent
  for pair in "$@"; do
    from="${pair%%|*}"
    to="${pair##*|}"
    parent="$(dirname "$to")"
    if [[ ! -d "$parent" ]]; then
      mkdir -p "$parent"
    fi
    if ! git mv "$from" "$to"; then
      err "git mv failed: $from → $to"
      err "$(msg migrate.aborting_previous_moves_are_staged_but)"
      return 1
    fi
    moved=$((moved + 1))
  done

  # Remove directory shells left empty under docs/ (best effort).
  if [[ -d "docs" ]]; then
    find docs -type d -empty -delete 2>/dev/null || true
  fi

  # One commit for the whole migration.
  local commit_msg="Migrate project layout to .roll/ structure

Atomic migration via 'roll migrate' command. Process artifacts moved
from root and docs/ into .roll/; user docs relocated to guide/ and site/.

Paths migrated: ${moved}"
  git commit --quiet -m "$commit_msg"

  ok "$(msg migrate.migrated_paths_in_a_single_commit ${moved})"
  echo ""
  echo "$(msg migrate.next_steps)"
  echo "    git log -1                    # Inspect the migration commit"
  echo "    roll status                   # Verify new structure"
}

# Print usage for `roll migrate` on stdout.
# The source paths listed here mirror the LEFT side of the mappings emitted by
# _migrate_build_moves (the OLD layout); keep the two in sync.
_migrate_help() {
  cat << 'EOF'
Usage: roll migrate [--dry-run]

Migrate this project's process artifacts (BACKLOG.md, PROPOSALS.md,
docs/features.md, docs/features/, docs/briefs/, docs/dream/, docs/design/,
docs/domain/) into a .roll/ directory. Also relocates docs/guide/ → guide/,
docs/site/ → site/, docs/intro/ → site/slides/.

Options:
  --dry-run, -n   Show what would be moved without modifying files
  --help, -h      Show this help

Three-state idempotency:
  - Only old paths present  → migration executes (single atomic commit)
  - Only .roll/ present     → no-op (already migrated)
  - Both present            → error with conflict list (manual review)
  - Neither                 → no-op (nothing to migrate)

Preconditions:
  - Current directory is a git repository
  - Working tree is clean (commit or stash changes first)

Uses git mv to preserve file history. On success, produces a single commit.
EOF
}

# ─── Helper: print a tidy summary of merge actions ───────────────────────────
# Reads the global array _ROLL_MERGE_SUMMARY, whose entries have the form
# "action|file", and prints one table row per entry on stdout. Entries with an
# action other than merged/created/overwritten/kept/unchanged print no row.
# Prints nothing when the array is empty.
print_merge_summary() {
  if [[ ${#_ROLL_MERGE_SUMMARY[@]} -eq 0 ]]; then
    return
  fi
  # All loop state is local so the helper never clobbers a caller's variables
  # (the loop variable used to leak into the global scope).
  local entry action file
  echo ""
  echo "$(msg migrate.summary)"
  for entry in "${_ROLL_MERGE_SUMMARY[@]}"; do
    action="${entry%%|*}"   # text before the first '|'
    file="${entry##*|}"     # text after the last '|'
    case "$action" in
      merged)      printf "  │  ${GREEN}✦ merged${NC}      %-30s│\n" "$file" ;;
      created)     printf "  │  ${GREEN}+ created${NC}     %-30s│\n" "$file" ;;
      overwritten) printf "  │  ${YELLOW}↺ overwritten${NC} %-30s│\n" "$file" ;;
      kept)        printf "  │  ${CYAN}· kept${NC}        %-30s│\n" "$file" ;;
      unchanged)   printf "  │    unchanged    %-30s│\n" "$file" ;;
    esac
  done
  echo "  └─────────────────────────────────────────────────────┘"
}

# ─── Helper: auto-detect project type by scanning project files ──────────────
# Looks at marker files and directories under the given directory (default:
# the current directory) and prints exactly one of: fullstack, frontend-only,
# cli, backend-service, unknown.
scan_project_type_from_files() {
  local root="${1:-.}"
  local pkg="$root/package.json"
  local fe=0 be=0 cli=0
  local probe

  # Frontend: a UI framework named in package.json, or a typical UI directory.
  if [[ -f "$pkg" ]] \
     && grep -qiE '"react"|"vue"|"next"|"nuxt"|"vite"|"svelte"' "$pkg" 2>/dev/null; then
    fe=1
  fi
  for probe in src app pages components; do
    if [[ -d "$root/$probe" ]]; then
      fe=1
      break
    fi
  done

  # Backend: a server directory, or a non-JS language/build manifest.
  for probe in server api backend; do
    if [[ -d "$root/$probe" ]]; then
      be=1
      break
    fi
  done
  for probe in go.mod main.go main.py app.py Cargo.toml requirements.txt pyproject.toml; do
    if [[ -f "$root/$probe" ]]; then
      be=1
      break
    fi
  done
  # DB/ORM/server-side dependencies in package.json also count as backend.
  if [[ -f "$pkg" ]] \
     && grep -qiE '"prisma"|"@prisma/client"|"typeorm"|"sequelize"|"mongoose"|"drizzle-orm"|"@neondatabase/serverless"|"pg"|"mysql2"|"mongodb"|"redis"|"ioredis"|"express"|"fastify"|"koa"|"hapi"|"@hapi/hapi"|"apollo-server"|"graphql-yoga"|"trpc"' "$pkg" 2>/dev/null; then
    be=1
  fi
  # A Prisma schema file is a definitive backend signal.
  if [[ -f "$root/prisma/schema.prisma" ]]; then
    be=1
  fi

  # CLI: bin/ or cmd/ directory.
  if [[ -d "$root/bin" || -d "$root/cmd" ]]; then
    cli=1
  fi

  # Classify on the three flags, written as <frontend><backend><cli>.
  case "${fe}${be}${cli}" in
    11?) echo "fullstack" ;;
    10?) echo "frontend-only" ;;
    0?1) echo "cli" ;;              # cli wins over backend when no frontend
    010) echo "backend-service" ;;
    *)   echo "unknown" ;;
  esac
}

# ─── Helper: write starter .roll/backlog.md (no-op if exists) ──────────────────────
# $1 = destination path. An existing file is left alone and logged as
# "unchanged"; otherwise the starter backlog (an empty "Epic: Initial Setup"
# table and an empty "Bug Fixes" table) is written and logged as "created" in
# _ROLL_MERGE_SUMMARY.
_write_backlog() {
  local target="$1"
  if [[ ! -f "$target" ]]; then
    mkdir -p "$(dirname "$target")"
    printf '%s\n' \
      '# Project Backlog' \
      '' \
      '## Epic: Initial Setup' \
      '| Story | Description | Status |' \
      '|-------|-------------|--------|' \
      '' \
      '## Bug Fixes' \
      '| ID | Problem | Status |' \
      '|----|---------|--------|' \
      > "$target"
    ok "Created: .roll/backlog.md"
    _ROLL_MERGE_SUMMARY+=("created|.roll/backlog.md")
    return
  fi
  _ROLL_MERGE_SUMMARY+=("unchanged|.roll/backlog.md")
}

# US-AGENT-003: seed .roll/agent-routes.yaml from a named template (default /
# minimal / heavy). Idempotent — leaves an existing file untouched. Templates
# live under ${ROLL_TEMPLATES}/agent-routes/<name>.yaml.
#
# Arguments:
#   $1  template name (default: "default")
#   $2  project directory (default: current directory)
# Returns:
#   0  the file already existed (logged "unchanged") or was created
#   1  the template does not exist, or the file could not be written
_init_seed_agent_routes() {
  local template_name="${1:-default}"
  local project_dir="${2:-$(pwd)}"
  local dest="${project_dir}/.roll/agent-routes.yaml"

  if [[ -f "$dest" ]]; then
    _ROLL_MERGE_SUMMARY+=("unchanged|.roll/agent-routes.yaml")
    return 0
  fi

  local src="${ROLL_TEMPLATES}/agent-routes/${template_name}.yaml"
  if [[ ! -f "$src" ]]; then
    err "agent-routes template not found: ${template_name} (looked for ${src})"
    return 1
  fi

  # Check the copy explicitly. When this function runs as an `if` condition
  # errexit is suspended, so an unchecked failure would fall through, report
  # "Created" and return 0 for a file that was never written.
  if ! mkdir -p "$(dirname "$dest")" || ! cp "$src" "$dest"; then
    err "failed to write .roll/agent-routes.yaml from template: ${template_name}"
    return 1
  fi
  ok "Created: .roll/agent-routes.yaml (template: ${template_name})"
  _ROLL_MERGE_SUMMARY+=("created|.roll/agent-routes.yaml")
}

_ensure_features_dir() {
  # $1 = directory to guarantee. Creates it when missing and logs the outcome
  # ("created" or "unchanged") for .roll/features/ in _ROLL_MERGE_SUMMARY.
  local features_dir="$1" outcome="unchanged"
  if [[ ! -d "$features_dir" ]]; then
    mkdir -p "$features_dir"
    ok "Created: .roll/features/"
    outcome="created"
  fi
  _ROLL_MERGE_SUMMARY+=("${outcome}|.roll/features/")
}

# US-ONBOARD-019: write a Roll version stamp to <project>/.roll/.version when a
# project is onboarded. The file holds three comment lines followed by
# roll_version (from $VERSION) and installed_at (current UTC time).
#
# Idempotent: an existing stamp is never overwritten, so the original install
# timestamp is preserved across re-runs. The outcome is logged in
# _ROLL_MERGE_SUMMARY as "created" or "unchanged". Always returns 0.
#   $1 = project directory
_write_version_stamp() {
  local project_dir="$1"
  local roll_dir="$project_dir/.roll"
  local stamp="$roll_dir/.version"

  if [[ ! -f "$stamp" ]]; then
    mkdir -p "$roll_dir"
    local now
    now=$(date -u +%FT%TZ)
    {
      printf '%s\n' '# Roll project version stamp — written by `roll init` (US-ONBOARD-019).'
      printf '%s\n' '# Used by `_check_structure` to recognise a previously-onboarded Roll project'
      printf '%s\n' '# without depending on directory-name heuristics.'
      printf 'roll_version: "%s"\n' "${VERSION}"
      printf 'installed_at: "%s"\n' "${now}"
    } > "$stamp"
    _ROLL_MERGE_SUMMARY+=("created|.roll/.version")
    return 0
  fi

  _ROLL_MERGE_SUMMARY+=("unchanged|.roll/.version")
  return 0
}

# US-ONBOARD-019: is <root> a Roll-onboarded project (current or pre-2.0)?
#
# Returns 0 (true) when at least one Roll-specific signal is present:
#   1. .roll/.version stamp (post-019 onboard)
#   2. BACKLOG.md containing a "| Story | Description | Status |" header, an
#      "## Epic:" heading, or an "| ID | Problem | Status |" header
#   3. PROPOSALS.md with a "## Proposal" heading
#   4. docs/features/ holding .md files (up to two levels deep) whose path has
#      a component starting with US-/FIX-/REFACTOR- followed by digits
#   5. docs/briefs/ or docs/dream/ containing at least one file
# Returns 1 otherwise.
#
# A bare BACKLOG.md/PROPOSALS.md from another tool, or a generic
# docs/features/ folder, does NOT count — that's the bug US-ONBOARD-019
# fixes (false-positive migrate prompts on non-Roll projects).
#   $1 = project root
_has_roll_signature() {
  local root="$1"

  # Signal 1 — post-019 version stamp
  [[ -f "$root/.roll/.version" ]] && return 0

  # Signal 2 — Roll-1.x BACKLOG.md content
  if [[ -f "$root/BACKLOG.md" ]]; then
    if grep -qE '^\| Story \| Description \| Status \|' "$root/BACKLOG.md" 2>/dev/null \
       || grep -qE '^## Epic:' "$root/BACKLOG.md" 2>/dev/null \
       || grep -qE '^\| ID \| Problem \| Status \|' "$root/BACKLOG.md" 2>/dev/null; then
      return 0
    fi
  fi

  # Signal 3 — Roll-style PROPOSALS.md
  if [[ -f "$root/PROPOSALS.md" ]]; then
    if grep -qE '^## Proposal' "$root/PROPOSALS.md" 2>/dev/null; then
      return 0
    fi
  fi

  # Signal 4 — Roll-named files under docs/features/
  # The listing is captured first and matched from a here-string. Piping find
  # straight into `grep -q` is unsafe under `set -o pipefail`: grep exits on
  # the first match, find can then die from SIGPIPE, and the pipeline reports
  # failure even though a match was found.
  if [[ -d "$root/docs/features" ]]; then
    local feature_files
    feature_files=$(find "$root/docs/features" -maxdepth 2 -type f -name '*.md' 2>/dev/null) || true
    if grep -qE '/(US|FIX|REFACTOR)-[0-9]+' <<< "$feature_files"; then
      return 0
    fi
  fi

  # Signal 5 — Roll-1.x process artefacts (docs/briefs/ docs/dream/)
  # Only the captured output is tested here, so the pipeline status is moot.
  local dir
  for dir in docs/briefs docs/dream; do
    if [[ -d "$root/$dir" ]] \
       && [[ -n "$(find "$root/$dir" -mindepth 1 -maxdepth 2 -type f 2>/dev/null | head -1)" ]]; then
      return 0
    fi
  done

  return 1
}

# ─── Helper: write starter .roll/features.md (no-op if exists) ────────────────
# $1 = destination path. An existing file is left alone and logged as
# "unchanged"; otherwise the starter index is written and logged as "created"
# in _ROLL_MERGE_SUMMARY.
_write_features_md() {
  local target="$1"
  if [[ ! -f "$target" ]]; then
    mkdir -p "$(dirname "$target")"
    printf '%s\n' \
      '# Features' \
      '' \
      '> 产品视角的功能索引。每次发版时更新，使之与 BACKLOG 保持一致。' \
      '' \
      '---' \
      '' \
      '## Features by Epic' \
      '' \
      '<!-- Add feature entries here as epics are completed -->' \
      > "$target"
    ok "Created: .roll/features.md"
    _ROLL_MERGE_SUMMARY+=("created|.roll/features.md")
    return
  fi
  _ROLL_MERGE_SUMMARY+=("unchanged|.roll/features.md")
}

# ═══════════════════════════════════════════════════════════════════════════════
# COMMAND: status
# Show current state of conventions
# ═══════════════════════════════════════════════════════════════════════════════
_legacy_status() {
  # Legacy plain-text renderer for `roll status`.
  # Prints, in order: ROLL_HOME presence, global convention files, installed
  # skills, per-tool sync state, skill symlinks, templates, and finally the
  # loop overview (_status_loop_overview). Returns early when ROLL_HOME is
  # missing.
  #
  # Every argument handed to msg is quoted: when NO_COLOR empties the colour
  # variables, an unquoted empty expansion disappears and shifts the remaining
  # positional arguments of the message.
  local f tpl _entry count

  echo -e "$(msg migrate.roll_convention_status_roll "${BOLD}" "${NC}")"
  echo ""

  if [[ -d "$ROLL_HOME" ]]; then
    ok "$(msg migrate.roll_exists_roll)"
  else
    err "$(msg migrate.roll_not_found_run_roll_setup)"
    return
  fi

  echo ""
  echo -e "$(msg migrate.global_conventions "${BOLD}" "${NC}")"
  for f in AGENTS.md CLAUDE.md GEMINI.md .cursor-rules project_rules.md; do
    if [[ -f "$ROLL_GLOBAL/$f" ]]; then
      echo -e "  ${GREEN}+${NC} $f"
    else
      echo -e "$(msg migrate.missing "${RED}" "${NC}" "$f")"
    fi
  done

  echo ""
  echo -e "$(msg migrate.global_skills "${BOLD}" "${NC}")"
  if [[ -d "$ROLL_HOME/skills" ]]; then
    # find lists the skills directory itself as well, hence the -1.
    count=$(find "$ROLL_HOME/skills" -maxdepth 1 -type d | wc -l | tr -d ' ')
    count=$((count - 1))
    echo -e "$(msg migrate.roll_skills_skills_installed "${GREEN}" "${NC}" "$count")"
  else
    echo -e "$(msg migrate.roll_skills_missing "${RED}" "${NC}")"
  fi

  echo ""
  echo -e "$(msg migrate.sync_targets "${BOLD}" "${NC}")"

  local _sync_found=0
  local _ai_d _cfg _src _tool_name
  while IFS= read -r _entry; do
    _sync_found=1
    _ai_d="$(_ai_dir "$_entry")"
    _cfg="$(_ai_config "$_entry")"
    _src="$(_ai_src "$_entry")"
    _tool_name="$(ai_tool_name "$_ai_d")"
    check_sync_status "$_tool_name" "$ROLL_GLOBAL/$_src" "$_ai_d/$_cfg"
  done < <(_get_ai_tools)
  if [[ "$_sync_found" -eq 0 ]]; then
    warn "$(msg migrate.no_ai_tools_configured_check_roll)"
    info "$(msg migrate.add_ai_entries_or_run_roll)"
  fi

  echo ""
  echo -e "$(msg migrate.skill_symlinks "${BOLD}" "${NC}")"
  local total_skills=0
  # Initialised to empty so the comparison below is safe under `set -u` even
  # when $ROLL_HOME/skills does not exist.
  local wk_skills_real=""
  if [[ -d "$ROLL_HOME/skills" ]]; then
    # Count roll-* skill dirs to match the linked_count scope below
    total_skills=$(find "$ROLL_HOME/skills" -maxdepth 1 -mindepth 1 -type d -name "roll-*" | wc -l | tr -d ' ')
    wk_skills_real="$(canonical_dir "$ROLL_HOME/skills" 2>/dev/null || true)"
  fi
  local _skills_found=0
  local ai_dir name skills_dir skills_display skills_target skills_real linked_count
  while IFS= read -r _entry; do
    ai_dir="$(_ai_dir "$_entry")"
    [[ -d "$ai_dir" ]] || continue
    _skills_found=1
    name="$(ai_tool_name "$ai_dir")"
    # Capitalise the first letter of the tool name for display.
    name="$(echo "$name" | tr '[:lower:]' '[:upper:]' | cut -c1)$(echo "$name" | cut -c2-)"
    skills_dir="$ai_dir/skills"
    skills_display="${skills_dir/#$HOME/~}"
    if [[ -d "$skills_dir" ]]; then
      if [[ -L "$skills_dir" ]]; then
        # The whole skills directory is a symlink: it is "mounted" when it
        # resolves to the same canonical directory as $ROLL_HOME/skills.
        skills_target="$(readlink "$skills_dir")"
        skills_real="$(canonical_dir "$skills_dir" 2>/dev/null || true)"
        if [[ -n "$skills_real" && "$skills_real" == "$wk_skills_real" ]]; then
          echo -e "$(msg migrate.roll_skills_mounted "${GREEN}" "${NC}" "$name" "$skills_display")"
        else
          echo -e "  ${YELLOW}~${NC} $name: $skills_display -> ${skills_target/#$HOME/~} (symlinked dir)"
        fi
        continue
      fi

      # Regular directory: count the individual roll-* symlinks inside it.
      linked_count=$(find "$skills_dir" -maxdepth 1 -mindepth 1 -type l -name "roll-*" 2>/dev/null | wc -l | tr -d ' ')
      if [[ "$linked_count" -eq "$total_skills" ]] && [[ "$total_skills" -gt 0 ]]; then
        echo -e "  ${GREEN}=${NC} $name: $skills_display ($linked_count/$total_skills skills linked)"
      elif [[ "$linked_count" -gt 0 ]]; then
        echo -e "  ${YELLOW}~${NC} $name: $skills_display ($linked_count/$total_skills skills linked)"
      else
        echo -e "$(msg migrate.no_roll_skills_linked "${RED}" "${NC}" "$name" "$skills_display")"
      fi
    else
      echo -e "$(msg migrate.not_found "${RED}" "${NC}" "$name" "$skills_display")"
    fi
  done < <(_get_ai_tools)
  if [[ "$_skills_found" -eq 0 ]]; then
    warn "$(msg migrate.no_ai_tools_configured_check_roll_2)"
  fi

  echo ""
  echo -e "$(msg migrate.templates "${BOLD}" "${NC}")"
  for tpl in fullstack frontend-only backend-service cli; do
    if [[ -d "$ROLL_TEMPLATES/$tpl" ]]; then
      count=$(find "$ROLL_TEMPLATES/$tpl" -type f | wc -l | tr -d ' ')
      echo -e "  ${GREEN}+${NC} $tpl ($count files)"
    else
      echo -e "$(msg migrate.missing_2 "${RED}" "${NC}" "$tpl")"
    fi
  done

  _status_loop_overview
}

cmd_status() {
  # Entry point for `roll status`. ROLL_UI selects the renderer: "v2" (also
  # the default when unset or empty) runs lib/roll-status.py under python3;
  # any other value uses the legacy shell renderer. Arguments pass through.
  case "${ROLL_UI:-v2}" in
    v2) python3 "${ROLL_PKG_DIR}/lib/roll-status.py" "$@" ;;
    *)  _legacy_status "$@" ;;
  esac
}

_status_loop_overview() {
  # Print one summary line per com.roll.loop.*.plist found in _LAUNCHD_DIR:
  # load state, project name, schedule and number of pending backlog items.
  # Does nothing on non-Darwin systems or when no such plist exists.
  [[ "$(uname)" != "Darwin" ]] && return 0

  # Collect plists with a glob rather than by parsing `ls` output.
  local plists=() p
  for p in "${_LAUNCHD_DIR}"/com.roll.loop.*.plist; do
    [[ -f "$p" ]] || continue
    plists+=("$p")
  done

  [[ "${#plists[@]}" -eq 0 ]] && return 0

  echo ""
  echo -e "$(msg status.loop_overview "${BOLD}" "${NC}")"

  local plist label proj_path proj_name path_note state_icon
  local minute hour schedule todo_count
  for plist in "${plists[@]}"; do
    label=$(basename "$plist" .plist)

    # Value of the <string> line that follows <key>WorkingDirectory</key>.
    proj_path=$(awk '/<key>WorkingDirectory<\/key>/{f=1;next} f{gsub(/^[[:space:]]*<string>|<\/string>[[:space:]]*$/,"");print;exit}' "$plist" 2>/dev/null)

    path_note=""
    if [[ -n "$proj_path" && -d "$proj_path" ]]; then
      proj_name=$(basename "$proj_path")
    elif [[ -n "$proj_path" ]]; then
      proj_name=$(basename "$proj_path")
      path_note=" (path missing)"
    else
      proj_name="(unknown)"
    fi

    if _launchd_is_loaded "$label"; then
      state_icon="${GREEN}●${NC}"
    else
      state_icon="${RED}○${NC}"
    fi

    minute=$(awk '/<key>Minute<\/key>/{f=1;next} f{gsub(/^[[:space:]]*<integer>|<\/integer>[[:space:]]*$/,"");print;exit}' "$plist" 2>/dev/null)
    hour=$(awk '/<key>Hour<\/key>/{f=1;next} f{gsub(/^[[:space:]]*<integer>|<\/integer>[[:space:]]*$/,"");print;exit}' "$plist" 2>/dev/null)
    # Force base 10 so zero-padded values such as "08" are not rejected as
    # invalid octal; anything non-numeric is shown as "?".
    if [[ "$hour" =~ ^[0-9]+$ && "$minute" =~ ^[0-9]+$ ]]; then
      printf -v schedule '%02d:%02d' "$((10#$hour))" "$((10#$minute))"
    elif [[ -z "$hour" && "$minute" =~ ^[0-9]+$ ]]; then
      printf -v schedule ':%02d' "$((10#$minute))"
    else
      schedule="?"
    fi

    todo_count=0
    if [[ -z "$path_note" && -f "${proj_path}/.roll/backlog.md" ]]; then
      # grep -c exits 1 on zero matches; `; true` keeps the substitution
      # from failing while still capturing the printed count.
      todo_count=$(grep -c '📋 Todo' "${proj_path}/.roll/backlog.md" 2>/dev/null; true)
    fi

    echo -e "  ${state_icon} ${proj_name}${path_note}   ${schedule}   ${todo_count} pending"
  done
}

check_sync_status() {
  # Report whether one AI tool's config is in sync with the global convention.
  #   $1 name — tool name used in the message
  #   $2 src  — source convention file
  #   $3 dst  — the tool's main config file
  # Prints exactly one status line. msg arguments are quoted so that empty
  # colour variables (NO_COLOR) cannot shift the positional arguments.
  local name="$1"
  local src="$2"
  local dst="$3"

  local display="${dst/#$HOME/~}"
  local wk_file
  wk_file="$(dirname "$dst")/roll.md"

  # Sync writes content to {dir}/roll.md and appends @roll.md to the main config.
  # So "in sync" means: roll.md exists + matches source + main config contains @roll.md.
  if [[ ! -f "$dst" ]]; then
    echo -e "$(msg status.not_synced "${RED}" "${NC}" "$name" "$display")"
  elif [[ ! -f "$wk_file" ]]; then
    echo -e "$(msg status.out_of_sync_roll_md_missing "${YELLOW}" "${NC}" "$name" "$display")"
  elif ! diff -q "$src" "$wk_file" >/dev/null 2>&1; then
    echo -e "$(msg status.out_of_sync_roll_md_outdated "${YELLOW}" "${NC}" "$name" "$display")"
  elif ! grep -qF "@roll.md" "$dst" 2>/dev/null; then
    echo -e "$(msg status.out_of_sync_roll_md_not "${YELLOW}" "${NC}" "$name" "$display")"
  else
    echo -e "$(msg status.in_sync "${GREEN}" "${NC}" "$name" "$display")"
  fi
}

# ═══════════════════════════════════════════════════════════════════════════════
# PEER REVIEW
# ═══════════════════════════════════════════════════════════════════════════════

# Peer state directory under ROLL_HOME: holds one "<pair>_<key>" file per
# state value (see _peer_state_file) plus a logs/ subdirectory.
_PEER_STATE_DIR="${ROLL_HOME}/.peer-state"

_peer_ensure_state_dir() {
  # Create the peer state directory and its logs/ subdirectory if missing.
  local d
  for d in "$_PEER_STATE_DIR" "${_PEER_STATE_DIR}/logs"; do
    mkdir -p "$d"
  done
}

# FIX-150a: project-local peer data directory (analogous to loop runtime dir).
_peer_project_dir() {
  # Print the project-local peer data directory: "<project>/.roll/peer".
  # <project> is the physical working directory, or — when git reports a
  # common dir ending in "/.git" — the path that precedes "/.git".
  # Takes no arguments; always exits 0.
  local proj
  proj=$(pwd -P 2>/dev/null || pwd)
  # FIX-056: normalize path to canonical case on macOS case-insensitive filesystem.
  if [[ "$(uname -s 2>/dev/null)" == "Darwin" ]]; then
    local _canon
    _canon=$(realpath "$proj" 2>/dev/null) && proj="$_canon"
  fi
  # When inside a git worktree, resolve to main tree (same pattern as _project_slug).
  # `|| _common=""`: outside a repository (or without git) rev-parse fails;
  # that must not abort the script when this function is called directly
  # under errexit rather than inside a command substitution.
  local _common
  _common=$(git -C "$proj" rev-parse --git-common-dir 2>/dev/null) || _common=""
  if [[ -n "$_common" && "$_common" == *"/.git" ]]; then
    proj="${_common%/.git}"
  fi
  echo "${proj}/.roll/peer"
}

_peer_ensure_project_dir() {
  # Make sure the logs/ directory under the project-local peer directory
  # (as reported by _peer_project_dir) exists, creating parents as needed.
  local logs_dir
  logs_dir="$(_peer_project_dir)/logs"
  mkdir -p "$logs_dir"
}

# FIX-150a: write a structured JSONL record to the project-local peer runs file.
# Escape a string for embedding inside a JSON string literal: backslash,
# double quote, newline, carriage return and tab. Other control characters
# are passed through unchanged.
_peer_json_escape() {
  local s="$1"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

_peer_write_record() {
  # Append one JSON object (one line) describing a peer review run to
  # "<peer project dir>/runs.jsonl".
  #   $1 from_tool  $2 to_tool  $3 round  $4 verdict  $5 tag  $6 duration_sec
  # String fields are JSON-escaped. round and duration_sec are written as
  # JSON numbers when they are plain non-negative integers, otherwise as
  # escaped strings, so that every line stays parseable.
  local from_tool="$1"
  local to_tool="$2"
  local round="$3"
  local verdict="$4"
  local tag="$5"
  local duration_sec="$6"
  local dir
  dir=$(_peer_project_dir)
  mkdir -p "$dir"
  local ts
  ts=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  local int_re='^(0|[1-9][0-9]*)$'
  local round_json duration_json
  if [[ "$round" =~ $int_re ]]; then
    round_json="$round"
  else
    round_json="\"$(_peer_json_escape "$round")\""
  fi
  if [[ "$duration_sec" =~ $int_re ]]; then
    duration_json="$duration_sec"
  else
    duration_json="\"$(_peer_json_escape "$duration_sec")\""
  fi

  printf '{"ts":"%s","from":"%s","to":"%s","round":%s,"verdict":"%s","tag":"%s","duration_sec":%s}\n' \
    "$ts" \
    "$(_peer_json_escape "$from_tool")" \
    "$(_peer_json_escape "$to_tool")" \
    "$round_json" \
    "$(_peer_json_escape "$verdict")" \
    "$(_peer_json_escape "$tag")" \
    "$duration_json" \
    >> "$dir/runs.jsonl"
}

_peer_state_file() {
  # Print the path of the state file for ($1 = pair, $2 = key):
  # "<_PEER_STATE_DIR>/<pair>_<key>".
  echo "${_PEER_STATE_DIR}/${1}_${2}"
}

_peer_get_state() {
  # Print the stored value for ($1 = pair, $2 = key); prints an empty line
  # when no state file exists for that combination.
  local path
  path="$(_peer_state_file "$1" "$2")"
  if [[ ! -f "$path" ]]; then
    echo ""
    return
  fi
  cat "$path"
}

_peer_set_state() {
  # Store $3 (value) for ($1 = pair, $2 = key), replacing any previous value.
  # The state directory is created first if necessary.
  local dest
  _peer_ensure_state_dir
  dest="$(_peer_state_file "$1" "$2")"
  printf '%s\n' "$3" > "$dest"
}

_peer_normalize_pair() {
  # Print the pair identifier "<from>→<to>" for $1 = from, $2 = to.
  printf '%s→%s\n' "$1" "$2"
}

_peer_detect_peers() {
  # Print, space-separated on one line, every known peer CLI name that
  # `command -v` can resolve. Prints an empty line when none is found.
  # `tool` is local so the loop does not clobber a caller's variable.
  local peers="" tool
  for tool in claude kimi pi codex opencode cursor; do
    if command -v "$tool" &>/dev/null; then
      peers="${peers}${peers:+ }${tool}"
    fi
  done
  printf '%s\n' "$peers"
}

_peer_route() {
  # Choose a peer for $1 (originating tool) and optional $2 (tag, default
  # "default"). Candidates come from config key peer_capability_map_<tag>,
  # falling back to peer_capability_map_default. The first candidate that
  # is not $1, is installed, and whose pair status is not "abandoned" is
  # printed and the function returns 0. Otherwise an empty line is printed
  # and the function returns 1.
  local from="$1"
  local tag="${2:-default}"

  local map_val
  map_val="$(config_get "peer_capability_map_${tag}" "")"
  if [[ -z "$map_val" ]]; then
    map_val="$(config_get "peer_capability_map_default" "kimi claude pi")"
  fi

  local installed
  installed="$(_peer_detect_peers)"
  installed="${installed//[$'\n\t']/ }"

  # Split on whitespace without pathname expansion (a bare `for x in $map_val`
  # would glob-expand entries such as "*").
  local -a candidates=()
  read -r -a candidates <<<"${map_val//$'\n'/ }" || true

  local candidate pair status
  for candidate in ${candidates[@]+"${candidates[@]}"}; do
    [[ "$candidate" == "$from" ]] && continue
    # Exact token match; the candidate is never interpreted as a regex.
    if [[ " ${installed} " == *" ${candidate} "* ]]; then
      pair="$(_peer_normalize_pair "$from" "$candidate")"
      status="$(_peer_get_state "$pair" "status")"
      if [[ "$status" != "abandoned" ]]; then
        printf '%s\n' "$candidate"
        return 0
      fi
    fi
  done

  printf '%s\n' ""
  return 1
}

# Open a Terminal.app window attached to the given tmux session (peer
# auto-attach). No-ops when muted or non-macOS.
# FIX-054: terminal selection removed — always dispatches to macOS
# Terminal.app for predictability (per-user detection silently failed on
# Ghostty upgrades).
# Uses `open -g` so the window appears in the background and does not steal
# focus from the user's active app (replaces a prior osascript-based
# capture-frontmost / restore-focus dance that triggered LaunchServices
# "where is <app>" prompts when the active process name differed from its
# .app bundle name, e.g. MSTeams vs Microsoft Teams.app).
_peer_auto_attach() {
  # $1 — tmux session name. Writes an executable
  # "<_SHARED_ROOT>/loop/attach-<session>.command" script and opens it with
  # Terminal via `open -g`. Always returns 0; every failure is best-effort.
  local session="$1"

  # Skip silently off macOS, under bats, when popups are disabled, or muted.
  [[ "$(uname)" == "Darwin" ]] || return 0
  [[ -z "${BATS_TEST_NUMBER:-}" ]] || return 0
  [[ -z "${ROLL_LOOP_NO_POPUP:-}" ]] || return 0
  [[ ! -f "$_LOOP_MUTE_FILE" ]] || return 0

  local attach_cmd="${_SHARED_ROOT}/loop/attach-${session}.command"
  # The script deliberately does not `exec` tmux: the wrapping shell must
  # outlive `tmux attach` so the trailing `read` keeps the window (and its
  # scrollback) open until the user presses enter.
  printf '%s\n' \
    '#!/bin/bash' \
    "tmux attach -t ${session}" \
    'echo' \
    'echo "================================================================"' \
    'echo "  session ended. press enter to close this window."' \
    'echo "================================================================"' \
    'read _' \
    > "$attach_cmd" 2>/dev/null || return 0
  chmod +x "$attach_cmd" 2>/dev/null || return 0
  open -g -a Terminal "$attach_cmd" >/dev/null 2>&1 || true
}

# Dispatch a peer CLI command inside an existing tmux session (window 0).
# Writes stdout to out_file, stderr to err_file. Blocks until done or timeout.
_peer_dispatch_in_tmux() {
  # Run a command string in window 0 of an existing tmux session and wait.
  #   $1 session   tmux session name
  #   $2 cmd_str   shell command text, already quoted by the caller
  #   $3 out_file  receives the command's stdout
  #   $4 err_file  receives the command's stderr
  #   $5 timeout   seconds to wait (default 180)
  # Returns 0 when the command finished within the timeout, 1 when it did not.
  local session="$1" cmd_str="$2" out_file="$3" err_file="$4" timeout="${5:-180}"
  local done_file="${out_file}.done"
  local inner inner_q
  # The template ends in the X's: BSD/macOS mktemp only substitutes a
  # trailing run of X's, so a ".sh" suffix would yield a fixed, predictable
  # file name. The script is started with `bash <path>`, so no suffix is needed.
  inner=$(mktemp /tmp/roll-peer-inner-XXXXXX)
  {
    printf '#!/bin/bash -l\n'
    # FIX-050: portable PATH assembly (was hardcoded /opt/homebrew/bin)
    printf 'for _d in /opt/homebrew/bin /usr/local/bin /opt/local/bin "$HOME/.local/bin" "$HOME/.kimi-code/bin"; do\n'
    printf '  case ":$PATH:" in *":$_d:"*) ;; *) [ -d "$_d" ] && PATH="$_d:$PATH" ;; esac\n'
    printf 'done; export PATH\n'
    printf '%s > %q 2> %q || true\n' "$cmd_str" "$out_file" "$err_file"
    printf 'touch %q\n' "$done_file"
  } > "$inner"
  chmod +x "$inner"
  printf -v inner_q '%q' "$inner"
  tmux send-keys -t "${session}:0" "bash ${inner_q}; rm -f ${inner_q}" Enter
  # Poll once per second for the done marker written by the helper script.
  local elapsed=0
  while [ ! -f "$done_file" ] && [ "$elapsed" -lt "$timeout" ]; do
    sleep 1
    elapsed=$((elapsed + 1))
  done
  # FIX-150c: if we hit the wall clock without the done marker, the agent
  # is still running inside the tmux session. Send Ctrl-C to interrupt it
  # so the cycle doesn't hang on a peer that's no longer being awaited.
  # Return 1 (timed out) vs 0 (completed within budget); _peer_call lifts
  # this into the global _PEER_LAST_TIMED_OUT flag.
  local _timed_out=0
  if [[ ! -f "$done_file" ]]; then
    tmux send-keys -t "${session}:0" C-c 2>/dev/null || true
    # The interrupt may also cancel the queued `rm -f`, so remove the
    # helper script here instead of leaving it behind in /tmp.
    rm -f "$inner"
    _timed_out=1
  fi
  rm -f "$done_file"
  return "$_timed_out"
}

_peer_call() {
  # Invoke peer agent $1 with prompt $2 and print its captured stdout.
  #   $3 (optional) tmux session: when given, tmux is installed and the
  #      session exists, the call is dispatched into that session; otherwise
  #      the agent runs as a direct child guarded by a watchdog.
  # The agent's stderr goes to "<_PEER_STATE_DIR>/logs/.last_stderr.log".
  # Progress and timeout messages are written to stderr so that a caller
  # capturing stdout receives only the peer's response.
  # Returns 1 when the agent is not supported.
  local to="$1"
  local prompt="$2"
  local session="${3:-}"
  local output=""
  local stderr_log
  stderr_log="${_PEER_STATE_DIR}/logs/.last_stderr.log"
  local call_timeout
  call_timeout="$(config_get "peer_call_timeout" "180")"

  # FIX-150c: signal back to caller whether this call hit the wall-clock
  # limit. Caller (_peer_*_state, ledger writer) records "timeout" verdict.
  # Reset per call so callers reading the previous result don't mis-attribute.
  # NOTE(review): a caller that runs this function inside $(...) executes it
  # in a subshell and therefore cannot observe this flag — confirm callers.
  _PEER_LAST_TIMED_OUT=0

  # The stderr log lives under logs/; create it up front, otherwise the
  # redirections below fail on first use and the agent is never started.
  _peer_ensure_state_dir

  info "$(msg status.peer_call_timeout_s_peer "${call_timeout}")" >&2

  if [[ -n "$session" ]] && command -v tmux >/dev/null 2>&1 && tmux has-session -t "$session" 2>/dev/null; then
    # Resolve the command before allocating the temp file so an unsupported
    # peer does not leave a stray file behind.
    local cmd_str
    cmd_str=$(_agent_cmd_str "$to" peer "$prompt") || {
      err "$(msg status.unsupported_peer "$to")"
      return 1
    }
    local out_file
    out_file=$(mktemp)
    # `|| rc=$?` keeps a timeout (return 1) from tripping errexit.
    local _dispatch_rc=0
    _peer_dispatch_in_tmux "$session" "$cmd_str" "$out_file" "$stderr_log" "$call_timeout" || _dispatch_rc=$?
    output="$(cat "$out_file" 2>/dev/null || true)"
    rm -f "$out_file"
    if [[ "$_dispatch_rc" -ne 0 ]]; then
      _PEER_LAST_TIMED_OUT=1
      warn "$(msg status.peer_call_timeout_killed "$to" "$call_timeout")" >&2
    fi
  else
    _agent_argv "$to" peer "$prompt" || {
      err "$(msg status.unsupported_peer_2 "$to")"
      return 1
    }
    # FIX-150c: hard timeout for non-tmux path. macOS has no `timeout`(1),
    # so use a background watchdog that SIGTERMs (then SIGKILLs after 2 s
    # grace) the agent process when it overruns. Output captured via tmp
    # file because we can't keep it in $output while juggling pids.
    # `wait` returns the agent's exit code; with `set -e` enabled by the
    # caller (loops, hooks, bats), a non-zero from the killed agent would
    # short-circuit out before we can read it — `|| _peer_exit=$?` keeps
    # the value flowing into the timeout check.
    local _out _peer_exit=0 _peer_pid _watchdog_pid
    _out=$(mktemp)
    "${_AGENT_ARGV[@]}" >"$_out" 2>"$stderr_log" &
    _peer_pid=$!
    # The watchdog's stdio is detached so that a leftover `sleep` cannot hold
    # a caller's command-substitution pipe open after this function returns.
    (
      sleep "$call_timeout"
      kill -TERM "$_peer_pid" 2>/dev/null && {
        sleep 2
        kill -KILL "$_peer_pid" 2>/dev/null
      }
    ) >/dev/null 2>&1 &
    _watchdog_pid=$!
    wait "$_peer_pid" 2>/dev/null || _peer_exit=$?
    # Cancel watchdog if agent finished on time.
    # FIX-181: kill children (sleep) first so they cannot outlive the
    # watchdog and later hit a reused PID, then kill the watchdog itself.
    if command -v pkill >/dev/null 2>&1; then
      pkill -P "$_watchdog_pid" 2>/dev/null || true
    fi
    kill "$_watchdog_pid" 2>/dev/null || true
    wait "$_watchdog_pid" 2>/dev/null || true
    output="$(cat "$_out" 2>/dev/null || true)"
    rm -f "$_out"
    # SIGTERM kill → 143, SIGKILL → 137. Either means we tripped the
    # timeout watchdog (agent itself doesn't normally exit with those).
    if [[ "$_peer_exit" -eq 143 || "$_peer_exit" -eq 137 ]]; then
      _PEER_LAST_TIMED_OUT=1
      warn "$(msg status.peer_call_timeout_killed "$to" "$call_timeout")" >&2
    fi
  fi

  printf '%s\n' "$output"
}

_peer_parse_resolution() {
  # Extract the peer's verdict from response text $1 and print it in upper
  # case (AGREE, REFINE, OBJECT or ESCALATE); prints an empty line when no
  # verdict is found. A verdict only counts when nothing but non-letter
  # characters precede it on its line, e.g. "**AGREE**", "- AGREE:",
  # "* REFINE", "OBJECT — ...". The first such line wins.
  local output="$1"
  local keywords='(AGREE|REFINE|OBJECT|ESCALATE)'
  local first_hit verdict
  first_hit="$(grep -oiE "^[^a-zA-Z]*\\b${keywords}\\b" <<<"$output" | head -1)"
  verdict="$(grep -oiE "\\b${keywords}\\b" <<<"$first_hit" | tr '[:lower:]' '[:upper:]')"
  printf '%s\n' "$verdict"
}

_peer_update_state() {
  # Record the outcome of one peer review for pair $1.
  #   $2 outcome — "AGREE" resets the streak to 0 and marks the pair
  #   "active"; any other value increments the streak and marks the pair
  #   "degraded", or "abandoned" once the streak reaches 3.
  # Also stores last_outcome and last_time.
  local pair="$1"
  local outcome="$2"
  local streak=0

  # A missing or non-numeric stored streak counts as 0.
  local prev_streak
  prev_streak="$(_peer_get_state "$pair" "streak")"
  if [[ "$prev_streak" =~ ^[0-9]+$ ]]; then
    streak="$prev_streak"
  fi

  if [[ "$outcome" == "AGREE" ]]; then
    streak=0
    _peer_set_state "$pair" "status" "active"
  else
    streak=$((streak + 1))
    if [[ "$streak" -ge 3 ]]; then
      _peer_set_state "$pair" "status" "abandoned"
    else
      _peer_set_state "$pair" "status" "degraded"
    fi
  fi

  # `date -Iseconds` is not available in every date(1); when it fails, build
  # the same shape by hand: %z yields +hhmm, so insert the colon.
  local now
  if ! now="$(date -Iseconds 2>/dev/null)" || [[ -z "$now" ]]; then
    now="$(date +%Y-%m-%dT%H:%M:%S%z)"
    now="${now:0:${#now}-2}:${now:${#now}-2}"
  fi

  _peer_set_state "$pair" "streak" "$streak"
  _peer_set_state "$pair" "last_outcome" "$outcome"
  _peer_set_state "$pair" "last_time" "$now"
}

cmd_peer() {
  # `roll peer` — run one round of cross-agent peer review, or dispatch to a
  # subcommand (status | reset | log | runs | help).
  #
  # Options: --from TOOL (required for a review), --to TOOL (auto-routed when
  # omitted), --round N (default 1), --tag TYPE (default "default"),
  # --context FILE, --yes/--yolo (skip the opt-out prompt).
  #
  # A review exits the process with: 0 on AGREE, 2 on REFINE/OBJECT,
  # 1 on anything else (ESCALATE, unparseable response, errors).
  local from_tool=""
  local to_tool=""
  local round=1
  local tag="default"
  local context_file=""
  local yolo=false
  local subcmd=""

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --from) from_tool="$2"; shift 2 ;;
      --to) to_tool="$2"; shift 2 ;;
      --round) round="$2"; shift 2 ;;
      --tag) tag="$2"; shift 2 ;;
      --context) context_file="$2"; shift 2 ;;
      --yes|--yolo) yolo=true; shift ;;
      status) subcmd="status"; shift ;;
      # reset and runs take their own arguments: stop parsing here so the
      # remainder is forwarded untouched instead of being rejected below.
      reset) subcmd="reset"; shift; break ;;
      log) subcmd="log"; shift ;;
      runs) subcmd="runs"; shift; break ;;
      help|--help|-h) subcmd="help"; shift ;;
      *) err "$(msg peer.unknown_option_1)"; exit 1 ;;
    esac
  done

  case "$subcmd" in
    status) cmd_peer_status; return ;;
    reset) cmd_peer_reset "$@"; return ;;
    log) cmd_peer_log; return ;;
    runs) cmd_peer_runs "$@"; return ;;
    help) cmd_peer_help; return ;;
  esac

  if [[ -z "$from_tool" ]]; then
    err "$(msg peer.from_is_required)"
    echo ""
    cmd_peer_help
    exit 1
  fi

  if [[ -z "$to_tool" ]]; then
    # _peer_route returns 1 when nothing is routable; `|| true` keeps
    # errexit from aborting before the diagnostics below are printed.
    to_tool="$(_peer_route "$from_tool" "$tag")" || true
    if [[ -z "$to_tool" ]]; then
      err "$(msg peer.no_available_peer_found_for_tag "$tag")"
      echo ""
      info "Installed peers: $(_peer_detect_peers)"
      info "Capability map: $(config_get "peer_capability_map_${tag}" "$(config_get "peer_capability_map_default" "kimi claude pi")")"
      exit 1
    fi
    info "$(msg peer.auto_selected_peer "$to_tool")"
  fi

  local pair
  pair="$(_peer_normalize_pair "$from_tool" "$to_tool")"

  local status
  status="$(_peer_get_state "$pair" "status")"
  if [[ "$status" == "abandoned" ]]; then
    err "$(msg peer.peer_pair_is_abandoned_run_roll "$pair" "$from_tool" "$to_tool")"
    exit 1
  fi

  if [[ "$yolo" != "true" ]]; then
    # Opt-out window: proceed on Enter or on timeout, abort on "n"/"N".
    local opt_out
    opt_out="$(config_get "peer_opt_out_seconds" "10")"
    info "Launching peer review: $from_tool → $to_tool (round $round, tag: $tag)"
    info "Press Enter to proceed or type 'n' to abort. Auto-executing in ${opt_out}s..."
    info "$(msg peer.en_peer_review "$from_tool" "$to_tool" "$round" "$tag")"
    info "$(msg peer.en_enter_n "${opt_out}")"

    local answer=""
    if IFS= read -r -t "$opt_out" answer 2>/dev/null; then
      if [[ "$answer" == "n" || "$answer" == "N" ]]; then
        info "$(msg peer.peer_review_aborted_by_user)"
        exit 0
      fi
    fi
  fi

  local start_epoch
  start_epoch=$(date +%s)

  local context=""
  if [[ -n "$context_file" && -f "$context_file" ]]; then
    context="$(cat "$context_file")"
  fi

  # Header line, a blank line, then the context. $'\n\n' yields real
  # newlines; "\n\n" inside double quotes would stay a literal backslash-n.
  local prompt
  prompt="[PEER_REVIEW round=${round} tool=${from_tool}→${to_tool}]"$'\n\n'"${context}"

  local peer_session=""
  if command -v tmux >/dev/null 2>&1; then
    peer_session="roll-peer-${from_tool}-${to_tool}"
    if ! tmux has-session -t "$peer_session" 2>/dev/null; then
      tmux new-session -d -s "$peer_session" -x 200 -y 50
    fi
    # Only pop a terminal when nobody is attached to the session yet.
    if [ -z "$(tmux list-clients -t "$peer_session" 2>/dev/null)" ]; then
      _peer_auto_attach "$peer_session"
    fi
  fi

  # FIX-150a: write logs to project-local path; keep global state dir for adaptive routing.
  _peer_ensure_project_dir
  local log_file
  log_file="$(_peer_project_dir)/logs/$(date +%Y%m%d_%H%M%S)_${from_tool}_${to_tool}.md"
  {
    echo "# Peer Review Log"
    echo ""
    echo "- **From**: $from_tool"
    echo "- **To**: $to_tool"
    echo "- **Round**: $round"
    echo "- **Tag**: $tag"
    # Fall back to a portable UTC stamp where date(1) lacks -I.
    echo "- **Time**: $(date -Iseconds 2>/dev/null || date -u +%Y-%m-%dT%H:%M:%SZ)"
    echo ""
    echo "## Prompt"
    echo ""
    echo '```'
    printf '%s\n' "$prompt"
    echo '```'
    echo ""
    echo "## Response"
    echo ""
  } > "$log_file"

  info "$(msg peer.calling "$to_tool")"
  local response
  response="$(_peer_call "$to_tool" "$prompt" "$peer_session")"

  local stderr_log
  stderr_log="${_PEER_STATE_DIR}/logs/.last_stderr.log"
  if [[ -f "$stderr_log" && -s "$stderr_log" ]]; then
    echo ""
    echo -e "$(msg peer.peer_stderr_peer "${BOLD}" "${NC}")"
    cat "$stderr_log"
    echo ""
  fi

  printf '%s\n' "$response" >> "$log_file"

  local resolution=""
  resolution="$(_peer_parse_resolution "$response")"

  if [[ -z "$resolution" ]]; then
    warn "$(msg peer.could_not_parse_resolution_from_peer)"
    resolution="UNKNOWN"
  fi

  _peer_update_state "$pair" "$resolution"

  # FIX-150a: write structured record for observability.
  local duration_sec=0
  duration_sec=$(( $(date +%s) - start_epoch ))
  _peer_write_record "$from_tool" "$to_tool" "$round" "$resolution" "$tag" "$duration_sec"

  echo ""
  echo -e "$(msg peer.peer_review_result_peer_review "${BOLD}" "${NC}")"
  echo "  Pair: $pair"
  echo "  Round: $round"
  echo "  Resolution: $resolution"
  echo ""

  case "$resolution" in
    AGREE)
      ok "$(msg peer.consensus_reached_proceed_with_execution)"
      ;;
    REFINE|OBJECT)
      if [[ "$round" -ge 3 ]]; then
        warn "$(msg peer.max_rounds_reached_escalating_to_user)"
      else
        info "$(msg peer.peer_requests_continue_to_round_round "${resolution}" "$((round + 1))")"
      fi
      ;;
    ESCALATE|UNKNOWN)
      warn "$(msg peer.peer_review_escalated_or_failed_human)"
      ;;
  esac

  echo ""
  info "Log: $log_file"

  # Keep the tmux session alive only while another round is expected
  # (REFINE/OBJECT before round 3); otherwise tear it down.
  local _should_kill=true
  case "$resolution" in
    REFINE|OBJECT) [[ "$round" -lt 3 ]] && _should_kill=false ;;
  esac
  if [[ "$_should_kill" == "true" ]] && [[ -n "$peer_session" ]] \
     && command -v tmux >/dev/null 2>&1 \
     && tmux has-session -t "$peer_session" 2>/dev/null; then
    tmux kill-session -t "$peer_session" 2>/dev/null || true
  fi

  case "$resolution" in
    AGREE) exit 0 ;;
    REFINE|OBJECT) exit 2 ;;
    *) exit 1 ;;
  esac
}

cmd_peer_status() {
  # `roll peer status` — list every pair that has a "<pair>_status" file in
  # the peer state directory, with its status, streak and last outcome, then
  # print the installed peers. Pair names are coloured by status:
  # red = abandoned, yellow = degraded, green otherwise.
  _peer_ensure_state_dir
  echo -e "$(msg peer_status.peer_review_status_peer_review ${BOLD} ${NC})"
  echo ""

  local seen=0 path tint
  local pair status streak last_outcome last_time
  for path in "$_PEER_STATE_DIR"/*_status; do
    # An unmatched glob stays literal; skip anything that is not a file.
    if [[ ! -f "$path" ]]; then
      continue
    fi
    seen=1
    # Pair name = file name without directory and without "_status".
    pair="${path##*/}"
    pair="${pair%_status}"
    status="$(_peer_get_state "$pair" "status")"
    streak="$(_peer_get_state "$pair" "streak")"
    last_outcome="$(_peer_get_state "$pair" "last_outcome")"
    last_time="$(_peer_get_state "$pair" "last_time")"

    case "$status" in
      abandoned) tint="$RED" ;;
      degraded)  tint="$YELLOW" ;;
      *)         tint="$GREEN" ;;
    esac

    echo -e "  ${tint}${pair}${NC}"
    echo "    Status: ${status:-active}"
    echo "    Streak: ${streak:-0}"
    echo "    Last: ${last_outcome:-none} @ ${last_time:-never}"
    echo ""
  done

  if [[ "$seen" -eq 0 ]]; then
    info "$(msg peer_status.no_peer_review_history_yet)"
  fi

  echo ""
  info "Installed peers: $(_peer_detect_peers)"
}

cmd_peer_reset() {
  # `roll peer reset <pair> | --all` — delete stored peer state.
  # --all removes the status, streak, last_outcome and last_time files of
  # every pair; otherwise only those of the first non-option argument.
  # Known `roll peer` option/subcommand words are skipped. Exits 1 when
  # neither --all nor a pair is given.
  local target_pair=""
  local reset_all=false
  local arg key

  for arg in "$@"; do
    case "$arg" in
      --all) reset_all=true ;;
      --from|--to|--round|--tag|--context|--yes|--yolo|status|reset|help|--help|-h) ;;
      *) [[ -n "$target_pair" ]] || target_pair="$arg" ;;
    esac
  done

  _peer_ensure_state_dir

  if [[ "$reset_all" == "true" ]]; then
    for key in status streak last_outcome last_time; do
      rm -f "$_PEER_STATE_DIR"/*_"$key"
    done
    ok "$(msg peer_reset.all_peer_states_reset)"
    return
  fi

  if [[ -z "$target_pair" ]]; then
    err "$(msg peer_reset.usage_roll_peer_reset_from_to)"
    exit 1
  fi

  for key in status streak last_outcome last_time; do
    rm -f "$(_peer_state_file "$target_pair" "$key")"
  done
  ok "$(msg peer_reset.peer_state_reset_peer $target_pair)"
}

cmd_peer_help() {
  # Print usage for `roll peer`: title, usage line, the option list and the
  # subcommand list. Each entry is looked up in the message catalog.
  local key

  echo -e "${BOLD}roll peer — Cross-Agent Peer Review${NC}"
  echo ""
  echo "$(msg peer_help.usage_roll_peer_options)"
  echo ""

  echo "Options:"
  for key in \
    from_tool_originating_agent_kimi_claude \
    to_tool_target_peer_auto_detected \
    round_n_current_round_default_1 \
    tag_type_task_type_for_routing \
    context_file_context_file_to_send \
    yes_yolo_skip_opt_out_prompt; do
    echo "$(msg "peer_help.${key}")"
  done
  echo ""

  echo "Subcommands:"
  for key in \
    status_show_peer_review_state \
    log_show_latest_peer_transcript \
    runs_show_recent_peer_review_runs \
    reset_pair_all_reset_peer_state \
    help_show_this_help; do
    echo "$(msg "peer_help.${key}")"
  done
}

# FIX-150a: `roll peer runs [N]` — show recent peer review runs (project-local).
cmd_peer_runs() {
  # Print a table of the most recent peer review runs, newest first, read
  # from "<peer project dir>/runs.jsonl". An argument starting with a digit
  # sets how many rows to show (default 10; the last such argument wins).
  # Requires jq: returns 1 without it. Prints a notice and returns 0 when
  # there are no runs yet.
  local limit=10 arg
  for arg in "$@"; do
    case "$arg" in
      [0-9]*) limit="$arg" ;;
    esac
  done

  local runs_file
  runs_file="$(_peer_project_dir)/runs.jsonl"

  if ! command -v jq >/dev/null 2>&1; then
    err "$(msg peer.jq_required_for_roll_peer_runs)"
    return 1
  fi

  # -s is false for a missing file as well as for an empty one.
  if [[ ! -s "$runs_file" ]]; then
    echo "$(msg peer.no_peer_runs_yet)"
    return 0
  fi

  # Reverse the file so the newest record comes first, then keep the top rows.
  local newest_first rows
  newest_first=$(awk '{a[NR]=$0} END{for(i=NR; i>=1; i--) print a[i]}' "$runs_file")
  rows=$(printf '%s\n' "$newest_first" | head -n "$limit")

  local row_fmt="%-19s  %-8s  %-10s  %-5s  %-10s  %s\n"
  echo -e "${BOLD}Peer Review Runs${NC}"
  echo ""
  printf "$row_fmt" "Time" "From" "To" "Rnd" "Verdict" "Tag"
  printf "%s\n" "───────────────────  ────────  ──────────  ─────  ──────────  ──────────"

  local row ts from to round verdict tag
  while IFS= read -r row; do
    if [[ -z "$row" ]]; then
      continue
    fi
    # Missing fields are shown as an em dash.
    ts=$(jq -r '.ts // "—"' <<<"$row")
    from=$(jq -r '.from // "—"' <<<"$row")
    to=$(jq -r '.to // "—"' <<<"$row")
    round=$(jq -r '.round // "—"' <<<"$row")
    verdict=$(jq -r '.verdict // "—"' <<<"$row")
    tag=$(jq -r '.tag // "—"' <<<"$row")
    printf "$row_fmt" "$ts" "$from" "$to" "$round" "$verdict" "$tag"
  done <<<"$rows"
}

# FIX-150a: `roll peer log` — show the latest peer review transcript.
cmd_peer_log() {
  # Print the most recent peer review transcript: the last *.md file, in
  # glob (sorted) order, under "<peer project dir>/logs". Prints a notice
  # and returns 0 when there is none.
  local dir
  dir=$(_peer_project_dir)
  # Walk the glob instead of parsing `ls` output; pathname expansion is
  # already sorted, so the last regular file seen is the newest log.
  local latest="" candidate
  for candidate in "$dir/logs"/*.md; do
    if [[ -f "$candidate" ]]; then
      latest="$candidate"
    fi
  done
  if [[ -z "$latest" ]]; then
    echo "$(msg peer.no_peer_logs_found)"
    return 0
  fi
  cat "$latest"
}

# ═══════════════════════════════════════════════════════════════════════════════
# AGENT — per-project agent configuration
# ═══════════════════════════════════════════════════════════════════════════════

# REFACTOR-040: project agent preference moved from the project root
# (`.roll.yaml`) to `.roll/local.yaml`. The new location stays alongside other
# per-machine runtime state inside `.roll/`, never reaches git (.roll/ is
# gitignored), and keeps the project root clean. The old `.roll.yaml` location
# is still read as a fallback so existing checkouts keep working until the next
# `roll agent use` rewrites them in place.
_project_agent_pref_file() {
  # Print the path, relative to the project root, of the per-project agent
  # preference file.
  printf '%s\n' ".roll/local.yaml"
}

_project_agent() {
  # Print the agent to use for the project in the current directory.
  # Sources, first non-empty value wins:
  #   1. "agent:" in the project preference file (_project_agent_pref_file)
  #   2. "agent:" in the legacy ./.roll.yaml
  #   3. "primary_agent:" in $ROLL_CONFIG
  #   4. the default, "claude"
  # Surrounding single or double quotes are stripped from the value; a key
  # that is present but has an empty value falls through to the next source.
  local spec file pattern value
  for spec in \
    "$(_project_agent_pref_file)|^agent:" \
    ".roll.yaml|^agent:" \
    "${ROLL_CONFIG}|primary_agent:"; do
    file="${spec%|*}"
    pattern="${spec##*|}"
    [[ -f "$file" ]] || continue
    # `|| true`: grep exits 1 when the key is absent, which must not abort
    # the script under errexit/pipefail.
    value=$(grep -- "$pattern" "$file" 2>/dev/null | awk '{print $2}' | tr -d "\"'" | head -1) || true
    if [[ -n "$value" ]]; then
      printf '%s\n' "$value"
      return 0
    fi
  done
  echo "claude"
}

_skill_content() {
  # Print file $1 without its leading YAML frontmatter. Frontmatter is only
  # recognised when line 1 is exactly "---"; it ends at the next "---" line.
  # It is roll-internal metadata, not agent instructions. Later "---" lines
  # are ordinary content.
  awk '
    NR == 1 && $0 == "---" { in_frontmatter = 1; next }
    in_frontmatter {
      if ($0 == "---") in_frontmatter = 0
      next
    }
    { print }
  ' "$1"
}

_parse_review_verdict() {
  # Find the last "<!--VERDICT:TYPE-->" or "<!--VERDICT:TYPE:reason-->"
  # marker in text $1 and print "TYPE" or "TYPE:reason" (leading spaces of
  # the reason removed). Prints nothing when there is no marker.
  # Always returns 0.
  local output="$1"
  local marker kind detail
  marker=$(grep -o '<!--VERDICT:[^>]*-->' <<<"$output" | tail -1)
  if [[ -z "$marker" ]]; then
    return 0
  fi
  kind=$(sed -E 's/<!--VERDICT:([A-Z_]+)(:.*)?-->/\1/' <<<"$marker")
  detail=$(sed -E 's/<!--VERDICT:[A-Z_]+:?//; s/-->//' <<<"$marker" | sed 's/^ *//')
  echo "${kind}${detail:+:${detail}}"
}

# REFACTOR-017: single source of truth for agent invocation argv.
# Sets the global _AGENT_ARGV array to the command + args for (agent, mode, prompt).
# Modes:
#   text  — structured text (claude --output-format text; codex exec)
#   plain — default output (claude -p; codex exec)
#   peer  — peer protocol (claude --output-format text; codex --json --output-last-message)
# Returns 1 on unknown agent. Adding a new agent only needs an entry here.
_agent_argv() {
  # Arguments: $1 agent name, $2 mode, $3 prompt.
  # Result: the global array _AGENT_ARGV holds the command and its
  # arguments. Returns 1 (leaving _AGENT_ARGV untouched) for an unknown agent.
  local agent="$1" mode="$2" prompt="$3"
  # US-ONBOARD-018: `interactive` mode launches the agent's REPL with the prompt
  # pre-loaded as the first user message. The user can then converse normally,
  # keep their tty, and exit with Ctrl-C or /exit. Used by `roll init` to auto-
  # start the chosen agent for $roll-onboard without a copy-paste handoff.
  # Convention: positional arg as initial prompt; no -p / exec / run / --quiet.
  case "$agent" in
    claude)
      if [[ "$mode" == "interactive" ]]; then
        _AGENT_ARGV=(claude "$prompt")
      elif [[ "$mode" == "text" || "$mode" == "peer" ]]; then
        _AGENT_ARGV=(claude -p --output-format text "$prompt")
      else
        _AGENT_ARGV=(claude -p "$prompt")
      fi ;;
    kimi)
      # FIX-126: Kimi upstream renamed binary from kimi-cli → kimi-code.
      # Prefer the new name when present; fall back through legacy names
      # so users mid-upgrade keep working until they reinstall.
      local _kimi_bin=kimi
      if command -v kimi-code >/dev/null 2>&1; then
        _kimi_bin=kimi-code
      elif command -v kimi-cli >/dev/null 2>&1; then
        _kimi_bin=kimi-cli
      fi
      if [[ "$mode" == "interactive" ]]; then
        _AGENT_ARGV=("$_kimi_bin" "$prompt")
      else
        # FIX-133: kimi-code has no --quiet; -p already implies auto-approval.
        _AGENT_ARGV=("$_kimi_bin" -p "$prompt")
      fi ;;
    deepseek|qwen)
      # Same argv shape in every mode: the prompt as a positional argument.
      _AGENT_ARGV=("$agent" "$prompt") ;;
    pi)
      if [[ "$mode" == "interactive" ]]; then
        _AGENT_ARGV=(pi "$prompt")
      else
        _AGENT_ARGV=(pi -p "$prompt")
      fi ;;
    codex|openai)
      if [[ "$mode" == "interactive" ]]; then
        _AGENT_ARGV=(codex "$prompt")
      elif [[ "$mode" == "peer" ]]; then
        # NOTE(review): upstream codex appears to document
        # --output-last-message as taking a file path; confirm the prompt is
        # not being consumed as that path.
        _AGENT_ARGV=(codex exec --json --output-last-message "$prompt")
      else
        _AGENT_ARGV=(codex exec "$prompt")
      fi ;;
    opencode)
      if [[ "$mode" == "interactive" ]]; then
        _AGENT_ARGV=(opencode "$prompt")
      else
        _AGENT_ARGV=(opencode run "$prompt")
      fi ;;
    gemini|agy|antigravity)
      # Antigravity (agy) replaces the deprecated Google Gemini CLI as of
      # late 2025. agy reuses ~/.gemini/ for config and reads GEMINI.md
      # natively, so the convention sync target is unchanged — only the
      # invoked binary changes.
      # FIX-153: non-interactive modes must use -p (headless) +
      # --dangerously-skip-permissions so the agent does not hang waiting
      # for a tty approval that never comes in loop/cron contexts.
      if [[ "$mode" == "interactive" ]]; then
        _AGENT_ARGV=(agy -i "$prompt")
      else
        _AGENT_ARGV=(agy -p --dangerously-skip-permissions "$prompt")
      fi ;;
    *)
      return 1 ;;
  esac
}

# Render the argv for (agent, mode, prompt) as one shell-quoted string.
# All arguments are forwarded untouched to _agent_argv; each resulting word is
# escaped with printf %q and the words are joined by single spaces. Intended
# for call sites that need the command as text (e.g. tmux send-keys).
# Returns 1 and prints nothing when _agent_argv fails.
_agent_cmd_str() {
  _agent_argv "$@" || return 1
  local word quoted rendered="" sep=""
  for word in "${_AGENT_ARGV[@]}"; do
    printf -v quoted '%q' "$word"
    rendered+="${sep}${quoted}"
    sep=" "
  done
  printf '%s' "$rendered"
}

# Insert --dangerously-skip-permissions into _AGENT_ARGV when the command is
# claude. Meant for trusted, human-triggered, or autonomous flows that must
# not stall on Claude Code's pre-write "approve diff" step (which never gets
# approved in `claude -p` pipe mode).
# The flag is placed after the first two words of the existing argv.
# No-op when the command is not claude or the flag is already present.
_agent_bypass_claude_perms() {
  local flag="--dangerously-skip-permissions" word
  if [[ "${_AGENT_ARGV[0]}" != "claude" ]]; then
    return 0
  fi
  for word in "${_AGENT_ARGV[@]}"; do
    if [[ "$word" == "$flag" ]]; then
      return 0
    fi
  done
  _AGENT_ARGV=(
    "${_AGENT_ARGV[@]:0:2}"
    "$flag"
    "${_AGENT_ARGV[@]:2}"
  )
}

# Run an installed skill through the project's configured agent.
# $1 - skill name; its body is read from ${ROLL_HOME}/skills/<name>/SKILL.md.
# The skill content becomes the prompt of a "text"-mode agent invocation.
# Returns 1 if the skill file is missing or the agent is unknown; otherwise
# returns the agent command's own exit status.
_agent_run_skill() {
  local skill="$1"
  local agent skill_file content
  agent=$(_project_agent)
  skill_file="${ROLL_HOME}/skills/${skill}/SKILL.md"
  if [[ ! -f "$skill_file" ]]; then
    err "Skill not found: ${skill}"
    return 1
  fi
  content=$(_skill_content "$skill_file")
  if ! _agent_argv "$agent" text "$content"; then
    err "Unknown agent '${agent}'. Run: roll agent use <claude|kimi|deepseek|pi|openai|codex|opencode|qwen|antigravity>"
    return 1
  fi
  "${_AGENT_ARGV[@]}"
}

# ═══════════════════════════════════════════════════════════════════════════════
# SLIDES — deck.md → HTML rendering pipeline (US-DECK-003 / 004 / 005)
#
#   roll slides build <slug>   render .roll/slides/<slug>/deck.md → .html, open
#   roll slides new "<topic>"  invoke selected agent with roll-deck skill (US-DECK-004)
#   roll slides list           list decks as a table (US-DECK-005)
#   roll slides preview <slug> open .roll/slides/<slug>.html in browser (US-DECK-005)
#
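# The four subcommands above are implemented: `build` (DECK-003), `new`
# (DECK-004), `list` / `preview` (DECK-005). Later stories added `logs`
# (DECK-011) and `templates` / `delete` (DECK-014); see `_slides_help`.
# The AI authoring step happens in `new`; `build` drives the bundled Python
# validator and renderer; the remaining subcommands are plain bash.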
# ═══════════════════════════════════════════════════════════════════════════════
# ─── Progress helpers (US-DECK-010) ──────────────────────────────────────────
# Shared state for slides progress reporting.
# These are file-level globals: _slides_progress_init resets them and the
# other _slides_progress_* helpers read/update them.

# Phase names passed to _slides_progress_init.
_SLIDES_PROGRESS_PHASES=()
# Epoch seconds (date +%s) captured by _slides_progress_init.
_SLIDES_PROGRESS_START_TIME=0
# Name of the phase most recently entered; empty before the first phase.
_SLIDES_PROGRESS_CURRENT=""
# Epoch seconds at which the current phase was entered; 0 before the first.
_SLIDES_PROGRESS_PHASE_START_TIME=0
# PID of the background spinner subshell; empty when no spinner is running.
_SLIDES_PROGRESS_SPINNER_PID=""
# 1 suppresses progress output (phase lines and spinner); 0 shows it.
_SLIDES_PROGRESS_QUIET=0

# Reset all progress state for a fresh run.
# Arguments: the ordered phase names, stored in _SLIDES_PROGRESS_PHASES.
# Clears the current phase and spinner PID, switches quiet mode off, and
# stamps the run start time (epoch seconds).
_slides_progress_init() {
  # Per-phase bookkeeping first, then the run-level fields.
  _SLIDES_PROGRESS_CURRENT=""
  _SLIDES_PROGRESS_PHASE_START_TIME=0
  _SLIDES_PROGRESS_SPINNER_PID=""
  _SLIDES_PROGRESS_QUIET=0
  _SLIDES_PROGRESS_PHASES=("$@")
  _SLIDES_PROGRESS_START_TIME=$(date +%s)
}

# Switch progress reporting to quiet mode by setting _SLIDES_PROGRESS_QUIET=1,
# the flag the other progress helpers check before writing to stderr.
_slides_progress_quiet() {
  _SLIDES_PROGRESS_QUIET=1
}

# Format a duration in whole seconds for display.
# $1 - elapsed seconds (defaults to 0).
# Prints "<s>s" below one minute, otherwise "<m>m <s>s"; no trailing newline.
_slides_progress_elapsed_str() {
  local total="${1:-0}"
  local minutes=$(( total / 60 ))
  local seconds=$(( total % 60 ))
  if (( minutes <= 0 )); then
    printf '%ds' "$seconds"
    return 0
  fi
  printf '%dm %ds' "$minutes" "$seconds"
}

# Turn on quiet mode automatically when stdout (fd 1) is not a terminal.
_slides_progress_detect_tty() {
  [[ -t 1 ]] && return 0
  _slides_progress_quiet
}

# Move progress reporting to a new phase.
# $1 - name of the phase being entered.
# Steps, in order:
#   1. stop a running spinner (if any) and clear its line,
#   2. print a completion line with elapsed time for the previous phase,
#   3. record the new phase name and start time,
#   4. announce the new phase.
# All output goes to stderr and is skipped in quiet mode. Always returns 0.
_slides_progress_phase_enter() {
  local next_phase="$1"
  local ts
  ts=$(date +%s)

  # 1. Tear down the spinner before printing anything on its line.
  local spinner="${_SLIDES_PROGRESS_SPINNER_PID:-}"
  if [[ -n "$spinner" ]]; then
    kill "$spinner" 2>/dev/null || true
    wait "$spinner" 2>/dev/null || true
    _SLIDES_PROGRESS_SPINNER_PID=""
    if (( _SLIDES_PROGRESS_QUIET == 0 )); then
      printf '\r\033[K' >&2
    fi
  fi

  # 2. Close out the phase we are leaving (only if one was actually started).
  if [[ -n "$_SLIDES_PROGRESS_CURRENT" ]] && (( _SLIDES_PROGRESS_PHASE_START_TIME > 0 )); then
    local took
    took=$(_slides_progress_elapsed_str "$(( ts - _SLIDES_PROGRESS_PHASE_START_TIME ))")
    if (( _SLIDES_PROGRESS_QUIET == 0 )); then
      printf '  ✅ %s (%s)\n' "$_SLIDES_PROGRESS_CURRENT" "$took" >&2
    fi
  fi

  # 3. Record the new phase.
  _SLIDES_PROGRESS_CURRENT="$next_phase"
  _SLIDES_PROGRESS_PHASE_START_TIME="$ts"

  # 4. Announce it.
  if (( _SLIDES_PROGRESS_QUIET == 0 )); then
    printf '→ %s ...\n' "$next_phase" >&2
  fi
  return 0
}

# Start a background spinner for the current phase.
# No-op (returns 0) in quiet mode. Otherwise seeds the label file with the
# current phase name and forks a subshell that redraws "<frame> <label>" on
# stderr every 0.1s, re-reading the label file on each tick so
# _slides_progress_spinner_label can change the text while it runs.
# The subshell PID is stored in _SLIDES_PROGRESS_SPINNER_PID; the caller is
# responsible for killing it.
_slides_progress_spinner_start() {
  if [[ $_SLIDES_PROGRESS_QUIET -eq 1 ]]; then
    return 0
  fi

  # One array element per frame. Slicing a string with ${chars:i:1} counts
  # bytes under a C/POSIX locale and would emit broken UTF-8 fragments.
  local -a frames=('⠋' '⠙' '⠹' '⠸' '⠼' '⠴' '⠦' '⠧' '⠇' '⠏')
  local delay=0.1
  # Path convention shared with _slides_progress_spinner_label.
  local label_file="${TEST_TMP:-/tmp}/roll-slides-spinner-label-$$"
  printf '%s' "$_SLIDES_PROGRESS_CURRENT" > "$label_file"
  (
    local i=0 label
    while true; do
      # Fall back to the phase name if the label file is unreadable.
      label=$(cat "$label_file" 2>/dev/null || echo "$_SLIDES_PROGRESS_CURRENT")
      printf '\r\033[K%s %s' "${frames[i]}" "$label" >&2
      i=$(( (i + 1) % ${#frames[@]} ))
      sleep "$delay"
    done
  ) &
  _SLIDES_PROGRESS_SPINNER_PID=$!
}

# Replace the text shown next to the spinner.
# $1 - new label. It is written (without a trailing newline) to the per-process
# label file under ${TEST_TMP:-/tmp}, which the spinner loop re-reads.
_slides_progress_spinner_label() {
  local target="${TEST_TMP:-/tmp}/roll-slides-spinner-label-$$"
  printf '%s' "$1" > "$target"
}

# Print a completion line for a phase.
# $1 - phase name; $2 - elapsed seconds (defaults to 0).
# Writes "  ✅ <phase> (<elapsed>)" to stderr unless quiet mode is on.
# Always returns 0. The previous `[[ … ]] && printf` form returned 1 in quiet
# mode, which aborts a caller running under `set -e`.
_slides_progress_ok() {
  local phase="$1"
  local elapsed="${2:-0}"
  local elapsed_str
  elapsed_str=$(_slides_progress_elapsed_str "$elapsed")
  if [[ $_SLIDES_PROGRESS_QUIET -eq 0 ]]; then
    printf '  ✅ %s (%s)\n' "$phase" "$elapsed_str" >&2
  fi
  return 0
}

# ─── slides help ─────────────────────────────────────────────────────────────
_slides_help() {
  # Bilingual (English / Chinese) usage text for every `roll slides`
  # subcommand, printed to stdout. `--quiet` is listed because
  # `roll slides new` accepts it and its usage error already advertises it.
  cat <<'EOF'
roll slides — deck.md → HTML rendering
roll slides — 幻灯片 deck.md 渲染管线

USAGE  用法
  roll slides build <slug> [--no-open]
                          Render .roll/slides/<slug>/deck.md → .roll/slides/<slug>.html
                          渲染 deck.md 为 HTML 并自动打开浏览器
  roll slides new "<topic>" [--template <name>] [--quiet] [--no-build]
                          Generate deck.md via AI, then auto-build + open HTML
                          通过 AI 生成 deck.md，自动渲染并打开 HTML
  roll slides list        List all decks (built / stale / failed / unbuilt)
                          列出 .roll/slides/ 下所有幻灯片（四态）
  roll slides preview <slug> [--no-open]
                          Open .roll/slides/<slug>.html in the default browser
                          在浏览器中打开已渲染的幻灯片
  roll slides logs <slug> Show the last build failure log for a deck
                          显示幻灯片上次构建失败日志
  roll slides templates   List available slide templates (built-in + project)
                          列出可用模板（内置 + 项目自定义）
  roll slides delete <slug> [--force]
                          Delete a deck (dir + HTML) with confirmation prompt
                          删除幻灯片（含目录与 HTML），需确认

OPTIONS  选项
  --no-open               Skip auto-opening the rendered HTML in a browser
                          渲染后不自动打开浏览器
  --no-build              Skip auto-build after agent completes (deck.md only)
                          仅生成 deck.md，不自动渲染
  --quiet                 Suppress progress output (new subcommand)
                          不显示进度输出（new 子命令）
  --force                 Skip confirmation prompt (delete subcommand)
                          跳过确认提示（delete 子命令）
  --help, -h              Show this help
                          显示本帮助
EOF
}

# Print the directory that holds the slides validator and renderer scripts:
# the `lib` directory of the roll package (no trailing newline).
_slides_lib() {
  printf '%s/lib' "${ROLL_PKG_DIR}"
}

# Locate the HTML file for a template name.
# $1 - template name (without the .html extension).
# Search order (US-DECK-013): the project override under
# .roll/slides/templates/ wins over the built-in copy shipped in the package.
# Prints the first existing path and returns 0; returns 1 if neither exists.
_slides_template_path() {
  local name="$1"
  local candidate
  for candidate in \
    ".roll/slides/templates/${name}.html" \
    "${ROLL_PKG_DIR}/lib/slides/templates/${name}.html"; do
    if [[ -f "$candidate" ]]; then
      printf '%s' "$candidate"
      return 0
    fi
  done
  return 1
}

# Read the `template:` value from a deck.md frontmatter.
# $1 - path to deck.md.
# Only the block between the first and second `---` lines is scanned. Trailing
# whitespace (including a CR from CRLF files) is trimmed before one layer of
# surrounding quotes is removed, so `template: "name"  ` yields `name`.
# Prints `introduction-v3` when the field is absent or empty (the validator
# catches a missing field separately).
_slides_template_for_deck() {
  local deck="$1"
  local tpl
  tpl=$(awk '
    /^---[[:space:]]*$/ { d++; if (d==2) exit; next }
    d==1 && /^template:[[:space:]]*/ {
      sub(/^template:[[:space:]]*/, "")
      sub(/[[:space:]]+$/, "")
      gsub(/^["'\'']|["'\'']$/, "")
      print
      exit
    }
  ' "$deck" 2>/dev/null)
  [[ -n "$tpl" ]] || tpl="introduction-v3"
  printf '%s' "$tpl"
}

# Ensure .roll/.gitignore contains `slides/*.html` so the per-build HTML
# artefact is ignored by default. deck.md remains committable. Idempotent:
# returns without writing when the exact line is already present.
_slides_ensure_gitignore() {
  local gi=".roll/.gitignore"
  mkdir -p ".roll"
  if [[ -f "$gi" ]] && grep -qE '^slides/\*\.html$' "$gi" 2>/dev/null; then
    return 0
  fi
  # Terminate an unterminated last line before appending. Command substitution
  # strips a trailing newline, so a non-empty result means the final byte is
  # NOT a newline. (Comparing against $'\n' could never match and added a
  # blank line on every first write.)
  if [[ -s "$gi" ]] && [[ -n "$(tail -c 1 "$gi" 2>/dev/null)" ]]; then
    printf '\n' >>"$gi"
  fi
  printf 'slides/*.html\n' >>"$gi"
}

# Choose the command used to open a file in the default browser.
# Maps `uname -s` to an opener (Darwin → open, Linux → xdg-open) and prints
# its name when it is available on PATH. Returns 1, printing nothing, on any
# other OS or when the opener is missing (tests + headless CI).
_slides_open_cmd() {
  local candidate=""
  case "$(uname -s 2>/dev/null)" in
    Darwin) candidate="open" ;;
    Linux)  candidate="xdg-open" ;;
  esac
  [[ -n "$candidate" ]] || return 1
  command -v "$candidate" >/dev/null 2>&1 || return 1
  printf '%s' "$candidate"
  return 0
}

# `roll slides build <slug> [--no-open]`
# Validates .roll/slides/<slug>/deck.md, renders it with the template named in
# its frontmatter to .roll/slides/<slug>.html, then opens the result.
#   - Validator exit 2 is a grounding warning: echoed, the build continues.
#   - Any other validator failure, an unknown template, or a renderer failure
#     is recorded in .roll/slides/<slug>/.last-build.err (US-DECK-011) and the
#     function returns 1. A successful build removes that file.
#   - The browser is skipped for --no-open, under bats (BATS_TEST_NUMBER), or
#     when ROLL_SLIDES_NO_OPEN is set.
# Returns 0 on success, 1 on a usage error or any build failure.
cmd_slides_build() {
  local slug="" no_open=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --no-open) no_open=1; shift ;;
      --help|-h) _slides_help; return 0 ;;
      --*) err "$(msg slides_build.unknown_option_1)"; return 1 ;;
      *)
        if [[ -z "$slug" ]]; then
          slug="$1"; shift
        else
          err "$(msg slides_build.unexpected_argument_1)"; return 1
        fi
        ;;
    esac
  done

  if [[ -z "$slug" ]]; then
    err "Usage: roll slides build <slug> [--no-open]"
    echo "$(msg slides_build.usage_roll_slides_build_slug_no)" >&2
    return 1
  fi

  local deck=".roll/slides/${slug}/deck.md"
  if [[ ! -f "$deck" ]]; then
    err "Deck not found: ${deck}"
    # Quoted so a slug containing spaces or glob characters stays one argument.
    echo "$(msg slides_build.en_deck "${deck}")" >&2
    echo "  Hint: run 'roll slides new \"<topic>\"' to generate a new deck." >&2
    echo "$(msg slides_build.en_roll_slides_new)" >&2
    return 1
  fi

  local lib_dir; lib_dir=$(_slides_lib)
  local validator="${lib_dir}/slides-validate.py"
  local renderer="${lib_dir}/slides-render.py"
  if [[ ! -f "$validator" || ! -f "$renderer" ]]; then
    err "$(msg slides_build.slides_toolchain_missing_re_run_roll)"
    return 1
  fi

  # US-DECK-011: capture failure to .last-build.err for slides logs
  local err_file=".roll/slides/${slug}/.last-build.err"
  local ts

  # 1. Validate first (fail-fast on AI-generated decks).
  # Exit 2 = grounding warning (schema OK, evidence below threshold) — warn but don't fail.
  local val_out val_exit=0
  val_out=$(python3 "$validator" "$deck" 2>&1) || val_exit=$?
  if [[ "$val_exit" -eq 2 ]]; then
    echo "[roll] ${val_out}" >&2
  elif [[ "$val_exit" -ne 0 ]]; then
    ts=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    mkdir -p ".roll/slides/${slug}"
    printf '[%s] stage=validate\n%s\n' "$ts" "$val_out" > "$err_file"
    # Print validator output so user can see what failed.
    printf '%s\n' "$val_out" >&2
    echo -e "${RED}[FAIL]${NC} $(msg slides_build.validation_failed_for "${deck}")" >&2
    echo "  $(msg slides_build.hint_fix_and_rerun "${deck}" "${slug}")" >&2
    return 1
  fi

  # 2. Resolve template + render.
  local tpl_name; tpl_name=$(_slides_template_for_deck "$deck")
  local tpl_path
  if ! tpl_path=$(_slides_template_path "$tpl_name"); then
    ts=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    mkdir -p ".roll/slides/${slug}"
    printf '[%s] stage=template\ntemplate not found: %s\n' "$ts" "$tpl_name" > "$err_file"
    echo -e "${RED}[FAIL]${NC} $(msg slides_build.template_not_found "${tpl_name}")" >&2
    # List available templates (built-in + project overrides).
    echo "  $(msg slides_build.available_templates)" >&2
    local t n
    local builtin_dir="${ROLL_PKG_DIR}/lib/slides/templates"
    if [[ -d "$builtin_dir" ]]; then
      for t in "$builtin_dir"/*.html; do
        # An unmatched glob stays literal; skip it instead of listing "*".
        [[ -e "$t" ]] || continue
        n="${t##*/}"; n="${n%.html}"
        printf '    %-20s (builtin)\n' "$n" >&2
      done
    fi
    local proj_dir=".roll/slides/templates"
    if [[ -d "$proj_dir" ]]; then
      for t in "$proj_dir"/*.html; do
        [[ -e "$t" ]] || continue
        n="${t##*/}"; n="${n%.html}"
        printf '    %-20s (project)\n' "$n" >&2
      done
    fi
    echo "  $(msg slides_build.templates_list_hint)" >&2
    return 1
  fi

  local out=".roll/slides/${slug}.html"
  mkdir -p ".roll/slides"
  local render_out; render_out=$(python3 "$renderer" "$deck" "$tpl_path" "$out" 2>&1) || {
    ts=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    mkdir -p ".roll/slides/${slug}"
    printf '[%s] stage=render\n%s\n' "$ts" "$render_out" > "$err_file"
    echo -e "${RED}[FAIL]${NC} $(msg slides_build.renderer_crashed_for "${deck}")" >&2
    echo "  $(msg slides_build.see_full_error_logs "${slug}")" >&2
    local last_lines; last_lines=$(printf '%s' "$render_out" | tail -n 5)
    if [[ -n "$last_lines" ]]; then
      echo "  $(msg slides_build.last_5_lines_of_renderer_output)" >&2
      printf '%s\n' "$last_lines" >&2
    fi
    return 1
  }

  # US-DECK-011: build succeeded — remove any stale .last-build.err
  rm -f "$err_file" 2>/dev/null || true

  # 3. Default-ignore the HTML artefact so it doesn't accidentally get committed.
  _slides_ensure_gitignore

  ok "$(msg slides_build.rendered "${out}")"

  # 4. Auto-open browser unless suppressed (or running inside bats tests).
  if [[ "$no_open" -eq 1 ]] || [[ -n "${BATS_TEST_NUMBER:-}" ]] || [[ -n "${ROLL_SLIDES_NO_OPEN:-}" ]]; then
    return 0
  fi
  local opener
  if opener=$(_slides_open_cmd); then
    "$opener" "$out" >/dev/null 2>&1 || true
  fi
  return 0
}

# ─── US-DECK-005 ─────────────────────────────────────────────────────────────
# Read a frontmatter field from deck.md. Prints nothing if the field is absent.
# $1 - path to deck.md; $2 - field name (used inside a regex, so callers pass
#      plain identifiers).
# Stops scanning at the closing `---` so YAML body keys can't leak through.
# Trailing whitespace (including a CR from CRLF files) is trimmed before one
# layer of surrounding quotes is removed, so `key: "v"  ` yields `v`.
_slides_frontmatter_field() {
  local deck="$1" field="$2"
  awk -v field="$field" '
    /^---[[:space:]]*$/ { d++; if (d==2) exit; next }
    d==1 {
      pat = "^" field "[[:space:]]*:[[:space:]]*"
      if ($0 ~ pat) {
        sub(pat, "")
        sub(/[[:space:]]+$/, "")
        gsub(/^["'\'']|["'\'']$/, "")
        print
        exit
      }
    }
  ' "$deck" 2>/dev/null
}

# Render a byte count compactly: 1234 → "1.2K", 2345678 → "2.2M".
# $1 - size in bytes (defaults to 0). Below 1024 the exact count is printed
# with a "B" suffix; larger values get one truncated (not rounded) decimal.
# Integer shell arithmetic only — no `bc` or `numfmt` dependency.
_slides_human_size() {
  local bytes="${1:-0}"
  if (( bytes < 1024 )); then
    printf '%dB' "$bytes"
    return
  fi
  local unit divisor
  if (( bytes < 1048576 )); then
    unit="K"; divisor=1024
  else
    unit="M"; divisor=1048576
  fi
  # Work in tenths of a unit so the single decimal needs no floating point.
  local tenths=$(( (bytes * 10) / divisor ))
  printf '%d.%d%s' "$(( tenths / 10 ))" "$(( tenths % 10 ))" "$unit"
}

# `roll slides list`
# Prints one table row per directory under .roll/slides/ that contains a
# deck.md, sorted by slug: slug, template, total_slides, created (all read
# from the frontmatter), build state and HTML size.
# Build state (US-DECK-011): "⚠ failed" if .last-build.err exists, "≈ stale"
# if deck.md is newer than the HTML, "✓ built" if the HTML is current,
# "✗ unbuilt" if there is no HTML.
# Takes no arguments besides --help/-h; returns 1 on any other argument.
cmd_slides_list() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --help|-h) _slides_help; return 0 ;;
      --*) err "$(msg slides_list.unknown_option_1)"; return 1 ;;
      *) err "$(msg slides_list.unexpected_argument_1)"; return 1 ;;
    esac
  done

  local slides_dir=".roll/slides"
  if [[ ! -d "$slides_dir" ]]; then
    info "$(msg slides_list.no_decks_found_under_roll_slides)"
    echo "  Hint: run 'roll slides new \"<topic>\"' to create one."
    echo "$(msg slides_list.en_roll_slides_new)"
    return 0
  fi

  # Collect slugs. The deck.md test also rejects the literal pattern an
  # unmatched glob leaves behind, so the caller's nullglob setting is left
  # untouched.
  local -a slugs=()
  local d slug
  for d in "$slides_dir"/*/; do
    [[ -f "${d}deck.md" ]] || continue
    slug="${d%/}"
    slugs+=("${slug##*/}")
  done

  if [[ "${#slugs[@]}" -eq 0 ]]; then
    info "$(msg slides_list.no_decks_found_under_roll_slides_2)"
    echo "  Hint: run 'roll slides new \"<topic>\"' to create one."
    echo "$(msg slides_list.en_roll_slides_new_2)"
    return 0
  fi

  # Sort line by line. An unquoted $(…) array assignment would glob-expand
  # slug names and required mutating (then unsetting) the global IFS.
  local -a sorted_slugs=()
  local line
  while IFS= read -r line; do
    sorted_slugs+=("$line")
  done < <(printf '%s\n' "${slugs[@]}" | sort)

  printf '%-20s  %-20s  %-12s  %-12s  %-8s  %s\n' \
    "slug" "template" "total_slides" "created" "built" "size"
  printf '%-20s  %-20s  %-12s  %-12s  %-8s  %s\n' \
    "----" "--------" "------------" "-------" "------" "----"

  local s deck html err_file template total created built size bytes
  for s in "${sorted_slugs[@]}"; do
    deck="${slides_dir}/${s}/deck.md"
    html="${slides_dir}/${s}.html"
    err_file="${slides_dir}/${s}/.last-build.err"
    template=$(_slides_frontmatter_field "$deck" "template")
    [[ -z "$template" ]] && template="-"
    total=$(_slides_frontmatter_field "$deck" "total_slides")
    [[ -z "$total" ]] && total="-"
    created=$(_slides_frontmatter_field "$deck" "created")
    [[ -z "$created" ]] && created="-"
    # US-DECK-011: 4-state built column
    if [[ -f "$err_file" ]]; then
      built="⚠ failed"
      size="-"
    elif [[ -f "$html" ]]; then
      # Check if deck.md is newer than the HTML (stale)
      if [[ "$deck" -nt "$html" ]]; then
        built="≈ stale"
        size="-"
      else
        built="✓ built"
        bytes=$(wc -c <"$html" 2>/dev/null | tr -d ' ')
        [[ -z "$bytes" ]] && bytes=0
        size=$(_slides_human_size "$bytes")
      fi
    else
      built="✗ unbuilt"
      size="-"
    fi
    printf '%-20s  %-20s  %-12s  %-12s  %-8s  %s\n' \
      "$s" "$template" "$total" "$created" "$built" "$size"
  done
  return 0
}

# `roll slides preview <slug> [--no-open]`
# Opens the already-rendered .roll/slides/<slug>.html in the default browser.
# Returns 1 on a usage error or when the HTML does not exist. The browser is
# skipped for --no-open, under bats (BATS_TEST_NUMBER), or when
# ROLL_SLIDES_NO_OPEN is set; a missing opener is not an error.
cmd_slides_preview() {
  local slug="" no_open=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --no-open) no_open=1; shift ;;
      --help|-h) _slides_help; return 0 ;;
      --*) err "$(msg slides_preview.unknown_option_1)"; return 1 ;;
      *)
        if [[ -z "$slug" ]]; then
          slug="$1"; shift
        else
          err "$(msg slides_preview.unexpected_argument_1)"; return 1
        fi
        ;;
    esac
  done

  if [[ -z "$slug" ]]; then
    err "Usage: roll slides preview <slug> [--no-open]"
    echo "$(msg slides_preview.usage_roll_slides_preview_slug_no)" >&2
    return 1
  fi

  local html=".roll/slides/${slug}.html"
  if [[ ! -f "$html" ]]; then
    err "Rendered HTML not found: ${html}"
    # msg arguments are quoted so a slug with spaces or glob characters is
    # passed as a single argument.
    echo "$(msg slides_preview.en_html "${html}")" >&2
    echo "  Hint: run 'roll slides build ${slug}' first to render it." >&2
    echo "$(msg slides_preview.en_roll_slides_build "${slug}")" >&2
    return 1
  fi

  ok "$(msg slides_preview.preview "${html}")"

  if [[ "$no_open" -eq 1 ]] || [[ -n "${BATS_TEST_NUMBER:-}" ]] || [[ -n "${ROLL_SLIDES_NO_OPEN:-}" ]]; then
    return 0
  fi
  local opener
  if opener=$(_slides_open_cmd); then
    "$opener" "$html" >/dev/null 2>&1 || true
  fi
  return 0
}

# ─── US-DECK-011 ─────────────────────────────────────────────────────────────
# `roll slides logs <slug>`
# Prints .roll/slides/<slug>/.last-build.err, the record left by the last
# failed build. Returns 1 on a usage error or when the deck does not exist;
# returns 0 with an informational message when there is no failure record.
cmd_slides_logs() {
  local slug=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --help|-h) _slides_help; return 0 ;;
      --*) err "$(msg slides_logs.unknown_option_1)"; return 1 ;;
      *)
        if [[ -z "$slug" ]]; then
          slug="$1"; shift
        else
          err "$(msg slides_logs.unexpected_argument_1)"; return 1
        fi
        ;;
    esac
  done

  if [[ -z "$slug" ]]; then
    err "Usage: roll slides logs <slug>"
    echo "$(msg slides_logs.usage_roll_slides_logs_slug)" >&2
    return 1
  fi

  local deck_dir=".roll/slides/${slug}"
  local err_file="${deck_dir}/.last-build.err"

  # msg arguments are quoted so a slug with spaces or glob characters is
  # passed as a single argument.
  if [[ ! -d "$deck_dir" ]] || [[ ! -f "${deck_dir}/deck.md" ]]; then
    err "$(msg slides_logs.deck_not_found "${slug}")"
    return 1
  fi

  if [[ ! -f "$err_file" ]]; then
    info "$(msg slides_logs.no_failure_records_for "${slug}")"
    return 0
  fi

  cat "$err_file"
  return 0
}

# ─── US-DECK-014 ─────────────────────────────────────────────────────────────
# `roll slides delete <slug> [--force]`
# Removes .roll/slides/<slug>/ and .roll/slides/<slug>.html.
# Without --force the user must confirm with y/yes on stdin; when stdin is not
# a terminal, --force is mandatory. Anything other than y/yes (including EOF)
# cancels and returns 0.
# Returns 1 on a usage error, a missing deck, a non-interactive call without
# --force, or when the deck is still present after removal.
cmd_slides_delete() {
  local slug="" force=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --force) force=1; shift ;;
      --help|-h) _slides_help; return 0 ;;
      --*) err "$(msg slides_delete.unknown_option_1)"; return 1 ;;
      *)
        if [[ -z "$slug" ]]; then
          slug="$1"; shift
        else
          err "$(msg slides_delete.unexpected_argument_1)"; return 1
        fi
        ;;
    esac
  done

  if [[ -z "$slug" ]]; then
    err "Usage: roll slides delete <slug> [--force]"
    echo "$(msg slides_delete.usage_roll_slides_delete_slug_force)" >&2
    return 1
  fi

  local deck_dir=".roll/slides/${slug}"
  local html=".roll/slides/${slug}.html"

  if [[ ! -d "$deck_dir" ]] || [[ ! -f "${deck_dir}/deck.md" ]]; then
    err "$(msg slides_delete.deck_not_found "${slug}")"
    return 1
  fi

  # Non-TTY must use --force (skip interactive confirmation)
  if [[ $force -eq 0 ]]; then
    if [[ ! -t 0 ]]; then
      err "$(msg slides_delete.non_interactive_terminal_must_use_force)"
      return 1
    fi
    printf '%s ' "$(msg slides_delete.prompt "$slug")" >&2
    # `answer` is local so it does not leak into the caller; `|| true` turns
    # EOF (Ctrl-D) into an empty answer instead of a `set -e` abort.
    local answer=""
    read -r answer || true
    case "$answer" in
      [yY]|[yY][eE][sS]) : ;;
      *) info "$(msg slides_delete.cancelled)"; return 0 ;;
    esac
  fi

  # Remove deck directory and HTML file, then verify: reporting "deleted"
  # while the files are still on disk would be misleading.
  rm -rf -- "$deck_dir" 2>/dev/null || true
  rm -f -- "$html" 2>/dev/null || true
  if [[ -e "$deck_dir" || -e "$html" ]]; then
    err "Failed to delete deck: ${slug}"
    return 1
  fi
  ok "$(msg slides_delete.deleted "${slug}")"
  return 0
}

# ─── US-DECK-014 ─────────────────────────────────────────────────────────────
# `roll slides templates`
# Prints a name / source / path table of every *.html template: first the
# built-in ones shipped under ${ROLL_PKG_DIR}/lib/slides/templates, then the
# project ones under .roll/slides/templates. A project template whose name
# also exists as a built-in is labelled "project (override)".
# Takes no arguments besides --help/-h; returns 1 on any other argument.
cmd_slides_templates() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --help|-h) _slides_help; return 0 ;;
      --*) err "$(msg slides_templates.unknown_option_1)"; return 1 ;;
      *) err "$(msg slides_templates.unexpected_argument_1)"; return 1 ;;
    esac
  done

  local found=0
  # `tpl` is declared local so the loop variable no longer leaks to the caller.
  local tpl name origin

  printf '%-24s  %-12s  %s\n' "name" "source" "path"
  printf '%-24s  %-12s  %s\n' "----" "------" "----"

  # Built-in templates (shipped with roll package)
  local builtin_dir="${ROLL_PKG_DIR}/lib/slides/templates"
  if [[ -d "$builtin_dir" ]]; then
    for tpl in "$builtin_dir"/*.html; do
      # Skip the literal pattern left by an unmatched glob rather than
      # toggling nullglob, which would clobber the caller's shopt state.
      [[ -e "$tpl" ]] || continue
      name="${tpl##*/}"
      name="${name%.html}"
      printf '%-24s  %-12s  %s\n' "$name" "builtin" "$tpl"
      found=1
    done
  fi

  # Project-level overrides (.roll/slides/templates/)
  local proj_dir=".roll/slides/templates"
  if [[ -d "$proj_dir" ]]; then
    for tpl in "$proj_dir"/*.html; do
      [[ -e "$tpl" ]] || continue
      name="${tpl##*/}"
      name="${name%.html}"
      # Mark as project override if same name exists in builtin
      if [[ -f "${builtin_dir}/${name}.html" ]]; then
        origin="project (override)"
      else
        origin="project"
      fi
      printf '%-24s  %-12s  %s\n' "$name" "$origin" "$tpl"
      found=1
    done
  fi

  if [[ $found -eq 0 ]]; then
    info "$(msg slides_templates.no_templates_found)"
  fi
  return 0
}

# ─── US-DECK-004 ─────────────────────────────────────────────────────────────
# Derive a kebab-case slug from a free-form topic.
# $1 - topic text. The text is lower-cased, every run of characters outside
# a-z0-9 collapses to one dash, and one leading plus one trailing dash are
# dropped. Prints the slug (possibly empty) without a trailing newline.
# This is the lowercase-kebab convention `roll slides build <slug>` and the
# schema assume.
_slides_topic_slug() {
  local kebab
  kebab=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | tr -cs 'a-z0-9' '-')
  kebab="${kebab#-}"
  printf '%s' "${kebab%-}"
}

# `roll slides new "<topic>" [--template <name>] [--quiet] [--no-build]`
# AI authoring entry point. Loads the `roll-deck` skill, builds a single text
# prompt containing the skill body + topic + slug + template, and hands it
# to the selected project agent, which is responsible for writing
# `.roll/slides/<slug>/deck.md` (and nothing else). While the agent runs in
# the background, deck.md is polled for `## Slide` headings to update the
# spinner label. Afterwards the deck is validated and, unless --no-build is
# given, rendered and opened.
# Returns 1 on a usage error, missing skill, or unknown agent; otherwise the
# agent's own exit status. Exits 130 if interrupted (INT/TERM).
cmd_slides_new() {
  local topic="" template="introduction-v3" quiet=0 no_build=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --template)
        [[ -n "${2:-}" ]] || { err "$(msg slides_new.template_requires_value)"; return 1; }
        template="$2"; shift 2 ;;
      --template=*) template="${1#--template=}"; shift ;;
      --quiet) quiet=1; shift ;;
      --no-build) no_build=1; shift ;;
      --help|-h) _slides_help; return 0 ;;
      --*) err "$(msg slides_new.unknown_option_1)"; return 1 ;;
      *)
        if [[ -z "$topic" ]]; then
          topic="$1"; shift
        else
          err "$(msg slides_new.unexpected_argument_1)"; return 1
        fi
        ;;
    esac
  done

  if [[ -z "$topic" ]]; then
    err "Usage: roll slides new \"<topic>\" [--template <name>] [--quiet] [--no-build]"
    echo "$(msg slides_new.en_roll_slides_new_template)" >&2
    return 1
  fi

  local slug; slug=$(_slides_topic_slug "$topic")
  if [[ -z "$slug" ]]; then
    err "Could not derive a slug from topic: $topic"
    echo "$(msg slides_new.en_slug "$topic")" >&2
    return 1
  fi

  # Progress: init with 5 phases, detect quiet mode
  _slides_progress_init launching generating validating rendering opening
  _slides_progress_detect_tty
  if [[ "$quiet" -eq 1 ]]; then
    _slides_progress_quiet
  fi

  # Ctrl-C trap: stop the agent (if already launched) and the spinner, then
  # exit. Background jobs of a non-interactive shell start with SIGINT
  # ignored, so the agent is sent TERM explicitly rather than left running.
  local agent_pid=""
  trap '
    if [[ -n "${agent_pid:-}" ]]; then
      kill "$agent_pid" 2>/dev/null || true
    fi
    _slides_progress_phase_enter "cancelled" 2>/dev/null
    exit 130
  ' INT TERM

  # Phase: launching
  _slides_progress_phase_enter "launching"

  local skill_file="${ROLL_PKG_DIR}/skills/roll-deck/SKILL.md"
  if [[ ! -f "$skill_file" ]]; then
    err "Skill not found: ${skill_file}"
    # Do not leave the handler installed in the caller after an early return.
    trap - INT TERM
    return 1
  fi
  local skill_body; skill_body=$(_skill_content "$skill_file")

  local agent; agent=$(_project_agent)

  # Compose the full prompt
  local prompt
  prompt="$(cat <<EOF
${skill_body}

---

# Task

topic: ${topic}
slug: ${slug}
template: ${template}
target_file: .roll/slides/${slug}/deck.md

Generate the 18-slide bilingual deck.md for the topic above, following the workflow and hard constraints in this skill. Write exactly one file: .roll/slides/${slug}/deck.md. Then print the bilingual "Next" hint.

按本 skill 的工作流和硬约束生成 18 张双语 slide 的 deck.md。只写一个文件：.roll/slides/${slug}/deck.md，然后打印双语 "Next" 提示。
EOF
)"

  _agent_argv "$agent" text "$prompt" || {
    err "Unknown agent '${agent}'. Run: roll agent use <claude|kimi|deepseek|pi|openai|codex|opencode|qwen|antigravity>"
    trap - INT TERM
    return 1
  }

  # Phase: generating — launch agent in background + spinner + file-watch
  _slides_progress_phase_enter "generating"

  local deck_file=".roll/slides/${slug}/deck.md"
  local deck_dir; deck_dir="$(dirname "$deck_file")"
  mkdir -p "$deck_dir"

  # Start spinner
  _slides_progress_spinner_start

  # Launch agent in background
  "${_AGENT_ARGV[@]}" &
  agent_pid=$!

  # File-watch: poll deck.md for ## Slide N count and update spinner label
  local slide_count=0 last_slide_count=-1
  while kill -0 "$agent_pid" 2>/dev/null; do
    if [[ -f "$deck_file" ]]; then
      # grep -c prints "0" AND exits 1 when nothing matches, so `|| echo 0`
      # would produce "0<newline>0". Ignore the status and sanitize instead.
      slide_count=$(grep -c '^## Slide ' "$deck_file" 2>/dev/null || true)
      [[ "$slide_count" =~ ^[0-9]+$ ]] || slide_count=0
      if [[ "$slide_count" != "$last_slide_count" ]]; then
        last_slide_count="$slide_count"
        if [[ "$slide_count" -gt 0 ]]; then
          _slides_progress_spinner_label "generating slide ${slide_count}/18"
        fi
      fi
    fi
    sleep 1
  done

  # Agent finished — stop spinner
  if [[ -n "${_SLIDES_PROGRESS_SPINNER_PID:-}" ]]; then
    kill "${_SLIDES_PROGRESS_SPINNER_PID}" 2>/dev/null || true
    wait "${_SLIDES_PROGRESS_SPINNER_PID}" 2>/dev/null || true
    _SLIDES_PROGRESS_SPINNER_PID=""
    if [[ $_SLIDES_PROGRESS_QUIET -eq 0 ]]; then
      printf '\r\033[K' >&2
    fi
  fi

  # Capture the agent status inside an || list: a bare failing `wait` would
  # abort the script under `set -e` before the status could be read.
  local rc=0
  wait "$agent_pid" || rc=$?
  agent_pid=""

  # Phase: validating
  _slides_progress_phase_enter "validating"

  # If deck.md exists, validate it
  if [[ -f "$deck_file" ]]; then
    local lib_dir; lib_dir=$(_slides_lib)
    if ! python3 "${lib_dir}/slides-validate.py" "$deck_file" >/dev/null 2>&1; then
      if [[ $_SLIDES_PROGRESS_QUIET -eq 0 ]]; then
        printf '  ⚠️ validation warnings (build may surface details)\n' >&2
      fi
    fi
  fi

  # Phase: rendering + opening (unless --no-build)
  if [[ "$no_build" -eq 0 ]] && [[ -f "$deck_file" ]]; then
    _slides_progress_phase_enter "rendering"
    # Same `set -e` concern as the wait above: a failed build must not abort
    # before the trap is cleared and the hint is printed.
    local build_rc=0
    cmd_slides_build "$slug" --no-open || build_rc=$?

    if [[ $build_rc -eq 0 ]]; then
      _slides_progress_phase_enter "opening"
      local opener
      if opener=$(_slides_open_cmd); then
        "$opener" ".roll/slides/${slug}.html" >/dev/null 2>&1 || true
      fi
    fi
  fi

  trap - INT TERM

  # Print the Next hint. It is printed unconditionally: --quiet only silences
  # the progress output above.
  echo
  echo "Next:  roll slides build ${slug}"
  echo "$(msg slides_new.en_roll_slides_build "${slug}")"

  return "$rc"
}

# ─── cmd_prices (US-VIEW-013 / FIX-116) ───────────────────────────────────────
# `roll prices show`     — print the current price snapshot table.
# `roll prices refresh`  — fetch live pricing docs, diff vs latest snapshot,
#                          write a new snapshot when rates have changed.
# FIX-116: --vendor flag for multi-vendor support.
# Print the bilingual (English / Chinese) usage text for `roll prices` to
# stdout. The heredoc delimiter is quoted, so the text is emitted verbatim
# with no shell expansion.
_prices_help() {
  cat <<'EOF'
Usage: roll prices <subcommand> [--url URL] [--vendor VENDOR]
      roll prices <子命令> [--url 网址] [--vendor 厂商]

Subcommands:
  show     Print the current price snapshot table.
           显示当前价格快照表。
  refresh  Fetch the official pricing docs, diff against the latest snapshot,
           and write a new snapshot only when rates have changed.
           拉取官方价格文档与最新快照对比，有变化才落新快照。

Options:
  --vendor anthropic|deepseek|kimi  Target vendor for refresh (default: anthropic).
                                    指定拉取价格的厂商（默认：anthropic）。
EOF
}

# `roll prices show` — print the loaded price snapshot(s).
# Runs an inline Python script (quoted heredoc, no shell expansion) that puts
# ${ROLL_PKG_DIR}/lib on sys.path and imports `model_prices` from there.
# Output: snapshot version and effective date, one line per loaded snapshot
# (vendor, currency, source URL), then a per-model table with the currency and
# the in / out / cache_create / cache_read rates.
# Arguments passed to this function are ignored. Returns python3's exit status.
cmd_prices_show() {
  local lib_dir="${ROLL_PKG_DIR}/lib"
  # The lib directory is handed to the script as sys.argv[1].
  python3 - "$lib_dir" <<'PY'
import json, os, sys
lib_dir = sys.argv[1]
sys.path.insert(0, lib_dir)
import model_prices as mp

version, effective_at, source_url = mp.snapshot_meta()
print(f"price snapshot  价格快照")
print(f"  version        {version}")
print(f"  effective_at   {effective_at}")
print(f"  snapshots      {len(mp._SNAPSHOTS)} loaded  已加载")
for snap in mp._SNAPSHOTS:
    v = snap.get("vendor", "—")
    c = snap.get("currency", "—")
    print(f"    {v:<12} {c:>4}  {snap['source_url']}")
print()
print(f"  {'model':<24}{'cur':>4}{'in':>10}{'out':>10}{'cw':>10}{'cr':>10}")
for model in sorted(mp.PRICES):
    p = mp.PRICES[model]
    cur = mp.currency_for(model)
    print(f"  {model:<24}{cur:>4}{p['in']:>10.4f}{p['out']:>10.4f}{p['cache_create']:>10.4f}{p['cache_read']:>10.4f}")
print()
print("rates per million tokens  每百万 token 单价")
PY
}

# `roll prices refresh [--url URL] [--vendor VENDOR]`
# Runs an inline Python script that imports `prices_fetcher` from
# ${ROLL_PKG_DIR}/lib and calls its refresh() against the snapshot directory
# ${ROLL_PKG_DIR}/lib/prices.
#   --url URL        override the source URL (empty = fetcher default)
#   --vendor VENDOR  vendor to refresh; must be a key of the fetcher's
#                    VENDOR_REGISTRY (default: anthropic)
# Exit status: 0 on success (including "unchanged"), 1 for an unknown flag,
# a flag without a value, or an unknown vendor, 2 on a fetch failure, 3 on a
# parse failure.
cmd_prices_refresh() {
  local url=""
  local vendor=""
  while (( $# > 0 )); do
    case "$1" in
      --url)
        # Guard the value: a bare `--url` would otherwise hit an unbound $2
        # under `set -u`.
        if (( $# < 2 )); then
          err "--url requires a value"
          return 1
        fi
        url="$2"; shift 2 ;;
      --vendor)
        if (( $# < 2 )); then
          err "--vendor requires a value"
          return 1
        fi
        vendor="$2"; shift 2 ;;
      *)     err "$(msg prices_refresh.unknown_flag_1)"; return 1 ;;
    esac
  done
  local lib_dir="${ROLL_PKG_DIR}/lib"
  # The heredoc delimiter is quoted, so nothing inside is expanded by the
  # shell: every message must be plain Python. (`$(msg …)` placed in here was
  # printed literally.)
  python3 - "$lib_dir" "$url" "$vendor" <<'PY'
import os, sys
lib_dir = sys.argv[1]
override_url = sys.argv[2] if len(sys.argv) > 2 else ""
vendor = sys.argv[3] if len(sys.argv) > 3 else ""
sys.path.insert(0, lib_dir)
import prices_fetcher as pf

snapshot_dir = os.path.join(lib_dir, "prices")

if vendor:
    if vendor not in pf.VENDOR_REGISTRY:
        known = ", ".join(sorted(pf.VENDOR_REGISTRY))
        print(f"[roll] unknown vendor: {vendor}  未知厂商", file=sys.stderr)
        print(f"[roll] known vendors: {known}  已知厂商", file=sys.stderr)
        sys.exit(1)
else:
    vendor = "anthropic"

try:
    action, changes = pf.refresh(
        snapshot_dir=snapshot_dir,
        vendor=vendor,
        url=override_url or None,
    )
except pf.FetchError as exc:
    print(f"[roll] fetch failed: {exc}", file=sys.stderr)
    print("[roll] keeping existing snapshot, no changes written  保留旧快照，未写入新文件",
          file=sys.stderr)
    sys.exit(2)
except pf.ParseError as exc:
    print(f"[roll] parse failed: {exc}", file=sys.stderr)
    print("[roll] keeping existing snapshot, no changes written  保留旧快照，未写入新文件",
          file=sys.stderr)
    sys.exit(3)

kind, _, _ = (action + "::").split(":", 2)
if action == "unchanged":
    print("[roll] up to date  价格快照已是最新")
    sys.exit(0)
if kind == "first":
    print(f"[roll] baseline snapshot written  写入首份基线快照")
elif kind == "written":
    print(f"[roll] new snapshot written  写入新版价格快照")
print(pf.format_diff(changes, colored=sys.stdout.isatty()))
PY
}

# `roll prices <subcommand>` dispatcher.
#   show | refresh   → forwarded with the remaining arguments
#   help / no args   → usage on stdout
#   anything else    → localized error + usage on stderr, returns 1
cmd_prices() {
  local subcmd="${1:-}"
  shift || true
  case "$subcmd" in
    show)              cmd_prices_show "$@" ;;
    refresh)           cmd_prices_refresh "$@" ;;
    --help|-h|help|"") _prices_help ;;
    *)
      # Quote the user-supplied name: unquoted it would be word-split and
      # glob-expanded before reaching msg.
      err "$(msg prices.unknown_subcommand "$subcmd")"
      _prices_help >&2
      return 1
      ;;
  esac
}

# FIX-178: AI-style a deterministic changelog draft into the project voice via
# the configured agent (roll agent use). Content-preserving polish only — same
# items + (ID)s, bold-lead style anchored on the last 3 released versions.
#
#   $1       raw deterministic draft (markdown starting at '## Unreleased')
#   stdout   the styled draft, from its first '## Unreleased' line onward
#   returns  1 when the draft is empty, the agent argv cannot be built, no temp
#            file can be created, or the agent hit the watchdog
#
# Any failure yields empty output so the caller falls back to the raw draft.
# The agent is bounded by a 150s watchdog (macOS lacks timeout(1)); output of a
# killed agent is discarded because it may be cut off mid-list. Uses the same
# agent path as peer review (_agent_argv).
_changelog_ai_style() {
  local raw="$1"
  [ -n "$raw" ] || return 1
  local agent; agent=$(_project_agent)
  local anchors; anchors=$(_changelog_style_anchors CHANGELOG.md 2>/dev/null || true)
  local prompt
  prompt="你是 roll 的 changelog 编辑。把【原始草稿】改写成发布说明,严格遵守:
1) 不增删条目、不改事实;每条保留它的(ID)。只润色措辞。
2) 每条格式:\`- **简短标题(ID)** — 补充说明\`;若原条目结尾有 \`[loop]\` 则保留。
3) 保留每个 ### 分类标题与分组。只输出 markdown,从 '## Unreleased' 开始,前后不要任何解释。

【风格锚点·最近版本】
${anchors}

【原始草稿】
${raw}"
  _agent_argv "$agent" text "$prompt" 2>/dev/null || return 1
  _agent_bypass_claude_perms
  local out
  out=$(mktemp) || return 1
  "${_AGENT_ARGV[@]}" >"$out" 2>/dev/null &
  local pid=$! waited=0 timed_out=0
  while kill -0 "$pid" 2>/dev/null; do
    sleep 1; waited=$((waited + 1))
    if [ "$waited" -ge 150 ]; then
      # Watchdog: TERM first, then KILL if the agent ignores it.
      timed_out=1
      kill "$pid" 2>/dev/null; sleep 1; kill -9 "$pid" 2>/dev/null; break
    fi
  done
  # Reap the child; its exit status is not used (agents may exit non-zero yet
  # still print a usable answer), so never let it trip errexit.
  wait "$pid" 2>/dev/null || true
  if [ "$timed_out" = 1 ]; then
    rm -f "$out"
    return 1
  fi
  # Normalize agent chrome before extracting: some agents (e.g. kimi) prefix the
  # rendered answer with a "• " marker and indent the body. Changelog markdown is
  # flat (##, ###, -, blank), so stripping a leading "• " and any leading
  # whitespace is safe and is a no-op for plain agents (e.g. claude -p). Then
  # keep from the first '## Unreleased' line onward (drops any preamble).
  sed -E 's/^[[:space:]]*•[[:space:]]?//; s/^[[:space:]]+//' "$out" \
    | awk '/^## Unreleased/{f=1} f{print}'
  rm -f "$out"
}

# FIX-178: replace (or insert) the ## Unreleased section of a changelog file.
#
#   $1  draft markdown; it must itself begin with '## Unreleased'
#   $2  changelog path (default: CHANGELOG.md)
#
# Three cases:
#   - file has '## Unreleased' → that section (up to the next '## ' heading) is
#     replaced by the draft;
#   - file exists without it   → the draft is inserted after the first line;
#   - file is missing          → a new file with a '# Changelog' title is made.
#
# The result is written back with `cat > file` rather than `mv tmp file`:
# mktemp files are mode 0600, so a mv would silently change the changelog's
# permissions (and replace a symlink with a regular file).
_changelog_write_unreleased() {
  local draft="$1" cl="${2:-CHANGELOG.md}"
  local dfile tmp
  dfile=$(mktemp) || return 1
  tmp=$(mktemp) || { rm -f "$dfile"; return 1; }
  printf '%s\n' "$draft" > "$dfile"

  if [ -f "$cl" ] && grep -q '^## Unreleased' "$cl"; then
    # getline-from-file keeps the multi-line draft intact.
    awk -v df="$dfile" '
      /^## Unreleased/ && !done { while ((getline line < df) > 0) print line; print ""; skip=1; done=1; next }
      skip && /^## / { skip=0 }
      skip { next }
      { print }
    ' "$cl" > "$tmp"
  elif [ -f "$cl" ]; then
    { head -n 1 "$cl"; echo; cat "$dfile"; echo; tail -n +2 "$cl"; } > "$tmp"
  else
    { echo "# Changelog"; echo; cat "$dfile"; echo; } > "$tmp"
  fi

  if ! cat "$tmp" > "$cl"; then
    rm -f "$tmp" "$dfile"
    return 1
  fi
  rm -f "$tmp" "$dfile"
}

# `roll changelog generate [--no-ai] [--write] [--json] [...]`
#
# US-CL-006: builds a deterministic '## Unreleased' draft by running
# lib/changelog_generate.py (unrecognized flags are forwarded to it).
# FIX-178: by default the draft is then AI-styled via the configured agent
# (content-preserving); --no-ai / --json stay deterministic. A styled result is
# only accepted when it is non-empty and contains at least one '- ' list item;
# otherwise a warning is printed and the deterministic draft is used.
#   --write  write the result into CHANGELOG.md instead of printing it
#   --json   print the generator's JSON output verbatim (implies no AI)
# Returns 1 when the generator fails or the subcommand is unknown.
cmd_changelog() {
  local subcmd="${1:-generate}"
  shift || true
  case "$subcmd" in
    generate)
      local want_ai=1 to_write=0 is_json=0 pyargs=()
      local a
      for a in "$@"; do
        case "$a" in
          --no-ai) want_ai=0 ;;
          --write) to_write=1 ;;
          --json)  is_json=1; want_ai=0; pyargs+=("$a") ;;
          *)       pyargs+=("$a") ;;
        esac
      done
      local raw
      # ${arr[@]+...} keeps an empty array safe under `set -u` on old bash.
      raw=$(python3 "${ROLL_PKG_DIR}/lib/changelog_generate.py" ${pyargs[@]+"${pyargs[@]}"}) || return 1
      if [ "$is_json" = 1 ]; then printf '%s\n' "$raw"; return 0; fi
      local final="$raw"
      if [ "$want_ai" = 1 ]; then
        local styled; styled=$(_changelog_ai_style "$raw" 2>/dev/null || true)
        if [ -n "$styled" ] && printf '%s' "$styled" | grep -q '^- '; then
          final="$styled"
        else
          warn "changelog: AI 润色不可用/失败,输出确定性草稿(可加 --no-ai 跳过)"
        fi
      fi
      if [ "$to_write" = 1 ]; then
        _changelog_write_unreleased "$final"
        info "Updated CHANGELOG.md"
      else
        printf '%s\n' "$final"
      fi
      ;;
    --help|-h|help)
      cat <<'EOF'
Usage: roll changelog generate [options]

  从 backlog ✅ Done 故事 + 上次发布以来的提交,生成 ## Unreleased 发布说明。
  默认用配置的 agent(roll agent use)按项目风格润色;失败自动回退确定性草稿。

  roll changelog generate               # 预览(AI 润色)
  roll changelog generate --write       # 写入 CHANGELOG.md(AI 润色)
  roll changelog generate --no-ai       # 仅确定性草稿,不调 AI
  roll changelog generate --json        # 机器可读(确定性)
EOF
      ;;
    *)
      # Quote the user-supplied name so it reaches msg as ONE argument (no
      # word splitting / glob expansion).
      err "$(msg changelog.unknown_subcommand "$subcmd")"
      err "Try: roll changelog generate"
      return 1
      ;;
  esac
}

# ─── roll consistency check — unified consistency orchestrator (US-CONSIST-001) ──
# Dispatches `roll consistency <subcommand>` (default subcommand: check).
# `check` hands every remaining argument to lib/consistency_check.py and
# returns its exit status; an unknown subcommand returns 1.
cmd_consistency() {
  local action="${1:-check}"
  shift || true

  if [[ "$action" == "check" ]]; then
    python3 "${ROLL_PKG_DIR}/lib/consistency_check.py" "$@"
    return
  fi

  if [[ "$action" == "--help" || "$action" == "-h" || "$action" == "help" ]]; then
    cat <<'EOF'
Usage: roll consistency <subcommand>

  check [--json] [--project-dir DIR]    逐维度跑一致性检查
    Run checks across five dimensions (code, docs, i18n, tests, site)
    and produce a structured pass/gap report.

  roll consistency check                # human-readable report
  roll consistency check --json         # machine-readable JSON
EOF
    return
  fi

  err "$(msg consistency.unknown_sub "$action")"
  err "Try: roll consistency check"
  return 1
}

# ─── roll config — unified read/list/set for loop schedule keys (US-LOOP-033) ──
#
# One interactive entry point so users don't have to remember whether a key
# lives in ~/.roll/config.yaml (global) or .roll/local.yaml (project), nor hand-
# edit yaml and remember to reload.
#
# Key registry — one record per supported key, pipe-delimited:
#   <key>|<scope>|<store>|<min>|<max>|<default>
#     scope   default write target: global → ~/.roll/config.yaml
#                                    project → .roll/local.yaml
#     store   flat            → top-level `key: value`
#             nested:<parent> → indented under a `<parent>:` block
#     min/max integer range (inclusive); empty min disables the lower bound
#     default value reported as "(default)" when unset; "-" means auto-derived
_config_keys() {
  # One registry record per line: <key>|<scope>|<store>|<min>|<max>|<default>
  printf '%s\n' \
    'loop_active_start|project|nested:loop_schedule|0|23|0' \
    'loop_active_end|project|nested:loop_schedule|1|24|24' \
    'loop_schedule.period_minutes|project|nested:loop_schedule|1|1440|60' \
    'loop_schedule.offset_minute|project|nested:loop_schedule|0|59|0' \
    'loop_dream_hour|global|flat|0|23|3' \
    'loop_dream_minute|global|flat|0|59|-'
}

# Look up a key in the registry: print its full pipe-delimited record and
# return 0 on the first exact match, or print nothing and return 1.
_config_key_record() {
  local wanted="$1" entry
  while IFS= read -r entry; do
    # The key is everything before the first '|'.
    if [[ "${entry%%|*}" == "$wanted" ]]; then
      printf '%s\n' "$entry"
      return 0
    fi
  done < <(_config_keys)
  return 1
}

# Print just the key name (first '|'-separated field) of every registry
# record, one per line, in registry order.
_config_list_keys() {
  _config_keys | cut -d'|' -f1
}

# Map a scope name to the yaml file that stores it: "global" is $ROLL_CONFIG,
# every other value is the project file .roll/local.yaml (relative to CWD).
_config_key_file() {
  case "$1" in
    global) printf '%s\n' "$ROLL_CONFIG" ;;
    *)      printf '%s\n' ".roll/local.yaml" ;;
  esac
}

# Resolve a key to "<value>\t<source>" on stdout.
#
# Only ONE file is consulted: the one backing the key's registry scope
# (project → .roll/local.yaml, global → $ROLL_CONFIG). When the key is not set
# there, the registry default is printed with the source "default".
# Returns 1 (printing nothing) for a key that is not in the registry.
_config_resolve() {
  # val MUST start out empty: a declared-but-unassigned local is "unbound"
  # under `set -u`, which used to abort when a flat key's file did not exist.
  local key="$1" record scope store def file parent child val=""
  record=$(_config_key_record "$key") || return 1
  IFS='|' read -r _ scope store _ _ def <<<"$record"
  file=$(_config_key_file "$scope")

  if [[ "$store" == nested:* ]]; then
    parent="${store#nested:}"
    # "parent.child" → "child"; a key without a dot is left as-is.
    child="${key#*.}"
    val=$(_yaml_read_nested "$file" "$parent" "$child") || true
  elif [[ -f "$file" ]]; then
    # First `key: value` line; drop the key, a trailing "# comment" and
    # trailing whitespace. "No match" is a normal outcome, so the pipeline's
    # status (non-zero under pipefail) is deliberately ignored.
    val=$(grep -E "^${key}:" "$file" 2>/dev/null | head -n 1 \
      | sed -e 's/^[^:]*:[[:space:]]*//' -e 's/[[:space:]]*#.*$//' -e 's/[[:space:]]*$//') || true
  fi

  if [[ -n "$val" ]]; then
    printf '%s\t%s\n' "$val" "$file"
  else
    printf '%s\t%s\n' "$def" "default"
  fi
  return 0
}

# Validate that $value is an integer within [min,max] for the registry key.
#   $1 key, $2 candidate value
# Returns 0 when valid, 1 when the key is unknown, and 2 (after printing a
# bilingual error) when the value is not an integer or is out of range. An
# empty min or max in the registry disables that bound.
_config_validate() {
  local key="$1" value="$2" record min max num
  record=$(_config_key_record "$key") || return 1
  IFS='|' read -r _ _ _ min max _ <<<"$record"
  if ! [[ "$value" =~ ^-?[0-9]+$ ]]; then
    err "config: '$key' expects an integer, got '$value'"
    err "config：'$key' 需要整数，收到 '$value'"
    return 2
  fi
  # Force base 10. Bash arithmetic reads a leading 0 as octal, so "08"/"09"
  # raise an error (which made both range checks evaluate false and let
  # out-of-range input such as "099" through) and "010" would mean 8.
  if [[ "$value" == -* ]]; then
    num=$(( -(10#${value#-}) ))
  else
    num=$(( 10#$value ))
  fi
  if [[ -n "$min" ]] && (( num < min )); then
    err "config: '$key' must be >= ${min} (got ${value})"
    err "config：'$key' 必须 >= ${min}（收到 ${value}）"
    return 2
  fi
  if [[ -n "$max" ]] && (( num > max )); then
    err "config: '$key' must be <= ${max} (got ${value})"
    err "config：'$key' 必须 <= ${max}（收到 ${value}）"
    return 2
  fi
  return 0
}

# Idempotent yaml writer. Replaces an existing line in place (preserving every
# other line / comment / order) or appends under the right block. Handles both
# flat top-level keys and nested `parent.child` keys. US-LOOP-033.
#   _config_set <key> <value> <file>
# Returns 1 for a key that is not in the registry or when the rewrite fails.
# The file (and its directory) is created when missing.
#
# The new content is copied back INTO the existing file instead of mv-ing the
# mktemp file over it: mktemp files are mode 0600, so a mv would silently
# tighten the config's permissions and replace a symlinked config with a
# regular file.
_config_set() {
  local key="$1" value="$2" file="$3"
  local record store parent child
  record=$(_config_key_record "$key") || return 1
  IFS='|' read -r _ _ store _ _ _ <<<"$record"

  mkdir -p "$(dirname "$file")" 2>/dev/null || true
  [[ -f "$file" ]] || : > "$file"

  local tmp
  tmp="$(mktemp)" || return 1
  local rc=0
  if [[ "$store" == nested:* ]]; then
    parent="${store#nested:}"
    # "parent.child" → "child"; a key without a dot is used as-is.
    child="${key#*.}"
    # Values travel through the environment so awk never re-parses them.
    ROLL_CFG_PARENT="$parent" ROLL_CFG_CHILD="$child" ROLL_CFG_VAL="$value" \
      awk '
      BEGIN { parent=ENVIRON["ROLL_CFG_PARENT"]; child=ENVIRON["ROLL_CFG_CHILD"]; val=ENVIRON["ROLL_CFG_VAL"]; done=0 }
      # Entering target parent block
      $0 ~ ("^" parent ":") { in_block=1; print; next }
      # Inside the block: replace the child line if present
      in_block && $0 ~ ("^[[:space:]]+" child ":") {
        print "  " child ": " val; done=1; in_block=0; next
      }
      # Leaving the block at the next top-level key — append child if not done
      in_block && /^[^[:space:]]/ {
        if (!done) { print "  " child ": " val; done=1 }
        in_block=0; print; next
      }
      { print }
      END {
        if (in_block && !done) { print "  " child ": " val; done=1 }
        if (!done) { print parent ":"; print "  " child ": " val }
      }
    ' "$file" > "$tmp" || rc=$?
  else
    ROLL_CFG_KEY="$key" ROLL_CFG_VAL="$value" \
      awk '
      BEGIN { key=ENVIRON["ROLL_CFG_KEY"]; val=ENVIRON["ROLL_CFG_VAL"]; done=0 }
      $0 ~ ("^" key ":") && !done { print key ": " val; done=1; next }
      { print }
      END { if (!done) print key ": " val }
    ' "$file" > "$tmp" || rc=$?
  fi

  # Never overwrite the config with the output of a failed rewrite.
  if (( rc == 0 )); then
    cat "$tmp" > "$file" || rc=$?
  fi
  rm -f "$tmp"
  return "$rc"
}

# Print the usage text for `roll config` to stdout: the three invocation
# forms, the supported keys with their ranges, the compact facades
# (loop-window / loop-schedule / dream-time) and a few examples.
_config_help() {
  cat <<'EOF'
Usage: roll config <key>                 print current value + source
       roll config --list                list all loop schedule keys
       roll config <key> <value> [--global|--project]   set a value
                                                                  统一调度配置
Read / list / set the loop and dream schedule keys without hand-editing
yaml. Default write scope is --project (.roll/local.yaml); --global writes
~/.roll/config.yaml.
读 / 列 / 写 loop、dream 调度 key，免去手工编辑 yaml。默认写 --project
（.roll/local.yaml）；--global 写 ~/.roll/config.yaml。

Supported keys (range):
  loop_active_start              0-23    loop active window start hour
  loop_active_end                1-24    loop active window end hour
  loop_schedule.period_minutes   1-1440  fire interval in minutes
  loop_schedule.offset_minute    0-59    minute offset within the period
  loop_dream_hour                0-23    dream daily fire hour
  loop_dream_minute              0-59    dream daily fire minute

Compact facades (write multiple keys at once):
  roll config loop-window 9-18              loop_active_start + loop_active_end
  roll config loop-schedule 30/7            period_minutes + offset_minute
  roll config dream-time 03:20              loop_dream_hour + loop_dream_minute

Examples:
  roll config loop_dream_hour
  roll config --list
  roll config loop_schedule.period_minutes 30
  roll config loop_dream_hour 3 --global
  roll config dream-time 03:20
EOF
}

# US-LOOP-034: `roll config loop-window <start>-<end>` — compact facade that
# writes loop_active_start + loop_active_end in one shot.
#   $1  "<start>-<end>" (hours), or empty to print the current window + source
# Reads the target scope from ROLL_CFG_SCOPE (set by cmd_config; falls back to
# "project"). start must be < end and both must lie in [0,24].
# Returns 0 on success, 2 (after a bilingual error) on malformed or
# out-of-range input.
_config_loop_window() {
  local value="$1"
  if [[ -z "$value" ]]; then
    local start_res end_res cur_start cur_end src
    start_res=$(_config_resolve "loop_active_start")
    end_res=$(_config_resolve "loop_active_end")
    cur_start="${start_res%%$'\t'*}"; src="${start_res#*$'\t'}"
    cur_end="${end_res%%$'\t'*}"
    printf 'loop-window: %s-%s (%s)\n' "$cur_start" "$cur_end" "from $src"
    return 0
  fi
  if ! [[ "$value" =~ ^[0-9]+-[0-9]+$ ]]; then
    err "config: loop-window expects <start>-<end>, got '$value'"
    err "config：loop-window 需要 <start>-<end> 格式，收到 '$value'"
    return 2
  fi
  local start="${value%-*}" end="${value#*-}"
  # Force base 10: with a leading zero ("09-18") bash arithmetic would try
  # octal, error out, and every range check below would be skipped.
  start=$((10#$start)); end=$((10#$end))
  if (( start > 24 )); then
    err "config: loop-window start must be in [0,24]"
    err "config：loop-window 开始时间必须在 [0,24]"
    return 2
  fi
  if (( end > 24 )); then
    err "config: loop-window end must be <= 24"
    err "config：loop-window 结束时间必须 ≤ 24"
    return 2
  fi
  if (( start >= end )); then
    err "config: loop-window start must be < end (got ${start}-${end})"
    err "config：loop-window 开始时间必须 < 结束时间（收到 ${start}-${end}）"
    return 2
  fi
  local file
  file=$(_config_key_file "${ROLL_CFG_SCOPE:-project}")
  _config_set "loop_active_start" "$start" "$file"
  _config_set "loop_active_end" "$end" "$file"
  ok "✓ set loop-window = ${start}-${end} in $file"
  printf 'run `roll loop on` to apply\n'
  return 0
}

# US-LOOP-034: `roll config loop-schedule <period>[/<offset>]` — writes
# loop_schedule.period_minutes (+ offset_minute when given).
#   $1  "<period>" or "<period>/<offset>" in minutes, or empty to print the
#       current schedule + source
# Reads the target scope from ROLL_CFG_SCOPE (set by cmd_config; falls back to
# "project"). period ∈ [1,1440], offset ∈ [0, period-1]; an omitted offset
# leaves the offset line untouched.
# Returns 0 on success, 2 (after a bilingual error) on malformed or
# out-of-range input.
_config_loop_schedule() {
  local value="$1"
  if [[ -z "$value" ]]; then
    local period_res offset_res cur_period cur_offset src
    period_res=$(_config_resolve "loop_schedule.period_minutes")
    offset_res=$(_config_resolve "loop_schedule.offset_minute")
    cur_period="${period_res%%$'\t'*}"; src="${period_res#*$'\t'}"
    cur_offset="${offset_res%%$'\t'*}"
    printf 'loop-schedule: every %smin (offset :%s) (%s)\n' "$cur_period" "$cur_offset" "from $src"
    return 0
  fi
  if ! [[ "$value" =~ ^[0-9]+(/[0-9]+)?$ ]]; then
    err "config: loop-schedule expects <period>[/<offset>], got '$value'"
    err "config：loop-schedule 需要 <period>[/<offset>] 格式，收到 '$value'"
    return 2
  fi
  local period="${value%%/*}" offset=""
  [[ "$value" == */* ]] && offset="${value#*/}"
  # Force base 10: a leading zero would otherwise be read as octal ("060"
  # → 48) or raise an arithmetic error ("08"), defeating the range checks.
  period=$((10#$period))
  if [[ -n "$offset" ]]; then offset=$((10#$offset)); fi
  if (( period < 1 )) || (( period > 1440 )); then
    err "config: loop-schedule period must be in [1,1440]"
    err "config：loop-schedule 周期必须在 [1,1440]"
    return 2
  fi
  if [[ -n "$offset" ]] && (( offset > period - 1 )); then
    err "config: loop-schedule offset must be in [0, period-1] (period ${period})"
    err "config：loop-schedule 偏移必须在 [0, period-1]（周期 ${period}）"
    return 2
  fi
  local file
  file=$(_config_key_file "${ROLL_CFG_SCOPE:-project}")
  _config_set "loop_schedule.period_minutes" "$period" "$file"
  if [[ -n "$offset" ]]; then
    _config_set "loop_schedule.offset_minute" "$offset" "$file"
    ok "✓ set loop-schedule = ${period}/${offset} in $file"
  else
    ok "✓ set loop-schedule = ${period} in $file"
  fi
  printf 'run `roll loop on` to apply\n'
  return 0
}

# US-LOOP-035: `roll config dream-time <HH:MM>` — compact facade writing
# loop_<svc>_hour + loop_<svc>_minute in one shot.
#   _config_daily_time <svc> <value>
#   $1  service name; the keys written are loop_<svc>_hour / loop_<svc>_minute
#   $2  "<HH:MM>", or empty to print the current effective time + source
# HH ∈ [0,23], MM ∈ [0,59]. Reads the target scope from ROLL_CFG_SCOPE (set by
# cmd_config; falls back to "global", where these keys live).
# FIX-195: brief retired — svc is {dream} (the helper stays generic).
# Returns 0 on success, 2 (after a bilingual error) on malformed or
# out-of-range input.
_config_daily_time() {
  local svc="$1" value="$2"
  local hour_key="loop_${svc}_hour" min_key="loop_${svc}_minute"
  if [[ -z "$value" ]]; then
    local hour_res min_res cur_h cur_m src
    hour_res=$(_config_resolve "$hour_key"); min_res=$(_config_resolve "$min_key")
    cur_h="${hour_res%%$'\t'*}"; src="${hour_res#*$'\t'}"
    cur_m="${min_res%%$'\t'*}"
    # minute auto-derives to "-" when unset; render as 00 for display
    if [[ "$cur_m" == "-" || -z "$cur_m" ]]; then cur_m="0"; fi
    # printf %d parses a leading 0 as octal and rejects "08"/"09", so stored
    # values are normalized to base 10 before formatting.
    if [[ "$cur_h" =~ ^[0-9]+$ ]]; then cur_h=$((10#$cur_h)); fi
    if [[ "$cur_m" =~ ^[0-9]+$ ]]; then cur_m=$((10#$cur_m)); fi
    printf '%s-time: %02d:%02d (%s)\n' "$svc" "$cur_h" "$cur_m" "from $src"
    return 0
  fi
  if ! [[ "$value" =~ ^[0-9]{1,2}:[0-9]{1,2}$ ]]; then
    err "config: ${svc}-time expects <HH:MM>, got '$value'"
    err "config：${svc}-time 需要 <HH:MM> 格式，收到 '$value'"
    return 2
  fi
  local hh="${value%%:*}" mm="${value#*:}"
  # strip leading zeros to avoid octal interpretation in arithmetic
  hh=$((10#$hh)); mm=$((10#$mm))
  if (( hh > 23 )); then
    err "config: ${svc}-time hour must be in [0,23]"
    err "config：${svc}-time 小时必须在 [0,23]"
    return 2
  fi
  if (( mm > 59 )); then
    err "config: ${svc}-time minute must be in [0,59]"
    err "config：${svc}-time 分钟必须在 [0,59]"
    return 2
  fi
  local file
  file=$(_config_key_file "${ROLL_CFG_SCOPE:-global}")
  _config_set "$hour_key" "$hh" "$file"
  _config_set "$min_key" "$mm" "$file"
  ok "$(printf '✓ set %s-time = %02d:%02d in %s' "$svc" "$hh" "$mm" "$file")"
  printf 'run `roll loop on` to apply\n'
  return 0
}

# US-LOOP-036: regenerate the launchd plists for the current project after a
# schedule key was written, so the live schedule follows the yaml without a
# manual `roll loop on`.
#
# The yaml stays the source of truth: if regeneration fails this only warns
# and names the manual fallback. On success, a paused loop ("status: paused"
# in $_LOOP_STATE) or a muted one ($_LOOP_MUTE_FILE exists) gets a hint that
# the schedule was updated but is not firing. Sandbox guards inside
# _install_launchd_plists keep tests off the host's real launchd domain.
_config_reload_schedule() {
  local project_path
  project_path="$(pwd -P)"

  # Failure path first: warn and stop.
  if ! _install_launchd_plists "$project_path" >/dev/null 2>&1; then
    warn "config: schedule reload failed — yaml is saved; run \`roll loop on\` to apply"
    warn "config：调度重载失败 —— yaml 已保存；运行 \`roll loop on\` 手工生效"
    return
  fi

  # Plists are rewritten; mention when the loop will not fire anyway.
  if [[ -f "$_LOOP_STATE" ]] && grep -q "^status: paused" "$_LOOP_STATE" 2>/dev/null; then
    warn "loop is paused — schedule updated but not firing"
    warn "loop 已暂停 —— 调度已更新但不会触发"
  elif [[ -f "$_LOOP_MUTE_FILE" ]]; then
    warn "loop is muted — schedule updated but not firing"
    warn "loop 已静音 —— 调度已更新但不会触发"
  fi
}

# `roll config` entry point. Forms:
#   roll config                       → help
#   roll config --list                → every key with value + source
#   roll config <key>                 → current value + source
#   roll config <key> <value> [--global|--project]   → validate + write
#   roll config loop-window|loop-schedule|dream-time [<value>]  → facades
# Returns 2 on usage errors (unknown key, surplus argument, invalid value).
cmd_config() {
  local key="" value="" scope="" want_list=""

  # ── Argument scan: flags anywhere, then up to two positionals ──────────
  local arg
  for arg in "$@"; do
    case "$arg" in
      --help|-h|help) _config_help; return 0 ;;
      --list)         want_list=1 ;;
      --global)       scope="global" ;;
      --project)      scope="project" ;;
      *)
        if [[ -z "$key" ]]; then
          key="$arg"
        elif [[ -z "$value" ]]; then
          value="$arg"
        else
          err "config: unexpected argument '$arg'"
          err "config：多余参数 '$arg'"
          return 2
        fi
        ;;
    esac
  done

  # ── Facades (US-LOOP-034 / FIX-195): compact string → low-level writes ──
  # loop-window / loop-schedule default to the project file, dream-time to
  # the global one; an explicit --global/--project always wins.
  local facade_default=""
  case "$key" in
    loop-window|loop-schedule) facade_default="project" ;;
    dream-time)                facade_default="global" ;;
  esac
  if [[ -n "$facade_default" ]]; then
    ROLL_CFG_SCOPE="${scope:-$facade_default}"
    local _rc
    case "$key" in
      loop-window)   _config_loop_window "$value"; _rc=$? ;;
      loop-schedule) _config_loop_schedule "$value"; _rc=$? ;;
      dream-time)    _config_daily_time "${key%-time}" "$value"; _rc=$? ;;
    esac
    # US-LOOP-036: auto-reload only on a successful write (value supplied).
    [[ $_rc -eq 0 && -n "$value" ]] && _config_reload_schedule
    return $_rc
  fi

  # ── --list: tabulate every key with value + source ─────────────────────
  if [[ -n "$want_list" ]]; then
    local name line
    while IFS= read -r name; do
      line=$(_config_resolve "$name")
      printf '  %-30s = %-8s (%s)\n' "$name" "${line%%$'\t'*}" "${line#*$'\t'}"
    done < <(_config_list_keys)
    return 0
  fi

  if [[ -z "$key" ]]; then
    _config_help
    return 0
  fi

  if ! _config_key_record "$key" >/dev/null; then
    err "config: unknown key '$key'"
    err "config：未知 key '$key'"
    err "Try: roll config --list"
    return 2
  fi

  # ── Read mode: key only ────────────────────────────────────────────────
  if [[ -z "$value" ]]; then
    local current
    current=$(_config_resolve "$key")
    printf '%s = %s  (from %s)\n' "$key" "${current%%$'\t'*}" "${current#*$'\t'}"
    return 0
  fi

  # ── Write mode: the default target is the project file ─────────────────
  _config_validate "$key" "$value" || return $?
  local file
  file=$(_config_key_file "${scope:-project}")
  _config_set "$key" "$value" "$file"
  ok "✓ set $key = $value in $file"
  # US-LOOP-036: every recognized config key is a loop/dream schedule key
  # (display-only keys are out of scope for this command), so a successful
  # write always reloads the launchd plists.
  _config_reload_schedule
  return 0
}

# `roll slides <subcommand>` dispatcher.
#   build|new|list|preview|logs|templates|delete → cmd_slides_<subcommand>,
#     called with the remaining arguments
#   --help|-h|help → usage on stdout, returns 0
#   (no subcommand) → usage on stdout, returns 1
#   anything else   → localized error + usage on stderr, returns 1
cmd_slides() {
  local subcmd="${1:-}"
  shift || true
  case "$subcmd" in
    build|new|list|preview|logs|templates|delete)
      # Only the whitelisted names above reach this indirect call.
      "cmd_slides_${subcmd}" "$@"
      ;;
    --help|-h|help)
      _slides_help
      return 0
      ;;
    "")
      _slides_help
      return 1
      ;;
    *)
      # Quote the user-supplied name so it reaches msg as ONE argument (no
      # word splitting / glob expansion).
      err "$(msg slides.unknown_subcommand "$subcmd")"
      _slides_help >&2
      return 1
      ;;
  esac
}

# Render the PR review prompt from the skill template.
#   $1 template text, $2 PR title, $3 PR body, $4 PR diff
# Every {{PR_TITLE}} / {{PR_BODY}} occurrence is replaced LITERALLY (awk
# index/substr — no sed, so '|', '&', '/' and '\' in PR text are inert). A
# template line containing {{PR_DIFF}} is replaced by the first 500 lines of
# the diff. Placeholders are matched against the template only: inserted PR
# text is never re-scanned. Prints the prompt on stdout.
_review_pr_render_prompt() {
  local content="$1" title="$2" body="$3" diff="$4"
  local workdir rc=0
  workdir=$(mktemp -d) || return 1
  printf '%s\n' "$content" > "${workdir}/template"
  printf '%s' "$title" > "${workdir}/title"
  printf '%s' "$body" > "${workdir}/body"
  # head may close the pipe early on a long diff; that is not an error.
  printf '%s\n' "$diff" | head -n 500 > "${workdir}/diff" || true

  # Replacement text is read from files (not -v / argv) so awk performs no
  # escape processing on it and size limits on arguments do not apply.
  ROLL_REVIEW_TMP="$workdir" awk '
    function slurp(path,   line, buf, n) {
      buf = ""; n = 0
      while ((getline line < path) > 0) { buf = buf (n++ ? "\n" : "") line }
      close(path)
      return buf
    }
    function fill(s,   out, it, ib, tt, tb) {
      tt = "{{PR_TITLE}}"; tb = "{{PR_BODY}}"
      out = ""
      while (1) {
        it = index(s, tt); ib = index(s, tb)
        if (!it && !ib) break
        if (it && (!ib || it < ib)) {
          out = out substr(s, 1, it - 1) title
          s = substr(s, it + length(tt))
        } else {
          out = out substr(s, 1, ib - 1) body
          s = substr(s, ib + length(tb))
        }
      }
      return out s
    }
    BEGIN {
      dir = ENVIRON["ROLL_REVIEW_TMP"]
      title = slurp(dir "/title")
      body = slurp(dir "/body")
      diff_file = dir "/diff"
    }
    index($0, "{{PR_DIFF}}") {
      while ((getline line < diff_file) > 0) print line
      close(diff_file)
      next
    }
    { print fill($0) }
  ' "${workdir}/template" || rc=$?

  rm -rf -- "$workdir"
  return "$rc"
}

# `roll review-pr <number>` — run an AI review of a GitHub pull request.
#
# Fetches the PR title/body/diff with gh, fills the roll-review-pr skill
# template, runs the project's configured agent on the prompt, prints the
# agent's answer and acts on the parsed verdict:
#   APPROVE          → gh pr review --approve
#   REQUEST_CHANGES  → gh pr review --request-changes (reason as the body)
#   UNCERTAIN        → warning + a line appended to the per-project ALERT file
#   anything else    → warning only
# A PR whose body contains "[skip-ai-review]" is approved without running the
# agent. Returns 1 on usage errors, a missing GitHub remote or template, a
# failing `gh pr view`, or an unknown agent.
cmd_review_pr() {
  local pr_number="${1:-}"
  [ -n "$pr_number" ] || { err "Usage: roll review-pr <number>"; return 1; }

  local slug
  slug=$(_gh_repo_slug) || { err "Not a GitHub repo — review-pr requires GitHub remote"; return 1; }

  local pr_json diff title body
  pr_json=$(gh -R "$slug" pr view "$pr_number" --json title,body 2>&1) \
    || { err "gh pr view failed: ${pr_json}"; return 1; }
  # A missing diff is tolerated: the review then runs on title + body only.
  diff=$(gh -R "$slug" pr diff "$pr_number" 2>/dev/null) || true
  title=$(printf '%s\n' "$pr_json" | jq -r '.title // ""')
  body=$(printf '%s\n' "$pr_json" | jq -r '.body // ""')

  if [[ "$body" == *'[skip-ai-review]'* ]]; then
    gh -R "$slug" pr review "$pr_number" --approve -b "Auto-approved: [skip-ai-review] detected" 2>/dev/null || true
    info "PR #${pr_number}: [skip-ai-review] — auto-approved"
    return 0
  fi

  local template="${ROLL_PKG_DIR}/skills/roll-review-pr/SKILL.md"
  [ -f "$template" ] || { err "Skill template not found: ${template}"; return 1; }

  local content; content=$(_skill_content "$template")

  local prompt
  prompt=$(_review_pr_render_prompt "$content" "$title" "$body" "$diff") \
    || { err "Failed to render the review prompt"; return 1; }

  local agent; agent=$(_project_agent)
  local output=""
  info "Reviewing PR #${pr_number} with ${agent}..."
  _agent_argv "$agent" text "$prompt" || { err "Unknown agent '${agent}'"; return 1; }
  local _stderr_log; _stderr_log=$(mktemp)
  # The agent's exit status is not trusted (the verdict is parsed from its
  # output); guarding it keeps errexit from skipping the diagnostics below
  # and leaking the stderr log.
  output=$("${_AGENT_ARGV[@]}" 2>"$_stderr_log") || true
  if [[ -z "$output" && -s "$_stderr_log" ]]; then
    err "agent ${agent} produced no output. stderr (first 5 lines):"
    head -5 "$_stderr_log" | sed 's/^/    /' >&2
  fi
  rm -f "$_stderr_log"

  echo "$output"

  # Verdict is "<TYPE>" or "<TYPE>:<reason>".
  local verdict; verdict=$(_parse_review_verdict "$output")
  local vtype; vtype="${verdict%%:*}"
  local vreason; vreason="${verdict#*:}"
  [ "$vreason" = "$vtype" ] && vreason=""

  case "$vtype" in
    APPROVE)
      gh -R "$slug" pr review "$pr_number" --approve -b "AI review: approved" 2>/dev/null || true
      info "PR #${pr_number}: APPROVED"
      ;;
    REQUEST_CHANGES)
      gh -R "$slug" pr review "$pr_number" --request-changes -b "${vreason:-AI review requested changes}" 2>/dev/null || true
      info "PR #${pr_number}: REQUEST_CHANGES — ${vreason}"
      ;;
    UNCERTAIN)
      warn "PR #${pr_number}: UNCERTAIN — ${vreason}"
      # FIX-052: write to per-project ALERT (was global ALERT.md).
      local alert_file="$_LOOP_ALERT"
      mkdir -p "$(dirname "$alert_file")"
      printf '[%s] PR #%s: AI review UNCERTAIN — %s\n' \
        "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$pr_number" "$vreason" >> "$alert_file"
      ;;
    *)
      warn "PR #${pr_number}: no verdict parsed from agent output"
      ;;
  esac
}

# US-AGENT-025: recent degradation traces for the `roll agent` view.
#
# Scans the last 200 lines of runs.jsonl for rows that carry a non-empty
# `fallback_from` and prints one "<tier> <intended-agent> <ran-agent>" line per
# degradation, newest first, capped at LIMIT ($1, default 5). The intended
# agent is the `fallback_from` field (the agent the tier router originally
# picked); the ran agent is the row's `agent` field (the fallback that actually
# executed).
#
# Honors ROLL_AGENT_RUNS_FILE (tests) over the live _LOOP_RUNS path. Always
# returns 0: prints nothing when there is no runs.jsonl, no jq, no fallback
# rows, or jq fails — this is a best-effort display helper.
_agent_view_recent_fallbacks() {
  local limit="${1:-5}"
  # ${_LOOP_RUNS:-}: stay quiet (instead of dying under `set -u`) when neither
  # variable is defined.
  local runs_file="${ROLL_AGENT_RUNS_FILE:-${_LOOP_RUNS:-}}"
  [ -n "$runs_file" ] && [ -f "$runs_file" ] || return 0
  command -v jq >/dev/null 2>&1 || return 0
  # tail then reverse so newest degradations come first; jq filters rows that
  # actually fell back. tier may be absent on older rows → default to "?".
  # The sed program reverses line order (a portable `tac`). Under pipefail a
  # jq failure would otherwise surface as this function's status, hence the
  # trailing `|| return 0`.
  tail -n 200 "$runs_file" 2>/dev/null \
    | jq -r 'select((.fallback_from // "") != "")
             | "\(.tier // "?") \(.fallback_from) \(.agent // "?")"' 2>/dev/null \
    | tail -n "$limit" \
    | sed '1!G;h;$!d' \
    || return 0
}

cmd_agent() {
  local subcmd="${1:-}"; shift || true
  case "$subcmd" in
    use)
      # US-AGENT-027: one-shot lock of the three complexity tiers to a single
      # agent. Backward-compatible semantics: `roll agent use claude` still
      # "works", but its meaning is upgraded from a single per-project agent
      # pref to "lock easy/default/hard". The `fallback` slot is intentionally
      # left untouched so a degraded run still has somewhere to go.
      local name="${1:-}"
      [[ -z "$name" ]] && { err "$(msg agent.use_usage)"; exit 1; }
      # Collapse Antigravity aliases (antigravity / legacy gemini) to canonical agy.
      name="$(_canonical_agent_name "$name")"
      # Reject unknown OR uninstalled agents up front — `use` locks the routing
      # config, so naming an agent this machine cannot run is a hard error
      # (unlike `set`, `use` is the "I only have one agent" shortcut).
      if ! _agent_is_known "$name" || ! _agent_installed_by_name "$name"; then
        err "$(msg agent.use_unknown_agent "$name")"; exit 1
      fi
      mkdir -p .roll
      # Lock the three complexity tiers; leave fallback alone. Each write is
      # atomic (temp file → mv) inside _agents_config_set_slot.
      local slot
      for slot in easy default hard; do
        if ! _agents_config_set_slot "$slot" "$name"; then
          err "$(msg agent.set_write_failed "$slot")"; exit 1
        fi
      done
      # Back-compat: keep the legacy single-agent pref (.roll/local.yaml) in
      # sync so code paths still reading _project_agent see the same agent.
      # Migrate / clean the old project-root .roll.yaml on the spot.
      local pref; pref=$(_project_agent_pref_file)
      if [[ -f "$pref" ]] && grep -q "^agent:" "$pref"; then
        local tmp; tmp=$(mktemp) && sed "s/^agent:.*/agent: ${name}/" "$pref" > "$tmp" && mv "$tmp" "$pref"
      else
        echo "agent: ${name}" >> "$pref"
      fi
      if [[ -f ".roll.yaml" ]]; then
        local tmp; tmp=$(mktemp)
        grep -v "^agent:" .roll.yaml > "$tmp" 2>/dev/null || true
        mv "$tmp" .roll.yaml
        [[ -s ".roll.yaml" ]] || rm -f .roll.yaml
      fi
      ok "$(msg agent.use_locked "$(_agent_display_name "$name")")"
      local project_path; project_path=$(pwd -P)
      local slug; slug=$(_project_slug "$project_path")
      local runner="${_SHARED_ROOT}/loop/run-${slug}.sh"
      if [[ -f "$runner" ]]; then
        _install_launchd_plists "$project_path" >/dev/null
        ok "$(msg agent.loop_runner_scripts_regenerated_for_new)"
      fi
      ;;
    set)
      # US-AGENT-026: cascade interactive picker (slot → agent) with a
      # non-interactive `set <slot> <agent>` shortcut for pipes/CI. No model
      # layer — picking the agent settles the slot (the agent's own default
      # model is used). Writes back to .roll/agents.yaml (atomic, comments
      # preserved).
      local slot="${1:-}" agent="${2:-}"
      local valid_slots="easy default hard fallback"

      # ── Slot selection ────────────────────────────────────────────────
      if [[ -z "$slot" ]]; then
        {
          echo "$(msg agent.set_pick_slot)"
          local i=1 s
          for s in $valid_slots; do
            printf "    %d) %s\n" "$i" "$s"; i=$((i + 1))
          done
          printf "  $(msg agent.set_enter_number) [1-4]: "
        } >&2
        local choice
        if ! IFS= read -r choice; then
          err "$(msg agent.set_no_input)"; exit 1
        fi
        if ! [[ "$choice" =~ ^[0-9]+$ ]] || (( choice < 1 || choice > 4 )); then
          err "$(msg agent.set_invalid_choice "$choice")"; exit 1
        fi
        slot=$(echo "$valid_slots" | cut -d' ' -f"$choice")
      else
        # Validate a slot passed on the command line.
        local ok_slot=""
        local s
        for s in $valid_slots; do [[ "$s" == "$slot" ]] && ok_slot=1; done
        if [[ -z "$ok_slot" ]]; then
          err "$(msg agent.set_unknown_slot "$slot")"; exit 1
        fi
      fi

      # ── Agent selection ───────────────────────────────────────────────
      if [[ -z "$agent" ]]; then
        # Build the candidate list: installed AND online agents only.
        local -a online=()
        local cand
        while IFS= read -r cand; do
          [[ -z "$cand" ]] && continue
          if _agent_available "$cand" >/dev/null 2>&1; then
            online+=("$cand")
          fi
        done < <(_agents_installed)
        if [[ ${#online[@]} -eq 0 ]]; then
          err "$(msg agent.set_no_online_agents)"; exit 1
        fi
        agent=$(_onboard_select_agent "${online[@]}") || { err "$(msg agent.set_no_input)"; exit 1; }
      else
        # Non-interactive value: validate it is a known agent name.
        agent="$(_canonical_agent_name "$agent")"
        if ! _agent_is_known "$agent"; then
          err "$(msg agent.set_unknown_agent "$agent")"; exit 1
        fi
      fi

      if ! _agents_config_set_slot "$slot" "$agent"; then
        err "$(msg agent.set_write_failed "$slot")"; exit 1
      fi
      ok "$(msg agent.set_saved "$slot" "$(_agent_display_name "$agent")")"
      ;;
    list)
      echo ""; echo "  $(msg agent.available_agents)"; echo ""
      local current; current=$(_canonical_agent_name "$(_project_agent)")
      local a disp
      for a in claude kimi deepseek opencode codex openai pi qwen agy; do
        disp=$(_agent_display_name "$a")
        if _agent_installed_by_name "$a"; then
          [[ "$a" == "$current" ]] && echo -e "    ${GREEN}✓ ${disp}${NC}  (current)" \
                                   || echo -e "    ${GREEN}✓ ${disp}${NC}"
        else
          echo -e "    ${YELLOW}✗ ${disp}${NC}  (not installed)"
        fi
      done; echo ""
      ;;
    "")
      # US-AGENT-025: four-slot complexity-routing view — slot / agent /
      # online status (✓/✗) / note, plus the fallback slot (idle vs active)
      # and any recent runs.jsonl degradation traces.
      echo ""; echo "  $(msg agent.view_header)"; echo ""
      if ! _agents_config_path >/dev/null 2>&1; then
        echo -e "    ${YELLOW}$(msg agent.view_no_config)${NC}"
        echo "    $(msg agent.view_no_config_hint)"; echo ""
        return 0
      fi
      printf '    %-9s %-22s %-8s %s\n' \
        "$(msg agent.view_col_slot)" "$(msg agent.view_col_agent)" \
        "$(msg agent.view_col_status)" "$(msg agent.view_col_note)"
      # Is any slot's agent currently serving as the active fallback?
      local fb_active=""
      if [[ -n "$(_agent_view_recent_fallbacks 1)" ]]; then fb_active=1; fi
      local slot agent disp status_cell note
      for slot in easy default hard fallback; do
        agent=$(_agents_config_slot "$slot" 2>/dev/null || true)
        note=""
        if [[ -z "$agent" ]]; then
          disp="$(msg agent.view_slot_unset)"
          status_cell="-"
        else
          disp="$(_agent_display_name "$agent")"
          if _agent_available "$agent" >/dev/null 2>&1; then
            status_cell="$(echo -e "${GREEN}✓${NC}")"
          else
            status_cell="$(echo -e "${RED}✗${NC}")"
          fi
        fi
        if [[ "$slot" == "fallback" ]]; then
          if [[ -n "$fb_active" ]]; then
            note="$(msg agent.view_fallback_active)"
          else
            note="$(msg agent.view_fallback_idle)"
          fi
        fi
        printf '    %-9s %-22s %-8b %s\n' "$slot" "$disp" "$status_cell" "$note"
      done
      echo ""
      # Recent degradation traces (newest first).
      local degr; degr=$(_agent_view_recent_fallbacks 5)
      if [[ -n "$degr" ]]; then
        echo "  $(msg agent.view_recent_downgrades)"
        local tier from ran
        while read -r tier from ran; do
          [[ -z "$tier" ]] && continue
          echo "    $(msg agent.view_downgrade_line "$tier" "$from" "$ran")"
        done <<< "$degr"
        echo ""
      fi
      echo "  roll agent set <slot> <agent>   — set the agent for a slot"
      echo "  roll agent use <name>           — switch agent for this project"
      echo "  roll agent list                 — show installed agents"; echo ""
      ;;
    *) err "Unknown subcommand: $subcmd"; echo "Usage: roll agent [set <slot> <agent>|use <name>|list]"; exit 1 ;;
  esac
}

# ═══════════════════════════════════════════════════════════════════════════════
# ISOLATION — pluggable adapter for running tests in an isolated environment
# (US-ISO-001). Phase 1 supports two providers: `none` (default — direct host
# execution) and `tart` (US-ISO-002 — macOS VM). The dispatcher reads
# .roll/local.yaml's `test_isolation.type` and routes to
# `_isolation_<type>_<method>`. See .roll/features/engineering-infrastructure/
# dev-vm-isolation-plan.md for the full interface contract.
# ═══════════════════════════════════════════════════════════════════════════════

# Space-separated list of provider names that _isolation_dispatch accepts.
# The dispatcher word-splits this value, so names must not contain spaces;
# each name needs matching `_isolation_<type>_<method>` functions.
_ISOLATION_SUPPORTED_TYPES="none tart"

# Read test_isolation.type from .roll/local.yaml. Falls back to "none" when
# the file or key is missing. Uses python3+yaml for nested-key parsing,
# matching the parser used by cmd_offboard.
_isolation_get_type() {
  # Prints the configured isolation provider name (one line) on stdout and
  # always returns 0. Every failure mode — missing file, missing python3 or
  # PyYAML, unparsable YAML, a root that is not a mapping, a missing or
  # non-string `type` — degrades to "none".
  local val=""
  if [[ -f .roll/local.yaml ]] && command -v python3 >/dev/null 2>&1; then
    # `|| val=""`: a bare `val=$(...)` takes on the command's exit status, so
    # an unexpected parser failure would abort the whole script under
    # `set -e` when this function is called outside a command substitution.
    val=$(python3 - <<'PY' 2>/dev/null
import sys
try:
    import yaml
except ImportError:
    sys.exit(0)
try:
    with open(".roll/local.yaml") as fh:
        data = yaml.safe_load(fh) or {}
except Exception:
    sys.exit(0)
# A YAML root that is a list or scalar has no keys to look up.
if not isinstance(data, dict):
    sys.exit(0)
section = data.get("test_isolation")
if isinstance(section, dict):
    t = section.get("type")
    if isinstance(t, str) and t:
        print(t)
PY
    ) || val=""
  fi
  printf '%s\n' "${val:-none}"
}

# Dispatch an isolation-adapter method to the configured provider.
# Usage: _isolation_dispatch <method> [args...]
# Methods: init / provision / exec / status / reset / destroy
_isolation_dispatch() {
  # Routes <method> to `_isolation_<type>_<method>`, forwarding the remaining
  # arguments untouched. Returns 1 for a missing method, an unsupported type,
  # or a provider that lacks the method; otherwise returns the provider
  # function's own status.
  #
  # `${1:-}` (not `$1`): under `set -u` a bare `$1` aborts the script before
  # the friendly "missing method" message below can be printed.
  local method="${1:-}"
  shift || true   # nothing to shift when called without arguments
  if [[ -z "$method" ]]; then
    err "isolation: missing method"
    echo "  usage: _isolation_dispatch <init|provision|exec|status|reset|destroy> [args...]" >&2
    return 1
  fi

  # Resolve the provider. The fallback notice is printed only when the config
  # file is absent, so an explicit `type: none` stays quiet; it goes to stderr
  # to keep the dispatched command's stdout clean.
  local type; type=$(_isolation_get_type)
  if [[ "$type" = "none" ]] && [[ ! -f .roll/local.yaml ]]; then
    info "isolation: no test_isolation config, falling back to type=none (host)" >&2
  fi

  # Reject unknown types before looking for a function, so a typo in the
  # config is reported as such rather than as a missing implementation.
  local supported_ok=0 t
  for t in $_ISOLATION_SUPPORTED_TYPES; do
    if [[ "$type" = "$t" ]]; then
      supported_ok=1
    fi
  done
  if (( ! supported_ok )); then
    err "isolation: unknown type '$type' in .roll/local.yaml"
    echo "  supported types: ${_ISOLATION_SUPPORTED_TYPES// /, }" >&2
    return 1
  fi

  local fn="_isolation_${type}_${method}"
  if ! declare -F "$fn" >/dev/null 2>&1; then
    err "isolation: provider '$type' has no '${method}' implementation"
    return 1
  fi
  "$fn" "$@"
}

# ── `none` adapter (default — direct host execution) ──────────────────────
# init / provision / destroy are no-ops; exec runs the command in the host
# shell unchanged; status is always 'ready'; reset is a benign no-op
# (US-ISO-004 will print an explanatory message when invoked via roll test).
_isolation_none_init() {
  # Host execution needs no setup.
  return 0
}

_isolation_none_provision() {
  # Nothing to install: the host is used as it is.
  return 0
}

_isolation_none_exec() {
  # Run the given command line unchanged in the current shell; its exit
  # status becomes this function's status.
  "$@"
}

_isolation_none_status() {
  # The host is always reported as usable.
  echo "ready"
  return 0
}

_isolation_none_reset() {
  # US-ISO-004 AC: there is no environment to rebuild for type=none. Explain
  # that on stderr and succeed — this is not an error condition.
  info "isolation type 'none' has nothing to reset (host execution is stateless)" >&2
  return 0
}

_isolation_none_destroy() {
  # Nothing was created, so nothing is torn down.
  return 0
}

# ─── reset lock (US-ISO-004) ──────────────────────────────────────────────
# A single lockfile under .roll/ prevents two `roll test --reset` runs from
# racing, and forces concurrent `roll test` test-execution paths to bail
# fast rather than blocking on a half-rebuilt VM. --where is read-only and
# deliberately bypasses the lock.
_isolation_reset_lock_path() {
  # Prints the lockfile location; the path is relative to the current
  # working directory.
  printf '%s\n' '.roll/.iso-reset.lock'
}

_isolation_reset_lock_held() {
  # Succeeds exactly when the lockfile currently exists as a regular file.
  # Read-only: never creates or removes anything.
  local lockfile
  lockfile="$(_isolation_reset_lock_path)" || true
  [ -f "$lockfile" ]
}

# Returns 0 if the caller now holds the lock; 1 if someone else does.
_isolation_reset_acquire_lock() {
  # Tries to take the reset lock by creating the lockfile with this shell's
  # PID as its content.
  # Returns 0 only when this call created the lockfile; returns 1 when the
  # lock already exists or the lockfile could not be written.
  local lock
  lock=$(_isolation_reset_lock_path) || return 1
  # Explicit check: callers invoke this as `acquire || {...}`, a context in
  # which `set -e` is suppressed, so a failed mkdir would otherwise be ignored
  # and the lock reported as held without ever being written.
  mkdir -p "$(dirname "$lock")" || return 1
  # noclobber makes `>` refuse to overwrite an existing file, so "does it
  # exist?" and "create it" happen in one step instead of a racy test-then-
  # write. The subshell keeps noclobber from leaking into the caller; `$$`
  # still expands to the main shell's PID inside it.
  if ( set -o noclobber; printf '%s\n' "$$" > "$lock" ) 2>/dev/null; then
    return 0
  fi
  return 1
}

_isolation_reset_release_lock() {
  # Drops the reset lock. `rm -f` makes this safe to call when no lock
  # exists. It does not check who wrote the lockfile.
  local lockfile
  lockfile="$(_isolation_reset_lock_path)" || true
  rm -f "$lockfile"
}

# ── `tart` adapter (US-ISO-002 — macOS Apple Silicon VM via Tart) ─────────
# Test override hooks (used by unit tests; default values keep prod stable):
#   _TART_VM_NAME      — VM identifier (default: roll-dev-test)
#   _TART_BASE_IMAGE   — OCI base image (default: cirruslabs macos-tahoe-base)
#   _TART_SSH_USER     — SSH user inside the VM (default: admin)

_isolation_tart_vm_name() {
  # VM identifier; _TART_VM_NAME overrides the default when set and non-empty.
  local vm="${_TART_VM_NAME:-roll-dev-test}"
  printf '%s\n' "$vm"
}

_isolation_tart_base_image() {
  # Image the VM is cloned from; _TART_BASE_IMAGE overrides the default.
  local image="${_TART_BASE_IMAGE:-ghcr.io/cirruslabs/macos-tahoe-base:latest}"
  printf '%s\n' "$image"
}

_isolation_tart_ssh_user() {
  # Login name used for SSH into the VM; _TART_SSH_USER overrides the default.
  local login="${_TART_SSH_USER:-admin}"
  printf '%s\n' "$login"
}

_isolation_tart_check_platform() {
  # Succeeds only when `uname` reports Darwin and `uname -m` reports arm64.
  # Otherwise prints the reason (in Chinese, then English) via err and
  # returns 1.
  if [[ "$(uname)" == "Darwin" && "$(uname -m)" == "arm64" ]]; then
    return 0
  fi
  err "Tart 仅支持 Apple Silicon macOS"
  err "Tart only supports Apple Silicon macOS"
  return 1
}

_isolation_tart_check_binary() {
  # Succeeds when `tart` resolves as a command; otherwise reports the
  # problem plus an install hint via err and returns 1.
  command -v tart >/dev/null 2>&1 && return 0
  err "tart binary not found"
  err "  install via: brew install cirruslabs/cli/tart"
  return 1
}

# Returns 0 when the VM appears in `tart list`, 1 otherwise. Prints nothing
# in either case; the caller decides what to do.
_isolation_tart_vm_present() {
  # Scans every line of `tart list` for one whose second column equals the
  # VM name. Status only: awk prints nothing and tart's stderr is discarded.
  local vm
  vm=$(_isolation_tart_vm_name)
  tart list 2>/dev/null \
    | awk -v want="$vm" 'BEGIN { rc = 1 } $2 == want { rc = 0 } END { exit rc }'
}

# Returns the VM's IP on stdout when reachable; exit non-zero when the VM
# is stopped or `tart ip` fails for any other reason.
_isolation_tart_ip() {
  local vm addr
  vm=$(_isolation_tart_vm_name)
  # `tart ip` can hand back a stale DHCP-cached address for a stopped VM, so
  # require `tart list` to show the VM with "running" in its last column
  # before trusting the address.
  if ! tart list 2>/dev/null \
      | awk -v want="$vm" 'BEGIN { rc = 1 } $2 == want && $NF == "running" { rc = 0 } END { exit rc }'; then
    return 1
  fi
  addr=$(tart ip "$vm" 2>/dev/null) || return 1
  # Accept only a dotted-quad shaped value; anything else counts as failure.
  if [[ ! "$addr" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
    return 1
  fi
  printf '%s\n' "$addr"
}

# Status state machine — see dev-vm-isolation-plan.md §4.
# Returns one of: not-installed | stopped | running | ready
_isolation_tart_status() {
  # Always returns 0; the state is reported purely through stdout.
  # Wrong platform, no tart binary, or no VM all collapse to "not-installed".
  if ! _isolation_tart_check_platform >/dev/null 2>&1 \
      || ! command -v tart >/dev/null 2>&1 \
      || ! _isolation_tart_vm_present; then
    echo "not-installed"
    return 0
  fi

  local addr login
  if ! addr=$(_isolation_tart_ip); then
    echo "stopped"
    return 0
  fi

  # The VM has an address. A trivial non-interactive SSH command with a 3s
  # connect timeout separates "ready" (SSH answers) from merely "running".
  login=$(_isolation_tart_ssh_user)
  local state="running"
  if ssh -o BatchMode=yes -o ConnectTimeout=3 -o StrictHostKeyChecking=no \
         "${login}@${addr}" "true" >/dev/null 2>&1; then
    state="ready"
  fi
  echo "$state"
  return 0
}

# init: ensure the base image is cloned into our VM slot. Idempotent —
# `tart clone` is skipped when the VM already exists.
_isolation_tart_init() {
  # Returns 1 when the platform or binary check fails, 0 when the VM already
  # exists, otherwise the status of `tart clone`.
  _isolation_tart_check_platform || return 1
  _isolation_tart_check_binary || return 1
  local vm image
  vm=$(_isolation_tart_vm_name)
  # An existing VM is left alone; that is what makes repeated calls harmless.
  _isolation_tart_vm_present && return 0
  image=$(_isolation_tart_base_image)
  tart clone "$image" "$vm"
}

# provision: ensure runtime deps are installed inside the VM. Idempotent —
# brew install no-ops for already-installed packages. Requires the VM to
# be running with SSH responsive (caller's responsibility, usually exec).
_isolation_tart_provision() {
  # Runs one remote shell line over SSH that installs bats-core, node and
  # bash through Homebrew, each only when `brew list` does not already report
  # it. Returns 1 if the platform/binary checks fail or the VM has no usable
  # address; otherwise returns ssh's exit status.
  _isolation_tart_check_platform || return 1
  _isolation_tart_check_binary || return 1
  local addr login
  if ! addr=$(_isolation_tart_ip); then
    err "tart provision: VM not running"
    return 1
  fi
  login=$(_isolation_tart_ssh_user)
  # BatchMode: fail instead of prompting, since nobody is there to answer.
  local -a ssh_opts=(-o BatchMode=yes -o StrictHostKeyChecking=no)
  ssh "${ssh_opts[@]}" \
      "${login}@${addr}" "export PATH=/opt/homebrew/bin:/usr/local/bin:\$PATH; brew list bats >/dev/null 2>&1 || brew install bats-core; \
                       brew list node >/dev/null 2>&1 || brew install node; \
                       brew list bash >/dev/null 2>&1 || brew install bash"
}

# exec: run the command inside the VM. Auto-starts the VM if it's stopped.
# Mounts the host worktree at /Volumes/My Shared Files/roll (Tart virtiofs).
_isolation_tart_exec() {
  # Runs "$@" inside the VM over SSH, from the shared-folder mount point.
  # Returns 1 if the platform/binary checks fail or the VM does not get an
  # address within the wait window; otherwise returns ssh's exit status.
  _isolation_tart_check_platform || return 1
  _isolation_tart_check_binary || return 1
  local vm addr
  vm=$(_isolation_tart_vm_name)
  if ! addr=$(_isolation_tart_ip); then
    # No address yet: boot the VM headless in the background, sharing the
    # current directory under the name "roll".
    local mount_src
    mount_src="$(pwd -P)"
    tart run --no-graphics --dir="roll:${mount_src}" "$vm" >/dev/null 2>&1 &
    # Poll once per second, 30 attempts at most.
    local tries
    for (( tries = 0; tries < 30; tries++ )); do
      if addr=$(_isolation_tart_ip); then
        break
      fi
      sleep 1
    done
    if [[ -z "${addr:-}" ]]; then
      err "tart exec: VM failed to start in 30s"
      return 1
    fi
  fi
  local login quoted_cmd
  login=$(_isolation_tart_ssh_user)
  # %q re-quotes every argument so word boundaries survive the remote shell.
  quoted_cmd=$(printf '%q ' "$@")
  ssh -o BatchMode=yes -o StrictHostKeyChecking=no "${login}@${addr}" "export PATH=/opt/homebrew/bin:/usr/local/bin:\$PATH; cd '/Volumes/My Shared Files/roll' && $quoted_cmd"
}

# reset: stop, delete, re-clone from base image, then re-provision.
# Target: ≤90s (caller's perception); actual depends on tart clone speed.
# Clone is called directly (not via init) so the sequence is unconditional —
# tart's own "VM exists" check still no-ops re-clone if delete didn't take.
_isolation_tart_reset() {
  # Returns 1 if a precondition check or the clone fails; 0 otherwise, even
  # when provisioning fails.
  _isolation_tart_check_platform || return 1
  _isolation_tart_check_binary || return 1
  local vm image step
  vm=$(_isolation_tart_vm_name)
  image=$(_isolation_tart_base_image)
  # Stop and delete are best-effort: either may fail (e.g. no such VM) and
  # the rebuild carries on regardless.
  for step in stop delete; do
    tart "$step" "$vm" 2>/dev/null || true
  done
  if ! tart clone "$image" "$vm"; then
    return 1
  fi
  # A provisioning failure is tolerated here on purpose; a later status
  # check is what surfaces it.
  _isolation_tart_provision || true
}

# destroy: stop + delete. Doesn't rebuild.
_isolation_tart_destroy() {
  # Returns 1 only when the platform or binary check fails. Failures of the
  # stop/delete commands themselves are ignored, so destroying a VM that is
  # already gone still succeeds.
  _isolation_tart_check_platform || return 1
  _isolation_tart_check_binary || return 1
  local vm step
  vm=$(_isolation_tart_vm_name)
  for step in stop delete; do
    tart "$step" "$vm" 2>/dev/null || true
  done
  return 0
}

# ─── cmd_test ────────────────────────────────────────────────────────────
# US-ISO-003: `roll test` — runs the project's test suite through the
# isolation dispatcher. The configured `test_isolation.type` determines
# where the tests execute (host shell vs Tart VM). When type=tart and
# the VM fails to start, the failure surfaces non-zero — no silent
# fallback to host, since that would lie about where the tests ran.

# Print where the test suite will execute. Format is machine-readable
# (one token, optionally with a colon-separated detail) so scripts can
# parse it: `host`, `tart:<ip>`, `tart:stopped`, `tart:not-installed`, …
_cmd_test_where() {
  # Emits exactly one token on stdout describing where tests would run.
  local provider detail
  provider=$(_isolation_get_type)
  if [[ "$provider" == "none" ]]; then
    echo "host"
  elif [[ "$provider" == "tart" ]]; then
    detail=$(_isolation_tart_status)
    # For a VM that is up, prefer its address over the bare state name; if
    # the address lookup fails, fall back to the state.
    if [[ "$detail" == "ready" || "$detail" == "running" ]]; then
      local addr
      if addr=$(_isolation_tart_ip 2>/dev/null); then
        detail="$addr"
      fi
    fi
    echo "tart:${detail}"
  else
    echo "unknown:${provider}"
  fi
}

cmd_test() {
  # Entry point for `roll test`.
  #   --help / -h  print usage, return 0
  #   --where      print the routing token, return 0
  #   --reset      rebuild the isolation environment under the reset lock
  #   otherwise    run `npm test -- <args>` through the isolation dispatcher
  # Returns the dispatched command's status, or 1 when a reset lock blocks
  # the request.

  # US-ISO-005: `--help` / `-h` anywhere before `--` wins, so
  # `roll test --reset --help` is a help lookup, not a reset. Anything after
  # `--` is forwarded verbatim and never inspected.
  local _a
  for _a in "$@"; do
    case "$_a" in
      --) break ;;
      --help|-h) set -- --help; break ;;
    esac
  done
  case "${1:-}" in
    --help|-h)
      cat <<'EOF'
Usage: roll test [--where | --reset] [--] [<extra-args>...]

Runs the project's test suite through the isolation adapter chosen in
.roll/local.yaml:

  test_isolation:
    type: none   (default)   Direct host execution — same shell as `npm test`.
    type: tart               Inside the Apple-Silicon `roll-dev-test` Tart VM,
                             so tests can't reach the host's launchd / shared
                             roll state. Tart isn't auto-installed; run
                             `brew install cirruslabs/cli/tart` first.

Flags:
  --where        Print where tests will run, then exit (e.g. `host`,
                 `tart:192.168.64.5`, `tart:stopped`).
  --reset        Rebuild the isolation environment to a clean baseline.
                 type=tart: stop → delete → clone → provision (~90s).
                 type=none: prints a note and exits 0 (host is stateless).
                 Holds a lockfile under .roll/.iso-reset.lock; concurrent
                 `roll test` invocations fast-fail with a clear error.
  --help, -h     Show this help.

Examples:
  roll test                    Run affected tests (default: --affected HEAD~1).
  roll test -- tests/          Run the full suite explicitly.
  roll test -- --tier=fast     Forward arguments to npm test.
  roll test --where            Don't run; just report routing.
  roll test --reset            Rebuild the VM (or host no-op).

When type=tart and the VM can't be reached, the command exits non-zero
rather than silently falling back to host execution.
EOF
      return 0
      ;;
    --where)
      # Read-only query: deliberately does not consult the reset lock.
      _cmd_test_where
      return 0
      ;;
    --reset)
      # Refuse if another reset is in progress — fast-fail beats blocking
      # on a half-rebuilt VM (US-ISO-004 AC).
      if _isolation_reset_lock_held; then
        err "roll test --reset: another reset is already in progress"
        echo "  lock: $(_isolation_reset_lock_path) (delete manually if stale)" >&2
        return 1
      fi
      _isolation_reset_acquire_lock || {
        err "roll test --reset: failed to acquire reset lock"
        return 1
      }
      # Capture the status through `||` and release the lock explicitly.
      # A `trap ... RETURN` is not enough here: when the dispatch fails under
      # `set -e` the shell exits instead of returning, RETURN traps do not
      # run on exit, and the lock would stay behind and block every later
      # `roll test`. A RETURN trap also stays installed after this function
      # has returned.
      local _reset_rc=0
      _isolation_dispatch reset || _reset_rc=$?
      _isolation_reset_release_lock
      return "$_reset_rc"
      ;;
    --)
      shift
      ;;
  esac

  # Test-execution path. If a reset is in progress, bail rather than racing
  # into a half-rebuilt VM — the user can run `roll test --where` to inspect.
  if _isolation_reset_lock_held; then
    err "roll test: a reset is in progress (lock: $(_isolation_reset_lock_path))"
    echo "  re-run once the reset completes, or delete the lockfile if stale" >&2
    return 1
  fi

  # Forward the remaining args to npm test inside the configured adapter.
  # With no args, default to --affected (HEAD~1 base) — this mirrors the
  # pre-commit hook's intent and keeps VM runs fast.
  # To run the full suite explicitly: roll test -- tests/
  local _npm_args=("$@")
  if [[ "${#_npm_args[@]}" -eq 0 ]]; then
    _npm_args=(--affected)
  fi
  # Always pass args after `--` so npm does not interpret flags such as
  # --affected as its own config options (it warns and drops them otherwise).
  _isolation_dispatch exec npm test -- "${_npm_args[@]}"
}

# ═══════════════════════════════════════════════════════════════════════════════
# LOOP — autonomous BACKLOG executor management
# ═══════════════════════════════════════════════════════════════════════════════

# Returns a filesystem-safe slug combining the project basename and a 6-char
# hash of the full path, ensuring uniqueness across sibling dirs with same name.
_project_slug() {
  # Prints `<name>-<6 hex chars>` on stdout with no trailing newline.
  #   $1  project path (default: current directory, physical path)
  # Identity source, in order: $ROLL_MAIN_SLUG, the `origin` remote URL, the
  # first listed remote's URL, and finally the filesystem path.

  # US-LOOP-006: the cycle wrapper exports ROLL_MAIN_SLUG so any subshell —
  # worktree, tmp cwd, or unrelated path — writes events / runs.jsonl under
  # the main project identity instead of fragmenting into phantom slugs.
  if [[ -n "${ROLL_MAIN_SLUG:-}" ]]; then
    printf '%s' "$ROLL_MAIN_SLUG"
    return 0
  fi
  local path="${1:-$(pwd -P 2>/dev/null || pwd)}"
  # FIX-056: on macOS's case-insensitive filesystem two spellings can name
  # the same directory; realpath folds them (and symlinks) to one path.
  if [[ "$(uname -s 2>/dev/null)" == "Darwin" ]]; then
    local _canon
    _canon=$(realpath "$path" 2>/dev/null) && path="$_canon"
  fi
  # The git probes below are expected to fail outside a repository or when a
  # remote is missing. `|| true` keeps them probes: a bare `var=$(cmd)` takes
  # on cmd's exit status and would abort the script under `set -e` (same
  # reasoning as FIX-094). "Nothing found" is detected by empty output.
  #
  # FIX-034: inside a git worktree, git-common-dir is the main tree's .git
  # path; resolving to the main tree makes worktree and main-tree runs agree.
  local _common
  _common=$(git -C "$path" rev-parse --git-common-dir 2>/dev/null) || true
  if [[ -n "$_common" && "$_common" == *"/.git" ]]; then
    path="${_common%/.git}"
  fi

  # US-OBS-010: prefer the remote URL so the slug is stable across machines.
  local remote_url
  remote_url=$(git -C "$path" remote get-url origin 2>/dev/null) || true
  if [[ -z "$remote_url" ]]; then
    local first_remote
    first_remote=$(git -C "$path" remote 2>/dev/null | head -1) || true
    if [[ -n "$first_remote" ]]; then
      remote_url=$(git -C "$path" remote get-url "$first_remote" 2>/dev/null) || true
    fi
  fi

  # `key` is the string that gets hashed and whose last path component
  # becomes the readable prefix.
  local key
  if [[ -n "$remote_url" ]]; then
    # Normalize: strip .git, rewrite git@HOST:PATH to https://HOST/PATH,
    # then lowercase.
    remote_url="${remote_url%.git}"
    if [[ "$remote_url" =~ ^git@([^:]+):(.+)$ ]]; then
      remote_url="https://${BASH_REMATCH[1]}/${BASH_REMATCH[2]}"
    fi
    remote_url=$(printf '%s' "$remote_url" | tr '[:upper:]' '[:lower:]')
    key="$remote_url"
  else
    # No remote available — fall back to the path. If roll_records_remote is
    # configured, warn: a path-based slug is not stable across machines.
    local records_remote
    records_remote=$(config_get "roll_records_remote" "") || records_remote=""
    if [[ -n "$records_remote" ]]; then
      printf 'roll: WARNING — roll_records_remote is configured but no git remote URL found; slug will fall back to path-based (cross-machine merge will not work)\n' >&2
    fi
    key="$path"
  fi

  local base hash
  base=$(basename "$key")
  # macOS ships `md5`; elsewhere fall back to `md5sum`.
  if command -v md5 &>/dev/null; then
    hash=$(printf '%s' "$key" | md5 | cut -c1-6)
  else
    hash=$(printf '%s' "$key" | md5sum | cut -c1-6)
  fi
  # Squeeze every run of non-alphanumerics into one '-', drop trailing '-'.
  base=$(printf '%s' "$base" | tr -cs '[:alnum:]' '-' | sed 's/-*$//')
  printf '%s' "${base}-${hash}"
}

# FIX-058: migrate loop state files when the per-project slug changed due to
# FIX-056 (realpath case-normalization on macOS).  Called by
# _install_launchd_plists before generating new runner/plist so existing state
# (paused/running/etc.) is not silently lost.
#
# Usage: _slug_migrate_from_legacy <new_slug> [<loop_dir>] [<old_slug>]
#   new_slug   — the correct slug computed by the current _project_slug
#   loop_dir   — optional override of ${_SHARED_ROOT}/loop (for unit tests)
#   old_slug   — optional explicit old slug (for unit tests; auto-computed otherwise)
_slug_migrate_from_legacy() {
  # Moves loop records and launchd/runner artifacts stored under the old slug
  # over to <new_slug>. Does nothing when both slugs are equal, and — when
  # the old slug is auto-computed — on anything other than macOS.
  local new_slug="$1"
  local loop_dir="${2:-${_SHARED_ROOT}/loop}"
  local old_slug="${3:-}"

  if [[ -z "$old_slug" ]]; then
    [[ "$(uname -s 2>/dev/null)" == "Darwin" ]] || return 0
    # Compute the pre-FIX-056 slug: same algorithm but without realpath.
    local raw_path; raw_path=$(pwd 2>/dev/null)
    # FIX-094: a standalone `_common=$(...)` inherits the command's exit
    # code, so `set -e` would abort the script when cwd is not a git repo
    # (git rev-parse exits 128). `|| true` keeps this a probe — "no git" is
    # detected through the empty output.
    local _common
    _common=$(git -C "$raw_path" rev-parse --git-common-dir 2>/dev/null) || true
    if [[ -n "$_common" && "$_common" == *"/.git" ]]; then
      raw_path="${_common%/.git}"
    fi
    local old_base; old_base=$(basename "$raw_path")
    local old_hash
    if command -v md5 &>/dev/null; then
      old_hash=$(printf '%s' "$raw_path" | md5 | cut -c1-6)
    else
      old_hash=$(printf '%s' "$raw_path" | md5sum | cut -c1-6)
    fi
    old_base=$(printf '%s' "$old_base" | tr -cs '[:alnum:]' '-' | sed 's/-*$//')
    old_slug="${old_base}-${old_hash}"
  fi

  [[ "$old_slug" == "$new_slug" ]] && return 0

  # US-LOOP-019: control-plane files (state/ALERT/PAUSE/mute) moved to
  # project-local .roll/loop/; a slug change does not affect them. Only
  # data-plane files and runner/plist artifacts are migrated.
  # Idempotency guard: if project-local state already exists, skip the legacy
  # loop_dir migration entirely (the data was already moved by
  # _loop_migrate_legacy_paths).
  local _proj_state="$(_loop_runtime_dir "$new_slug" 2>/dev/null || echo "")/state-${new_slug}.yaml"
  if [[ -f "$_proj_state" ]]; then
    # Control-plane is project-local; only clean up the old runner/plist.
    local old_plist=~/Library/LaunchAgents/com.roll.loop.${old_slug}.plist
    if [[ -f "$old_plist" ]]; then
      _launchctl_safe unload "$old_plist" 2>/dev/null || true
      rm -f "$old_plist"
    fi
    rm -f "${loop_dir}/run-${old_slug}.sh" "${loop_dir}/run-${old_slug}-inner.sh"
    return 0
  fi

  # Nothing in loop_dir to migrate? Then leave everything as it is.
  [[ -f "${loop_dir}/state-${old_slug}.yaml" ]] || [[ -f "${loop_dir}/events-${old_slug}.ndjson" ]] || [[ -f "${loop_dir}/cron-${old_slug}.log" ]] || [[ -f "${loop_dir}/runs.jsonl" ]] || return 0

  printf 'roll: migrating loop records %s → %s\n' "$old_slug" "$new_slug" >&2

  if [[ -f "${loop_dir}/state-${old_slug}.yaml" ]]; then
    mv "${loop_dir}/state-${old_slug}.yaml" "${loop_dir}/state-${new_slug}.yaml"
  fi

  if [[ -f "${loop_dir}/cron-${old_slug}.log" ]]; then
    mv "${loop_dir}/cron-${old_slug}.log" "${loop_dir}/cron-${new_slug}.log"
  fi

  # Events: append to an existing new-slug file, otherwise just rename.
  if [[ -f "${loop_dir}/events-${old_slug}.ndjson" ]]; then
    if [[ -f "${loop_dir}/events-${new_slug}.ndjson" ]]; then
      cat "${loop_dir}/events-${old_slug}.ndjson" >> "${loop_dir}/events-${new_slug}.ndjson"
      rm "${loop_dir}/events-${old_slug}.ndjson"
    else
      mv "${loop_dir}/events-${old_slug}.ndjson" "${loop_dir}/events-${new_slug}.ndjson"
    fi
  fi

  # runs.jsonl: rewrite the `project` field of matching records. Lines that
  # are not valid JSON are passed through unchanged.
  local runs_file="${loop_dir}/runs.jsonl"
  if [[ -f "$runs_file" ]]; then
    local tmp; tmp=$(mktemp)
    # Replace runs.jsonl only when the rewrite succeeded. If python3 is
    # missing or crashes, the temp file is empty or partial, and moving it
    # into place would destroy the run history.
    if python3 - "$old_slug" "$new_slug" "$runs_file" > "$tmp" << 'PYEOF'
import json, sys
old, new, path = sys.argv[1], sys.argv[2], sys.argv[3]
with open(path) as f:
    for line in f:
        line = line.rstrip('\n')
        if not line:
            continue
        try:
            d = json.loads(line)
            if 'project' in d and old in str(d['project']):
                d['project'] = str(d['project']).replace(old, new)
            print(json.dumps(d))
        except Exception:
            print(line)
PYEOF
    then
      mv "$tmp" "$runs_file"
    else
      rm -f "$tmp"
      printf 'roll: WARNING — could not rewrite %s; leaving it unchanged\n' "$runs_file" >&2
    fi
  fi

  local old_plist=~/Library/LaunchAgents/com.roll.loop.${old_slug}.plist
  if [[ -f "$old_plist" ]]; then
    _launchctl_safe unload "$old_plist" 2>/dev/null || true
    rm -f "$old_plist"
  fi

  rm -f "${loop_dir}/run-${old_slug}.sh" "${loop_dir}/run-${old_slug}-inner.sh"
}

# US-OBS-010: path-based slug (without remote URL) — used to detect the
# pre-remote slug so migration can merge old records into the new identity.
_project_slug_path_based() {
  # Prints `<dirname>-<6 hex chars of md5(path)>` on stdout with no trailing
  # newline. Remote URLs are never consulted here.
  #   $1  project path (default: current directory, physical path)
  local path="${1:-$(pwd -P 2>/dev/null || pwd)}"
  # On macOS, fold case variants and symlinks to one canonical path.
  if [[ "$(uname -s 2>/dev/null)" == "Darwin" ]]; then
    local _canon
    _canon=$(realpath "$path" 2>/dev/null) && path="$_canon"
  fi
  # Probe for the shared .git directory so worktrees resolve to the main
  # tree. `|| true`: outside a repository git exits non-zero, and a bare
  # `var=$(cmd)` would pass that status on and abort under `set -e` (same
  # reasoning as FIX-094). Empty output already means "not a repo".
  local _common
  _common=$(git -C "$path" rev-parse --git-common-dir 2>/dev/null) || true
  if [[ -n "$_common" && "$_common" == *"/.git" ]]; then
    path="${_common%/.git}"
  fi
  local base hash
  base=$(basename "$path")
  # macOS ships `md5`; elsewhere fall back to `md5sum`.
  if command -v md5 &>/dev/null; then
    hash=$(printf '%s' "$path" | md5 | cut -c1-6)
  else
    hash=$(printf '%s' "$path" | md5sum | cut -c1-6)
  fi
  # Squeeze every run of non-alphanumerics into one '-', drop trailing '-'.
  base=$(printf '%s' "$base" | tr -cs '[:alnum:]' '-' | sed 's/-*$//')
  printf '%s' "${base}-${hash}"
}

# US-OBS-010: migrate loop records from old path-based slug to new
# remote-based slug.  Dedup by run_id; atomic cp→tmp→mv; keep old as .bak.
#
# Usage: _slug_migrate_to_remote <project_path> [<loop_dir>]
_slug_migrate_to_remote() {
  # Merges loop records stored under the path-based slug into the
  # remote-based slug's files.
  #   $1  project path
  #   $2  loop directory (default: ${_SHARED_ROOT}/loop)
  # Returns 0 when there is nothing to do or the migration completed, 1 when
  # a rewrite step failed. A failed step leaves its target file untouched.
  local project_path="$1"
  local loop_dir="${2:-${_SHARED_ROOT}/loop}"

  local new_slug; new_slug=$(_project_slug "$project_path")
  local old_slug; old_slug=$(_project_slug_path_based "$project_path")

  # Same identity either way: no migration needed.
  [[ "$old_slug" == "$new_slug" ]] && return 0

  # US-LOOP-019: if project-local state already exists, data was migrated.
  local _proj_state="$(_loop_runtime_dir "$new_slug" 2>/dev/null || echo "")/state-${new_slug}.yaml"
  if [[ -f "$_proj_state" ]]; then
    return 0
  fi

  local old_events="${loop_dir}/events-${old_slug}.ndjson"
  [[ -f "$old_events" ]] || return 0

  printf 'roll: migrating loop records %s → %s (cross-machine slug)\n' "$old_slug" "$new_slug" >&2

  # ── Events: existing new-slug lines first (verbatim), then old-slug events
  # whose `label` has not been seen yet. The temp file is created next to
  # its target so the final `mv` is a same-directory rename.
  local events_file="${loop_dir}/events-${new_slug}.ndjson"
  local tmp_events; tmp_events=$(mktemp "${events_file}.XXXXXX")
  if python3 - "$old_events" "$events_file" "$tmp_events" << 'PYEOF'
import json, sys
old_path, new_path, tmp_path = sys.argv[1], sys.argv[2], sys.argv[3]


def parse(raw):
    """Return the decoded JSON object, or None if the line is not a JSON object."""
    try:
        rec = json.loads(raw)
    except ValueError:
        return None
    return rec if isinstance(rec, dict) else None


def dedup_key(rec, field):
    """Return a usable id, or None when the record carries no id.

    Records without an id are never treated as duplicates of one another.
    """
    key = rec.get(field) if rec is not None else None
    if isinstance(key, (str, int, float)) and key != '':
        return key
    return None


seen = set()
with open(tmp_path, 'w') as out:
    # Keep the current new-slug file as it is and remember its labels.
    try:
        with open(new_path) as f:
            for raw in f:
                # Guard against a missing final newline gluing two records.
                out.write(raw if raw.endswith('\n') else raw + '\n')
                key = dedup_key(parse(raw), 'label')
                if key is not None:
                    seen.add(key)
    except FileNotFoundError:
        pass
    # Append old events that are not already present.
    with open(old_path) as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue
            rec = parse(line)
            if rec is None:
                out.write(line + '\n')   # unparsable: keep verbatim
                continue
            key = dedup_key(rec, 'label')
            if key is not None:
                if key in seen:
                    continue
                seen.add(key)
            out.write(json.dumps(rec) + '\n')
PYEOF
  then
    mv "$tmp_events" "$events_file"
    # Keep the old events as .bak rather than deleting them.
    mv "$old_events" "${old_events}.bak"
  else
    rm -f "$tmp_events"
    printf 'roll: WARNING — could not merge %s; leaving loop records unchanged\n' "$old_events" >&2
    return 1
  fi

  # ── runs.jsonl: rewrite the project field and drop repeated run_ids.
  local runs_file="${loop_dir}/runs.jsonl"
  if [[ -f "$runs_file" ]]; then
    local tmp; tmp=$(mktemp "${runs_file}.XXXXXX")
    if python3 - "$old_slug" "$new_slug" "$runs_file" > "$tmp" << 'PYEOF'
import json, sys
old, new, path = sys.argv[1], sys.argv[2], sys.argv[3]
seen = set()
with open(path) as f:
    for line in f:
        line = line.rstrip('\n')
        if not line:
            continue
        try:
            d = json.loads(line)
        except ValueError:
            print(line)
            continue
        if not isinstance(d, dict):
            print(line)
            continue
        if 'project' in d and old in str(d['project']):
            d['project'] = str(d['project']).replace(old, new)
        rid = d.get('run_id')
        # Only records that actually carry a run_id can be duplicates.
        if isinstance(rid, (str, int, float)) and rid != '':
            if rid in seen:
                continue
            seen.add(rid)
        print(json.dumps(d))
PYEOF
    then
      mv "$tmp" "$runs_file"
    else
      rm -f "$tmp"
      printf 'roll: WARNING — could not rewrite %s; leaving it unchanged\n' "$runs_file" >&2
      return 1
    fi
  fi
}

# Tag string for roll-managed loop entries.
# NOTE(review): presumably appended to scheduler lines so roll can find its
# own entries again; the consumers are outside this section — confirm.
_LOOP_TAG="# roll-loop"
# FIX-065: when sourced in a test context with no explicit override, route
# shared state into a per-process /tmp path instead of falling back to
# production ~/.shared/roll/. Without this safety net, tests that source
# bin/roll (directly or via a generated inner runner under /var/folders/)
# would write ALERT / state / events / runs.jsonl into the live loop
# daemon's monitored directory and trigger false aborts.
#
# Test context is detected via three signals (any one is enough):
#   1. BATS_TEST_FILENAME is set (works for direct test invocations)
#   2. The caller's file path lives under /tmp or /var/folders (catches the
#      generated runner-inner.sh path that bats subprocesses spawn —
#      BATS_* env can be lost across `bash -l` + nested forks)
#   3. PWD is under /tmp or /var/folders (catches helpers that cd'd in)
# An explicit, non-empty _SHARED_ROOT from the environment always wins: the
# whole detection block is skipped in that case.
if [ -z "${_SHARED_ROOT:-}" ]; then
  _roll_in_test_ctx=0
  if [ -n "${BATS_TEST_FILENAME:-}" ]; then
    _roll_in_test_ctx=1
  else
    # BASH_SOURCE[1] is the file that sourced this script (empty when the
    # script is executed directly).
    _roll_caller="${BASH_SOURCE[1]:-}"
    case "$_roll_caller" in /tmp/*|/private/tmp/*|/var/folders/*) _roll_in_test_ctx=1 ;; esac
    case "$PWD" in /tmp/*|/private/tmp/*|/var/folders/*) _roll_in_test_ctx=1 ;; esac
  fi
  if [ "$_roll_in_test_ctx" = 1 ]; then
    # Per-process sandbox ($$ = this shell's PID); exported so child
    # processes reuse the same root instead of re-detecting.
    _SHARED_ROOT="${TMPDIR:-/tmp}/roll-test-shared.$$"
    mkdir -p "${_SHARED_ROOT}/loop"
    export _SHARED_ROOT
  fi
  # Detection scratch variables must not leak into the rest of the script.
  unset _roll_in_test_ctx _roll_caller
fi
# Production default when neither an override nor the test sandbox set it.
: "${_SHARED_ROOT:=${HOME}/.shared/roll}"
# FIX-052: per-project loop state — ALERT/state/mute were globally shared,
# causing one project's alerts to surface in another project's session and
# letting concurrent cycles overwrite each other's state. Align with the
# existing events-/run-/LOCK-/heartbeat-<slug> namespacing.
# Falls back to the literal slug "default" when _project_slug fails.
: "${_LOOP_PROJ_SLUG:=$(_project_slug 2>/dev/null || echo default)}"
# US-LOOP-019: prefer project-local .roll/loop/ for control-plane files;
# fall back to shared root when project path cannot be resolved.
# NOTE(review): _loop_runtime_dir is defined further down in this file. Unless
# it is also defined before this point, the call below fails with "command not
# found" (silenced by 2>/dev/null) and the shared-root branch is always taken
# at source time — confirm the intended definition order.
_LOOP_RT_DIR=""
if _rt_dir_out=$(_loop_runtime_dir "${_LOOP_PROJ_SLUG}" 2>/dev/null); then
  _LOOP_RT_DIR="$_rt_dir_out"
fi
# The `:=` form keeps any value already present in the environment, so tests
# and callers can pin _LOOP_STATE / _LOOP_ALERT / _LOOP_MUTE_FILE explicitly.
if [[ -n "$_LOOP_RT_DIR" ]]; then
  : "${_LOOP_STATE:=${_LOOP_RT_DIR}/state-${_LOOP_PROJ_SLUG}.yaml}"
  : "${_LOOP_ALERT:=${_LOOP_RT_DIR}/ALERT-${_LOOP_PROJ_SLUG}.md}"
  : "${_LOOP_MUTE_FILE:=${_LOOP_RT_DIR}/mute-${_LOOP_PROJ_SLUG}}"
else
  : "${_LOOP_STATE:=${_SHARED_ROOT}/loop/state-${_LOOP_PROJ_SLUG}.yaml}"
  : "${_LOOP_ALERT:=${_SHARED_ROOT}/loop/ALERT-${_LOOP_PROJ_SLUG}.md}"
  : "${_LOOP_MUTE_FILE:=${_SHARED_ROOT}/loop/mute-${_LOOP_PROJ_SLUG}}"
fi
# FIX-065: was hardcoded to ${HOME}/.shared/roll/loop/runs.jsonl which ignored
# _SHARED_ROOT overrides and silently leaked test runs.jsonl writes into prod.
# US-LOOP-020: this remains the legacy shared-root path for backward-compatible
# reads / migration only. New cycle rows land in the project-local runs.jsonl
# (see _runs_append + _loop_runs_file). Tests still override _LOOP_RUNS directly.
# Note: this is a plain assignment, so a _LOOP_RUNS value inherited from the
# environment is replaced here; overrides must be applied after sourcing.
_LOOP_RUNS="${_SHARED_ROOT}/loop/runs.jsonl"

# US-LOOP-020: print the runs.jsonl path that read-side commands
# (`roll loop runs`, --detail, signals, backfill) should use for the
# *current project*. Always prints exactly one path.
#
# Precedence:
#   1. ROLL_PROJECT_RUNTIME_DIR (test sandbox)      → <dir>/runs.jsonl
#   2. _LOOP_RUNS, but only when it was pointed somewhere other than the
#      shared-root default (${_SHARED_ROOT}/loop/runs.jsonl) → used verbatim
#   3. ROLL_MAIN_PROJECT, else the physical cwd     → <proj>/.roll/loop/runs.jsonl
_loop_runs_file() {
  local runs_path

  if [[ -n "${ROLL_PROJECT_RUNTIME_DIR:-}" ]]; then
    runs_path="${ROLL_PROJECT_RUNTIME_DIR}/runs.jsonl"
  elif [[ -n "${_LOOP_RUNS:-}" && "${_LOOP_RUNS}" != "${_SHARED_ROOT}/loop/runs.jsonl" ]]; then
    # Explicit override (tests or a custom path) beats the project-local file.
    runs_path="${_LOOP_RUNS}"
  else
    runs_path="${ROLL_MAIN_PROJECT:-$(pwd -P)}/.roll/loop/runs.jsonl"
  fi

  printf '%s\n' "$runs_path"
}

# US-LOOP-020: enumerate every loop-registered project and emit the union of
# their .roll/loop/runs.jsonl rows as one newline-separated jsonl stream,
# sorted oldest→newest by .ts (caller reverses for newest-first display).
# Requires jq: prints nothing and returns 0 when jq is unavailable or when no
# non-empty candidate file exists.
#
# Candidate files (first matching source wins; the current project's file from
# _loop_runs_file is then always appended as a fallback / supplement):
#   1. ROLL_LOOP_RUNS_ALL_DIRS  — ':'-separated runtime dirs, for cross-project
#      fixtures without launchd
#   2. ROLL_PROJECT_RUNTIME_DIR — points the whole machine at one sandbox dir
#   3. macOS — WorkingDirectory of every com.roll.loop.*.plist in _LAUNCHD_DIR
#      (same source as _status_loop_overview)
# On Linux (no plists) only the current project's local file is used.
#
# Rows are parsed line by line; malformed lines and non-object values are
# skipped individually so one corrupt row cannot truncate the merged stream.
_loop_runs_aggregate_all() {
  command -v jq >/dev/null 2>&1 || return 0
  # US-LOOP-020 hook point: ROLL_LOOP_RUNS_CACHE_TTL (default 0 = no cache).
  # Reserved for a future caching layer over the cross-project merge; the
  # default 0 means every call re-reads live files. No implementation built yet.
  : "${ROLL_LOOP_RUNS_CACHE_TTL:=0}"

  # Candidate runs.jsonl files, one per line.
  local _candidates="" _d _plist _proj
  if [[ -n "${ROLL_LOOP_RUNS_ALL_DIRS:-}" ]]; then
    # Explicit fixture dirs (tests) take precedence and avoid touching launchd.
    # `read -a` splits on ':' without glob-expanding the entries and without
    # changing IFS for the rest of the shell.
    local -a _dirs=()
    IFS=':' read -r -a _dirs <<< "${ROLL_LOOP_RUNS_ALL_DIRS}" || true
    # ${arr[@]+...} keeps an empty array safe under `set -u` on older bash.
    for _d in ${_dirs[@]+"${_dirs[@]}"}; do
      _candidates="${_candidates}${_d}/runs.jsonl"$'\n'
    done
  elif [[ -n "${ROLL_PROJECT_RUNTIME_DIR:-}" ]]; then
    _candidates="${ROLL_PROJECT_RUNTIME_DIR}/runs.jsonl"$'\n'
  elif [[ "$(uname)" == "Darwin" ]]; then
    # Glob instead of parsing `ls`; an unmatched pattern stays literal and is
    # rejected by the -f test.
    for _plist in "${_LAUNCHD_DIR:-${HOME}/Library/LaunchAgents}"/com.roll.loop.*.plist; do
      [[ -f "$_plist" ]] || continue
      _proj=$(awk '/<key>WorkingDirectory<\/key>/{f=1;next} f{gsub(/^[[:space:]]*<string>|<\/string>[[:space:]]*$/,"");print;exit}' "$_plist" 2>/dev/null) || _proj=""
      [[ -n "$_proj" ]] || continue
      _candidates="${_candidates}${_proj}/.roll/loop/runs.jsonl"$'\n'
    done
  fi
  # Always include the current project's file as a fallback / supplement.
  _candidates="${_candidates}$(_loop_runs_file)"$'\n'

  # Dedup (exact, newline-bounded membership — no substring aliasing) and keep
  # only non-empty existing files.
  local files="" seen=$'\n' _f
  while IFS= read -r _f; do
    [[ -n "$_f" && -f "$_f" && -s "$_f" ]] || continue
    case "$seen" in *$'\n'"${_f}"$'\n'*) continue ;; esac
    seen="${seen}${_f}"$'\n'
    files="${files}${_f}"$'\n'
  done <<< "$_candidates"

  [[ -n "$files" ]] || return 0

  # Merge + stable sort by ts (rows lacking ts sort first). jq's sort_by is a
  # stable sort, so same-ts rows keep file/encounter order. The newline after
  # each file stops a file without a trailing newline from fusing its last row
  # with the next file's first row.
  printf '%s' "$files" | while IFS= read -r _f; do
    [[ -n "$_f" ]] || continue
    cat -- "$_f" 2>/dev/null || true
    printf '\n'
  done | jq -R -c 'fromjson? | select(type == "object")' 2>/dev/null \
    | jq -s -c 'sort_by(.ts // "") | .[]' 2>/dev/null
}

# ──────────────────────────────────────────────────────────────
# US-LOOP-018: _loop_resolve_project_path + _loop_runtime_dir
#
# Two helpers that form the single source-of-truth for resolving
# "where does this project's loop runtime data live?". All callers
# (inner/outer runner templates, dashboard, GC, pause/resume, etc.)
# must go through _loop_runtime_dir — never inline paths.
#
# Resolution order for _loop_resolve_project_path:
#   1. macOS: launchd plist WorkingDirectory key (already used by
#      _status_loop_overview — implicit registry, no new state file)
#   2. Linux: crontab entry (grep for "cd \"<path>\"")  — best-effort
#   3. Universal fallback: inner runner script grep for
#      export ROLL_MAIN_PROJECT="..."  (inner scripts carry this;
#      outer runner scripts do not — pi peer review correction)
# ──────────────────────────────────────────────────────────────

# _loop_resolve_project_path <slug>
# Resolve a loop slug to its project directory path.
# Prints the path and returns 0 on success; prints nothing and returns 1 when
# no source knows the slug.
#
# Sources, tried in order:
#   1. macOS: WorkingDirectory of ~/Library/LaunchAgents/com.roll.loop.<slug>.plist
#   2. crontab line mentioning run-<slug>.sh — the path inside `cd "<path>"`
#      (only accepted when that directory exists)
#   3. `export ROLL_MAIN_PROJECT="..."` in <shared root>/loop/run-<slug>-inner.sh
#
# Every lookup pipeline is guarded with `|| true`: the script runs under
# `set -euo pipefail`, and an empty crontab or a non-matching grep must fall
# through to the next source instead of aborting the shell.
_loop_resolve_project_path() {
  local slug="$1"
  local proj=""

  # 1. macOS: launchd plist WorkingDirectory (primary source)
  if [[ "$(uname)" = "Darwin" ]]; then
    local plist="${HOME}/Library/LaunchAgents/com.roll.loop.${slug}.plist"
    if [[ -f "$plist" ]]; then
      proj=$(awk '/<key>WorkingDirectory<\/key>/{f=1;next} f{gsub(/^[[:space:]]*<string>|<\/string>[[:space:]]*$/,"");print;exit}' "$plist" 2>/dev/null) || true
      if [[ -n "$proj" ]]; then
        printf '%s\n' "$proj"
        return 0
      fi
    fi
  fi

  # 2. Linux: crontab entry (best-effort)
  if command -v crontab >/dev/null 2>&1; then
    local cron_line=""
    # -F: the slug is matched literally, so "a.b" cannot match "run-aXb.sh".
    cron_line=$(crontab -l 2>/dev/null | grep -F -- "run-${slug}.sh" | head -1) || true
    if [[ -n "$cron_line" ]]; then
      # Extract cd path: "cd \"<path>\" && ..."
      proj=$(printf '%s\n' "$cron_line" | sed -n 's/.*cd[[:space:]]*"\([^"]*\)".*/\1/p') || true
      if [[ -n "$proj" && -d "$proj" ]]; then
        printf '%s\n' "$proj"
        return 0
      fi
    fi
  fi

  # 3. Universal fallback: inner runner script ROLL_MAIN_PROJECT export
  #    (inner scripts carry this; outer runner scripts do not).
  local inner_script="${_SHARED_ROOT:-${HOME}/.shared/roll}/loop/run-${slug}-inner.sh"
  if [[ -f "$inner_script" ]]; then
    proj=$(grep '^export ROLL_MAIN_PROJECT=' "$inner_script" 2>/dev/null | head -1 | sed 's/.*="\(.*\)"/\1/') || true
    if [[ -n "$proj" ]]; then
      printf '%s\n' "$proj"
      return 0
    fi
  fi

  return 1
}

# _loop_runtime_dir <slug>
# Print the directory that holds the loop runtime files for <slug>.
#   - ROLL_PROJECT_RUNTIME_DIR set (test sandbox) → that directory, verbatim
#   - otherwise → <resolved project path>/.roll/loop
# Returns 0 after printing; returns 1 (no output) when the slug cannot be
# resolved to a project path.
_loop_runtime_dir() {
  local slug="$1"
  local project_root

  if [[ -z "${ROLL_PROJECT_RUNTIME_DIR:-}" ]]; then
    # No sandbox override: derive the directory from the project path.
    project_root=$(_loop_resolve_project_path "$slug")
    [[ -n "$project_root" ]] || return 1
    printf '%s\n' "${project_root}/.roll/loop"
    return 0
  fi

  printf '%s\n' "${ROLL_PROJECT_RUNTIME_DIR}"
}

# US-LOOP-040: render this cycle's exit summary to stdout for the .command
# Terminal window (after tmux attach exits, before `press enter to close`).
# Pure read-side view: resolves the project-local runs.jsonl / events.ndjson /
# backlog / cron log / ALERT file, then delegates rendering to
# lib/loop-exit-summary.py.
#
# Usage: _loop_render_exit_summary <slug> [<cycle_id>]
#
# Each source that exists is passed to the renderer as its own option
# (--runs, --events, --backlog, --cron-log, --alert, --cycle-id); missing
# sources are simply omitted. Arguments are collected in an array so paths
# containing spaces reach the renderer intact.
#
# AC: silent fallback — when the slug is empty, python3 is missing, the
# renderer script is absent, or the renderer fails, this MUST return 0 (never
# blocks `press enter`).
_loop_render_exit_summary() {
  local slug="${1:-}"
  local cycle_id="${2:-}"
  [ -n "$slug" ] || return 0

  # ${ROLL_PKG_DIR:-}: an unset package dir must not trip `set -u`; the -f
  # test below then fails and we fall back silently.
  local renderer="${ROLL_PKG_DIR:-}/lib/loop-exit-summary.py"
  [ -f "$renderer" ] || return 0
  command -v python3 >/dev/null 2>&1 || return 0

  # Resolve the project-local runtime dir (honors ROLL_PROJECT_RUNTIME_DIR for
  # tests); fall back to the shared root for transient slugs.
  local rt_dir
  rt_dir=$(_loop_runtime_dir "$slug" 2>/dev/null || echo "")
  local shared="${_SHARED_ROOT:-$HOME/.shared/roll}/loop"

  local runs="" events="" cron_log="" alert="" backlog=""

  if [ -n "$rt_dir" ] && [ -f "${rt_dir}/runs.jsonl" ]; then
    runs="${rt_dir}/runs.jsonl"
  elif [ -f "${shared}/runs.jsonl" ]; then
    runs="${shared}/runs.jsonl"
  fi

  if [ -n "$rt_dir" ] && [ -f "${rt_dir}/events.ndjson" ]; then
    events="${rt_dir}/events.ndjson"
  elif [ -f "${shared}/events-${slug}.ndjson" ]; then
    events="${shared}/events-${slug}.ndjson"
  fi

  # cron log lives under the shared root (per-project file).
  if [ -f "${shared}/cron-${slug}.log" ]; then
    cron_log="${shared}/cron-${slug}.log"
  fi

  # ALERT-<slug>.md drives the yellow ⚠ highlight (US-LOOP-041). Project-local
  # control-plane file first, shared root as fallback.
  if [ -n "$rt_dir" ] && [ -f "${rt_dir}/ALERT-${slug}.md" ]; then
    alert="${rt_dir}/ALERT-${slug}.md"
  elif [ -f "${shared}/ALERT-${slug}.md" ]; then
    alert="${shared}/ALERT-${slug}.md"
  fi

  # Backlog: project-local .roll/backlog.md relative to the resolved runtime
  # dir, else .roll/backlog.md relative to the current directory.
  if [ -n "$rt_dir" ]; then
    local _proj_root="${rt_dir%/.roll/loop}"
    if [ -f "${_proj_root}/.roll/backlog.md" ]; then
      backlog="${_proj_root}/.roll/backlog.md"
    fi
  fi
  if [ -z "$backlog" ] && [ -f ".roll/backlog.md" ]; then
    backlog=".roll/backlog.md"
  fi

  local -a _args=()
  if [ -n "$runs" ]; then _args+=(--runs "$runs"); fi
  if [ -n "$events" ]; then _args+=(--events "$events"); fi
  if [ -n "$backlog" ]; then _args+=(--backlog "$backlog"); fi
  if [ -n "$cron_log" ]; then _args+=(--cron-log "$cron_log"); fi
  if [ -n "$alert" ]; then _args+=(--alert "$alert"); fi
  if [ -n "$cycle_id" ]; then _args+=(--cycle-id "$cycle_id"); fi

  # ${arr[@]+...} keeps an empty array safe under `set -u` on older bash.
  python3 "$renderer" ${_args[@]+"${_args[@]}"} 2>/dev/null || return 0
  return 0
}

# ──────────────────────────────────────────────────────────────
# US-LOOP-019: _loop_migrate_legacy_paths — one-shot migration of
# control-plane files (state / ALERT / PAUSE / mute) from legacy
# ~/.shared/roll/loop/ to project-local .roll/loop/. Idempotent.
# Called by outer runner before reading control state.
# ──────────────────────────────────────────────────────────────

# _loop_migrate_legacy_paths <slug> [<legacy_dir>]
# Migrate the control-plane files state-<slug>.yaml, ALERT-<slug>.md,
# PAUSE-<slug> and mute-<slug> from <legacy_dir> (default: the shared root's
# loop/ dir) into the project's runtime dir. Idempotent; always returns 0
# unless a command fails while errexit is active.
#
# Per file:
#   - no legacy file, or a <file>.migrated-* marker already exists → skip
#   - target already exists (newer)   → only rename legacy to .migrated-<ts>
#   - otherwise                       → copy to the target, then rename legacy
# The legacy file is renamed only after the copy really succeeded; a failed
# copy leaves it in place so a later run can retry.
# No-op when the slug cannot be resolved to a runtime dir.
_loop_migrate_legacy_paths() {
  local slug="$1"
  local legacy_dir="${2:-${_SHARED_ROOT:-$HOME/.shared/roll}/loop}"

  local rt_dir
  rt_dir=$(_loop_runtime_dir "$slug" 2>/dev/null || echo "")
  [[ -n "$rt_dir" ]] || return 0
  mkdir -p "$rt_dir"

  local ts
  ts=$(date +%s)

  # All loop variables are local so nothing leaks into the caller's scope.
  local name legacy target staged
  for name in "state-${slug}.yaml" "ALERT-${slug}.md" "PAUSE-${slug}" "mute-${slug}"; do
    legacy="${legacy_dir}/${name}"
    [[ -f "$legacy" ]] || continue
    # Skip if already migrated (has .migrated-* marker). The pattern is passed
    # to compgen as a single word so several markers cannot confuse it.
    if compgen -G "${legacy}.migrated-*" >/dev/null 2>&1; then
      continue
    fi
    target="${rt_dir}/${name}"
    if [[ ! -f "$target" ]]; then
      # Copy via a staging file so an interrupted copy never leaves a partial
      # target that a later run would mistake for a finished migration.
      staged="${target}.tmp.$$"
      if ! cp "$legacy" "$staged" 2>/dev/null || ! mv "$staged" "$target"; then
        rm -f "$staged"
        echo "[loop] migrate: could not copy $legacy to $target; leaving legacy file in place" >&2
        continue
      fi
    fi
    # Target is in place (fresh copy or pre-existing newer file): retire legacy.
    mv "$legacy" "${legacy}.migrated-${ts}"
  done
  return 0
}

# ──────────────────────────────────────────────────────────────
# US-LOOP-020: _loop_migrate_legacy_runs — one-shot migration of the
# legacy machine-wide ~/.shared/roll/loop/runs.jsonl into per-project
# .roll/loop/runs.jsonl, split by each row's `project` slug. Rows whose
# project slug cannot be resolved to a runtime dir are left behind so no
# history is lost. After splitting, the legacy file is renamed to
# runs.jsonl.migrated-<ts> (GC reaps it after 7 days). Idempotent: the
# rename makes a second run a no-op. Called by the outer runner at startup.
# ──────────────────────────────────────────────────────────────
# _loop_migrate_legacy_runs [<legacy_runs_file>]
# Split the legacy machine-wide runs.jsonl (default: <shared root>/loop/runs.jsonl)
# into each project's <runtime dir>/runs.jsonl, keyed by every row's `project`
# slug. Returns 0; does nothing when the legacy file is missing or jq is not
# installed.
#   - rows are appended only if their run_id is not already in the destination
#   - a legacy file with no project slugs at all is retired immediately
#   - the legacy file is renamed to <file>.migrated-<epoch> only when every
#     slug resolved to a runtime dir; otherwise it stays so a later run retries
_loop_migrate_legacy_runs() {
  local legacy_file="${1:-${_SHARED_ROOT:-$HOME/.shared/roll}/loop/runs.jsonl}"
  if [[ ! -f "$legacy_file" ]]; then
    return 0
  fi
  if ! command -v jq >/dev/null 2>&1; then
    return 0
  fi

  local stamp
  stamp=$(date +%s)
  local retired="${legacy_file}.migrated-${stamp}"

  # Distinct project slugs present in the legacy file (empty/null skipped).
  local slugs
  slugs=$(jq -r '.project // empty' "$legacy_file" 2>/dev/null | sort -u)
  if [[ -z "$slugs" ]]; then
    mv "$legacy_file" "$retired" 2>/dev/null || true
    return 0
  fi

  local all_resolved=1
  local slug runtime_dir dest rows row run_id
  while IFS= read -r slug; do
    [[ -n "$slug" ]] || continue

    runtime_dir=$(_loop_runtime_dir "$slug" 2>/dev/null || echo "")
    if [[ -z "$runtime_dir" ]]; then
      # Unknown project: remember it and leave its rows in the legacy file.
      all_resolved=0
      continue
    fi

    mkdir -p "$runtime_dir"
    dest="${runtime_dir}/runs.jsonl"

    # Rows of this project, compact one-per-line.
    rows=$(jq -c --arg p "$slug" 'select(.project == $p)' "$legacy_file" 2>/dev/null)
    while IFS= read -r row; do
      [[ -n "$row" ]] || continue
      run_id=$(printf '%s' "$row" | jq -r '.run_id // ""' 2>/dev/null)
      # Already present from an earlier (possibly partial) migration → skip.
      if [[ -n "$run_id" ]] && grep -qF "\"run_id\":\"$run_id\"" "$dest" 2>/dev/null; then
        continue
      fi
      printf '%s\n' "$row" >> "$dest"
    done <<< "$rows"
  done <<< "$slugs"

  # Retire the legacy file only when nothing was left behind.
  if (( all_resolved )); then
    mv "$legacy_file" "$retired" 2>/dev/null || true
  fi
  return 0
}

# _loop_control_state_path <slug> <basename> [<legacy_dir>]
# Dual-path lookup for a control-plane file during the migration window.
# Candidates, in order:
#   1. <project runtime dir>/<basename>   (only when the slug resolves)
#   2. <legacy_dir>/<basename>            (default: <shared root>/loop)
# Prints the first candidate that is an existing regular file and returns 0;
# returns 1 without output when neither exists.
_loop_control_state_path() {
  local slug="$1" basename="$2"
  local project_dir fallback_dir candidate

  project_dir=$(_loop_runtime_dir "$slug" 2>/dev/null || echo "")
  fallback_dir="${3:-${_SHARED_ROOT:-$HOME/.shared/roll}/loop}"

  # The project-local candidate collapses to "" when the slug is unresolved,
  # and the empty entry is then skipped.
  for candidate in "${project_dir:+${project_dir}/${basename}}" "${fallback_dir}/${basename}"; do
    if [[ -n "$candidate" && -f "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

# FIX-087: parallel to FIX-065's _SHARED_ROOT auto-sandbox above. Without this,
# tests that source bin/roll (directly via BATS or indirectly via a runner-inner
# fork under /tmp / /var/folders/) wrote plists into the developer's real
# ~/Library/LaunchAgents/ while their runner paths lived in the sandbox. When
# the sandbox got cleaned up, those plists outlived their runners and launchd
# fired them every hour with EX_CONFIG, silently killing the autonomous loop.
# Detection signals mirror the _SHARED_ROOT block; under a sandbox we route
# _LAUNCHD_DIR into _SHARED_ROOT so a single teardown removes both.
# An explicit, non-empty _LAUNCHD_DIR from the environment always wins: the
# whole detection block is skipped in that case.
if [ -z "${_LAUNCHD_DIR:-}" ]; then
  # When HOME itself is a sandbox dir (run_roll-style wrappers, roll_status
  # setup, etc.) the default ${HOME}/Library/LaunchAgents is ALREADY in the
  # sandbox — redirecting again would break tests that pre-seed plists under
  # the sandboxed HOME. Only auto-sandbox when HOME points to a real user dir.
  case "${HOME:-}" in
    # Sandboxed HOME: nothing to do, the default assignment below applies.
    /tmp/*|/private/tmp/*|*/var/folders/*|*/tmp.*) ;;
    *)
      _roll_in_test_ctx=0
      if [ -n "${BATS_TEST_FILENAME:-}" ]; then
        _roll_in_test_ctx=1
      else
        # BASH_SOURCE[1] is the file that sourced this script (empty when the
        # script is executed directly).
        _roll_caller="${BASH_SOURCE[1]:-}"
        case "$_roll_caller" in /tmp/*|/private/tmp/*|/var/folders/*) _roll_in_test_ctx=1 ;; esac
        case "$PWD" in /tmp/*|/private/tmp/*|/var/folders/*) _roll_in_test_ctx=1 ;; esac
      fi
      if [ "$_roll_in_test_ctx" = 1 ]; then
        _LAUNCHD_DIR="${_SHARED_ROOT}/LaunchAgents"
        mkdir -p "$_LAUNCHD_DIR"
        export _LAUNCHD_DIR
        # FIX-097: same trigger that sandboxed the plist FILE path must also
        # short-circuit every `launchctl bootstrap/load/unload/enable` against
        # that path. Otherwise a user who reproduces a bug under /private/tmp/
        # or /var/folders/ ends up with sandboxed plists registered in their
        # real gui/<uid> domain — when the tmp dir is cleaned, the agents become
        # ghosts that fire forever (the historical 23:13 CST Terminal popup).
        export _LAUNCHD_SKIP_REGISTRY=1
      fi
      # Detection scratch variables must not leak into the rest of the script.
      unset _roll_in_test_ctx _roll_caller
      ;;
  esac
fi
# Default when neither an override nor the test sandbox set it.
: "${_LAUNCHD_DIR:=${HOME}/Library/LaunchAgents}"

# _config_read_int <key> <default>
# Print the config value for <key> when it consists solely of decimal digits;
# otherwise (missing, empty, negative, non-numeric) print <default>.
_config_read_int() {
  local key="$1" default="$2"
  local raw
  raw=$(config_get "$key" "")
  case "$raw" in
    # Empty, or containing anything that is not a digit → use the default.
    ''|*[!0-9]*) raw="$default" ;;
  esac
  echo "$raw"
}

# REFACTOR-031: cross-platform file mtime in epoch seconds.
# Tries the GNU form (`stat -c %Y`) first, then the BSD/macOS form
# (`stat -f %m`); prints 0 when neither works (e.g. the file is missing).
# Replaces the pattern that was copy-pasted in four places (dashboard age
# widgets, briefs, dream, peer).
_file_mtime() {
  local target="$1"
  if stat -c %Y "$target" 2>/dev/null; then
    return 0
  fi
  if stat -f %m "$target" 2>/dev/null; then
    return 0
  fi
  echo 0
}

# _loop_derive_minute <project_path> [<offset>=0]
# Map a project path to a minute in [1,55]: the first 6 hex digits of the
# path's MD5 digest, plus <offset>, modulo 55, plus 1. Different projects, and
# different services within one project, therefore fire at different minutes.
# Offsets used: loop=0, dream=2, brief=4 → always three distinct values (2<55).
# Uses `md5` when available (macOS), `md5sum` otherwise.
_loop_derive_minute() {
  local project_path="$1" offset="${2:-0}"

  local hasher=md5sum
  if command -v md5 &>/dev/null; then
    hasher=md5
  fi

  local digest
  digest=$(printf '%s' "$project_path" | "$hasher" | cut -c1-6)

  # Hex prefix → decimal.
  local seed
  seed=$(printf '%d' "0x${digest}")

  echo $(( (seed + offset) % 55 + 1 ))
}

# US-LOOP-032: validate a (period, offset) pair.
#   period — decimal integer, 1–1440 (minutes)
#   offset — decimal integer, 0–59
# Returns 0 when both are valid, 1 otherwise.
# offset_minute is deprecated (US-LOOP-032); still accepted for backward
# compat but plist generation ignores it (uses StartInterval = period*60).
#
# Values are compared in base 10 (`10#`): without it a zero-padded value such
# as "09999" raises an octal arithmetic error inside (( )), which evaluates as
# "not out of range" and lets the invalid value through.
_loop_schedule_valid() {
  local period="$1" offset="$2"
  [[ "$period" =~ ^[0-9]+$ ]] || return 1
  [[ "$offset" =~ ^[0-9]+$ ]] || return 1
  # Bound the digit count so absurdly long inputs cannot wrap around 64-bit
  # arithmetic and land inside the valid range by accident.
  (( ${#period} <= 18 && ${#offset} <= 18 )) || return 1
  (( 10#$period >= 1 && 10#$period <= 1440 )) || return 1
  (( 10#$offset < 60 )) || return 1
  return 0
}

# US-LOOP-011: compute the loop schedule spec for a project.
# Usage: _loop_schedule_spec <project_path>
# Resolution order:
#   1. <project>/.roll/local.yaml  loop_schedule.{period_minutes,offset_minute}
#      (both keys must be present)
#   2. global config loop_minute  → period=60, offset=loop_minute (0–59 only)
#   3. default                    → period=60, offset=_loop_derive_minute(path)
#      (a minute in 1–55 derived from the project path hash)
# Output: "<period> <offset>" on stdout, both as plain base-10 integers
# (zero-padded config values such as "08" are normalized to "8" so later
# arithmetic cannot misread them as octal). Exit 0 on success.
# Invalid project config → append an ALERT and fall back to global/default.
# Out-of-range global loop_minute → warn on stderr and fall back to default.
_loop_schedule_spec() {
  local project_path="$1"

  # 1. Try project-level .roll/local.yaml
  local local_file="${project_path}/.roll/local.yaml"
  if [[ -f "$local_file" ]]; then
    local local_period local_offset
    # Extract values from under loop_schedule: key (using awk for reliable block parsing)
    local_period=$(awk '/^loop_schedule:/{found=1;next} found && /^[[:space:]]+period_minutes:/{print $2; exit}' "$local_file")
    local_offset=$(awk '/^loop_schedule:/{found=1;next} found && /^[[:space:]]+offset_minute:/{print $2; exit}' "$local_file")
    if [[ -n "$local_period" && -n "$local_offset" ]]; then
      if _loop_schedule_valid "$local_period" "$local_offset"; then
        # Force base 10 so "08"/"09" are not treated as invalid octal.
        local_period=$((10#$local_period))
        local_offset=$((10#$local_offset))
        # US-LOOP-032: offset_minute is deprecated when period doesn't divide 60.
        # Warn but don't fail.
        if (( 60 % local_period != 0 && local_offset != 0 )); then
          echo "roll: warning: offset_minute is deprecated (period=${local_period}, offset=${local_offset}). Use period_minutes only." >&2
        fi
        echo "$local_period $local_offset"
        return 0
      fi
      # Invalid: alert, then fall through to global/default
      local slug; slug=$(_project_slug "$project_path")
      local alert_file="${_SHARED_ROOT:-$HOME/.shared/roll}/loop/ALERT-${slug}.md"
      mkdir -p "$(dirname "$alert_file")" 2>/dev/null || true
      {
        printf '## ⚠️ US-LOOP-032: Invalid loop_schedule\n\n'
        printf '**Time**: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')"
        printf '**Source**: %s\n\n' "${project_path}/.roll/local.yaml"
        printf '**Values**: period_minutes=%s, offset_minute=%s\n\n' "$local_period" "$local_offset"
        printf '**Action**: period must be 1–1440; offset must be 0–59. Falling back to default (period=60).\n\n'
        printf '%s\n' '---'
      } >> "$alert_file"
    fi
  fi

  # 2. Try global ~/.roll/config.yaml loop_minute (backward compat)
  local global_minute
  global_minute=$(_config_read_int "loop_minute" "")
  if [[ "$global_minute" =~ ^[0-9]+$ ]]; then
    # Same range rule as the project-level offset: a minute must be 0–59.
    if _loop_schedule_valid 60 "$global_minute"; then
      echo "60 $((10#$global_minute))"
      return 0
    fi
    echo "roll: warning: ignoring loop_minute=${global_minute} from global config (must be 0–59); using default schedule." >&2
  fi

  # 3. Default: derive from project path hash so projects spread across the hour
  local offset
  offset=$(_loop_derive_minute "$project_path" 0)
  echo "60 $offset"
}

# Read the loop's active window (hours of the day) for a project.
# Usage: _loop_read_active_window [<project_path>]   (default: physical cwd)
# Resolution order:
#   1. <project>/.roll/local.yaml  loop_schedule.{active_start,active_end}
#   2. default                     0 / 24  (full day)
# Validation: both values must be decimal integers, active_start < active_end
# and active_end <= 24; anything else yields the default.
# Output: "<start> <end>" on stdout as plain base-10 integers. Zero-padded
# hours such as "08" are read as decimal — left as-is they would raise an
# octal arithmetic error and the configured window would be silently dropped.
_loop_read_active_window() {
  local project_path="${1:-$(pwd -P)}"
  local local_file="${project_path}/.roll/local.yaml"
  if [[ -f "$local_file" ]]; then
    local val_start val_end
    val_start=$(awk '/^loop_schedule:/{found=1;next} found && /^[[:space:]]+active_start:/{print $2; exit}' "$local_file")
    val_end=$(awk '/^loop_schedule:/{found=1;next} found && /^[[:space:]]+active_end:/{print $2; exit}' "$local_file")
    # The length bound keeps over-long digit strings from wrapping around in
    # 64-bit arithmetic.
    if [[ "$val_start" =~ ^[0-9]+$ && "$val_end" =~ ^[0-9]+$ ]] \
        && (( ${#val_start} <= 18 && ${#val_end} <= 18 )); then
      val_start=$((10#$val_start))
      val_end=$((10#$val_end))
      if (( val_start < val_end && val_end <= 24 )); then
        echo "$val_start $val_end"
        return 0
      fi
    fi
  fi
  echo "0 24"
}

# US-LOOP-032: human-readable schedule description (printed without a
# trailing newline).
# Args: period offset [lang]
#   lang: en (default) or zh
#   - period 60            → "every hour :MM"
#   - period divides 60    → clock-aligned slots, e.g. "every 30min (:00 :30)"
#   - any other period     → just the interval, e.g. "every 45min"
# Slot minutes are listed in ascending order and always stay within 0–59: the
# first slot is offset modulo period, so an offset >= period (e.g. period=30,
# offset=45) is shown as ":15 :45" rather than ":45 :75".
_loop_schedule_desc() {
  local period="$1" offset="$2" lang="${3:-en}"
  # Read all-digit inputs as base 10 so zero-padded values ("08") do not trip
  # octal parsing in the arithmetic and printf calls below.
  if [[ "$period" =~ ^[0-9]+$ ]]; then period=$((10#$period)); fi
  if [[ "$offset" =~ ^[0-9]+$ ]]; then offset=$((10#$offset)); fi

  if [[ "$period" -eq 60 ]]; then
    if [[ "$lang" == "zh" ]]; then
      printf '%s' "$(msg_lang "$lang" agent.hourly_at_02d "$offset")"
    else
      printf "every hour :%02d" "$offset"
    fi
    return 0
  fi
  # US-LOOP-032: only show clock-aligned slots when period divides 60.
  # The period > 0 guard avoids a division-by-zero error for a zero period.
  if (( period > 0 && 60 % period == 0 )); then
    local times="" slots=$((60 / period)) i m slot
    local first=$(( offset % period ))
    for (( i = 0; i < slots; i++ )); do
      m=$(( first + i * period ))
      printf -v slot '%02d' "$m"
      times="${times} :${slot}"
    done
    if [[ "$lang" == "zh" ]]; then
      printf '%s' "$(msg_lang "$lang" agent.every_d_min_s "$period" "${times# }")"
    else
      printf "every %dmin (%s)" "$period" "${times# }"
    fi
  else
    if [[ "$lang" == "zh" ]]; then
      printf '每%d分鐘' "$period"
    else
      printf "every %dmin" "$period"
    fi
  fi
}

# Escape a string for use inside a JSON double-quoted value: backslash, double
# quote, newline, carriage return and tab are replaced by their JSON escapes.
# Prints the escaped text without a trailing newline.
_loop_json_escape() {
  local s="${1-}"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# US-LOOP-001: structured event emission for cycle observability.
# Args: <stage> <label> <detail> <outcome>
# Effects:
#   - prints one human-readable line to stdout (for tmux/attach display):
#     phase_start / phase_tick / phase_end get a decorated line, every other
#     stage the legacy tab-separated "ts stage label detail outcome" line
#   - appends one JSON object per call to the project's NDJSON event file:
#     <runtime dir>/events.ndjson, or <shared root>/loop/events-<slug>.ndjson
#     when the runtime dir cannot be resolved
#   - rotates the event file via _loop_event_rotate
# All four fields are JSON-escaped before being written, so quotes,
# backslashes or newlines in a detail cannot corrupt the NDJSON line.
# Returns 1 without writing when the FIX-065 tripwire detects a test context
# about to write into the production shared root.
_loop_event() {
  local stage="$1" label="$2" detail="$3" outcome="$4"
  local ts slug evfile json
  ts=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  slug=$(_project_slug 2>/dev/null || basename "$PWD")
  # US-LOOP-020: prefer project-local .roll/loop/; fallback to shared root
  # for transient slugs (worktree cleanup, orphan recovery) where
  # _loop_runtime_dir cannot resolve a project path.
  local _rt_dir
  _rt_dir=$(_loop_runtime_dir "$slug" 2>/dev/null || echo "")
  if [ -n "$_rt_dir" ]; then
    evfile="${_rt_dir}/events.ndjson"
  else
    evfile="${_SHARED_ROOT:-$HOME/.shared/roll}/loop/events-${slug}.ndjson"
  fi
  # FIX-065 tripwire: in a test context (BATS or temp cwd), refuse to write
  # into production ~/.shared/roll/. Catching this in code is the last line
  # of defense if some unusual path bypassed the auto-sandbox at source-time.
  # Skipped when HOME itself has been redirected to a sandbox dir — then
  # $HOME/.shared/roll IS the sandbox, not prod.
  case "${HOME:-}" in
    /tmp/*|/private/tmp/*|*/var/folders/*|*/tmp.*) ;;
    *)
      if [ -n "${HOME:-}" ] && [ "${evfile#${HOME}/.shared/roll/}" != "$evfile" ]; then
        case "${BATS_TEST_FILENAME:-}${PWD}" in
          */tmp.*|*/var/folders/*|/tmp/*|/private/tmp/*|*.bats)
            echo "[FIX-065] refusing prod _loop_event write: $evfile (test context)" >&2
            return 1
            ;;
        esac
      fi
      ;;
  esac
  mkdir -p "$(dirname "$evfile")"
  # FIX-157: ensure file exists before append; some contexts (launchd+inner)
  # silently drop >> when the target is missing.
  touch "$evfile" 2>/dev/null || true

  # US-LOOP-007: human-friendly stdout for phase_* stages so tmux readers
  # spot phase boundaries amid claude output. Other stages keep the legacy
  # tab-separated format (consumers like tests grep on it).
  case "$stage" in
    phase_start)
      local _emoji
      case "$label" in
        startup) _emoji="🚀" ;;
        preflight) _emoji="🔍" ;;
        worktree_setup) _emoji="🌳" ;;
        agent_invoke) _emoji="🤖" ;;
        publish_push) _emoji="📤" ;;
        publish_wait_merge) _emoji="⏳" ;;
        cleanup) _emoji="🧹" ;;
        *) _emoji="▶" ;;
      esac
      printf '%s %-22s ─────────\n' "$_emoji" "$label"
      ;;
    phase_tick)
      printf '   ⏱  %-20s ───── %s\n' "$label" "$detail"
      ;;
    phase_end)
      local _mark
      case "$outcome" in
        fail) _mark="✗" ;;
        *) _mark="✓" ;;
      esac
      printf '   %s %-20s ───── %s\n' "$_mark" "$label" "$detail"
      ;;
    *)
      printf '%s\t%s\t%s\t%s\t%s\n' "$ts" "$stage" "$label" "$detail" "$outcome"
      ;;
  esac

  # JSON line appended to NDJSON file. FIX-067: drop the flock/lockf guard.
  # POSIX requires write() ≤ PIPE_BUF (≥512 bytes, 4 KiB on Linux/macOS) to
  # a file opened O_APPEND be atomic across concurrent writers, and a single
  # JSONL event line is well under that limit. The old lockfile path could
  # stall the EXIT trap added in FIX-066 when the lockfile state was
  # inconsistent, leaving cycle_end unwritten when the outer SIGHUP fired —
  # defeating FIX-066's whole purpose.
  printf -v json '{"ts":"%s","stage":"%s","label":"%s","detail":"%s","outcome":"%s"}' \
    "$ts" \
    "$(_loop_json_escape "$stage")" \
    "$(_loop_json_escape "$label")" \
    "$(_loop_json_escape "$detail")" \
    "$(_loop_json_escape "$outcome")"
  if ! printf '%s\n' "$json" >> "$evfile"; then
    echo "[loop] _loop_event: failed to append to $evfile" >&2
  fi

  # File rotation: if >10MB, rotate keeping last 5
  _loop_event_rotate "$evfile"
}

# _loop_event_rotate <file>
# Size-based rotation for an event log: once <file> exceeds 10 MiB
# (10485760 bytes) shift the generations (.4 dropped, .3→.4, .2→.3, .1→.2,
# current→.1) and start a fresh empty <file>. Keeps at most five files: the
# live one plus four rotated copies. Does nothing below the threshold or when
# the size cannot be determined.
_loop_event_rotate() {
  local f="$1"
  # `i` is local so the caller's own `i` is never clobbered.
  local size i
  # BSD/macOS form first, GNU form second; 0 when neither works.
  size=$(stat -f%z "$f" 2>/dev/null || stat -c%s "$f" 2>/dev/null || echo 0)
  # Anything other than a plain number counts as "unknown size" → no rotation.
  case "$size" in ''|*[!0-9]*) size=0 ;; esac
  if (( size > 10485760 )); then
    # rotate: .4→remove, .3→.4, .2→.3, .1→.2, current→.1
    rm -f "${f}.4"
    for i in 3 2 1; do
      if [[ -f "${f}.$i" ]]; then
        mv "${f}.$i" "${f}.$((i+1))"
      fi
    done
    mv "$f" "${f}.1"
    touch "$f"
  fi
}

# FIX-151: write a lightweight tick heartbeat for dedicated loops (pr/ci/alert).
# Usage: _loop_write_tick <loop_type> [<outcome>=idle] [<note>]
# Appends one JSONL line {"ts","loop","outcome","note"} per tick to
# <runtime dir>/<loop_type>-tick.jsonl, or to
# <shared root>/loop/<loop_type>-tick-<slug>.jsonl when the runtime dir cannot
# be resolved. Rotates by line count to control bloat. No-op (status 0) when
# <loop_type> is empty.
# Field values are JSON-escaped (backslash, quote, newline, CR, tab) so a free
# text note cannot break the one-object-per-line format.
_loop_write_tick() {
  local loop_type="${1:-}" outcome="${2:-idle}" note="${3:-}"
  [ -n "$loop_type" ] || return 0
  local slug tick_file
  slug=$(_project_slug 2>/dev/null || basename "$PWD")
  local _rt_dir
  _rt_dir=$(_loop_runtime_dir "$slug" 2>/dev/null || echo "")
  if [ -n "$_rt_dir" ]; then
    tick_file="${_rt_dir}/${loop_type}-tick.jsonl"
  else
    tick_file="${_SHARED_ROOT:-$HOME/.shared/roll}/loop/${loop_type}-tick-${slug}.jsonl"
  fi
  mkdir -p "$(dirname "$tick_file")" 2>/dev/null || true
  local ts
  ts=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  # Build escaped copies json_<name> of the three caller-supplied fields.
  local json_loop_type json_outcome json_note _name _val
  for _name in loop_type outcome note; do
    _val=${!_name}
    _val=${_val//\\/\\\\}
    _val=${_val//\"/\\\"}
    _val=${_val//$'\n'/\\n}
    _val=${_val//$'\r'/\\r}
    _val=${_val//$'\t'/\\t}
    printf -v "json_${_name}" '%s' "$_val"
  done
  printf '{"ts":"%s","loop":"%s","outcome":"%s","note":"%s"}\n' \
    "$ts" "$json_loop_type" "$json_outcome" "$json_note" >> "$tick_file"

  # Rotate: alert loop (1 min) → 1000 lines (~16h); ci/pr (5 min) → 500 lines (~42h)
  local max_lines=500
  if [ "$loop_type" = "alert" ]; then
    max_lines=1000
  fi
  local line_count
  line_count=$(wc -l < "$tick_file" 2>/dev/null | tr -d ' \t' || echo 0)
  case "$line_count" in ''|*[!0-9]*) line_count=0 ;; esac
  if [ "$line_count" -gt "$max_lines" ]; then
    # Keep only the newest max_lines entries; replace the file via a rename.
    tail -n "$max_lines" "$tick_file" > "${tick_file}.tmp" && mv "${tick_file}.tmp" "$tick_file"
  fi
}

# FIX-151: read the last tick line from a dedicated loop's tick file.
# Usage: _loop_read_last_tick <loop_type> [<field>]
# Without <field>, prints the raw last JSONL line. With <field> (ts, loop,
# outcome, note), prints only that field's value. Prints nothing and returns 0
# when <loop_type> is empty, the tick file is missing, or it is empty.
# The tick file is located the same way _loop_write_tick writes it.
#
# Field extraction splits the line on double quotes, which puts keys at
# positions 2, 6, 10, … and their values two positions later. Only key
# positions are compared, so a value that happens to equal a key name (e.g.
# outcome "note") cannot be mistaken for that key. The field name is passed
# with `awk -v`, never spliced into the awk program text.
_loop_read_last_tick() {
  local loop_type="${1:-}" field="${2:-}"
  [ -n "$loop_type" ] || return 0
  local slug tick_file
  slug=$(_project_slug 2>/dev/null || basename "$PWD")
  local _rt_dir
  _rt_dir=$(_loop_runtime_dir "$slug" 2>/dev/null || echo "")
  if [ -n "$_rt_dir" ]; then
    tick_file="${_rt_dir}/${loop_type}-tick.jsonl"
  else
    tick_file="${_SHARED_ROOT:-$HOME/.shared/roll}/loop/${loop_type}-tick-${slug}.jsonl"
  fi
  [ -f "$tick_file" ] || return 0
  local last
  last=$(tail -1 "$tick_file" 2>/dev/null || echo "")
  [ -n "$last" ] || return 0
  if [ -n "$field" ]; then
    printf '%s\n' "$last" \
      | awk -F'"' -v key="$field" '{for(i=2;i<NF;i+=4) if($i==key) {print $(i+2); exit}}' 2>/dev/null \
      || echo ""
  else
    printf '%s\n' "$last"
  fi
}

# FIX-151: compute human-readable age of the last tick for a dedicated loop.
# Arguments:
#   $1 - loop type (empty → no-op)
# Outputs:
#   "<n>s" below one minute, "<n>m" below one hour, "<n>h" otherwise. Prints
#   nothing when there is no tick or its timestamp cannot be parsed. A tick
#   stamped in the future (clock skew) is reported as "0s".
_loop_tick_age() {
  local loop_type="${1:-}"
  [ -n "$loop_type" ] || return 0
  local ts
  ts=$(_loop_read_last_tick "$loop_type" "ts")
  [ -n "$ts" ] || return 0
  local tick_epoch now_epoch age
  # GNU date understands the trailing "Z". BSD/macOS `date -jf` treats the "Z"
  # as a literal and would read the stamp as LOCAL time, so -u is required
  # there to interpret the UTC timestamp correctly.
  tick_epoch=$(date -d "$ts" +%s 2>/dev/null \
    || date -u -jf "%Y-%m-%dT%H:%M:%SZ" "$ts" +%s 2>/dev/null \
    || echo 0)
  # Unparseable output counts as "no tick".
  case "$tick_epoch" in ''|*[!0-9]*) return 0 ;; esac
  [ "$tick_epoch" -gt 0 ] || return 0
  now_epoch=$(date +%s)
  age=$(( now_epoch - tick_epoch ))
  # Clamp clock-skew negatives so callers never see "-5s".
  if [ "$age" -lt 0 ]; then
    age=0
  fi
  if [ "$age" -lt 60 ]; then
    echo "${age}s"
  elif [ "$age" -lt 3600 ]; then
    echo "$(( age / 60 ))m"
  else
    echo "$(( age / 3600 ))h"
  fi
}

# US-OBS-014: after a loop cycle reaches a terminal cycle_end (done or idle),
# fire a best-effort, background status-snapshot push to roll-meta so the
# remote-watch prompt always sees ≤35min-fresh data — no user-side cron needed.
#
# Reads the optional `roll_meta_dir` config key via config_get.
#   - unset/empty            → complete no-op, zero output
#   - set, directory missing → one WARNING on stderr, then skip
#   - set, push script absent (<dir>/ops/push-loop-status.sh) → silent skip
# Otherwise the push script runs in the background with its output appended to
# ${_SHARED_ROOT:-$HOME/.shared/roll}/push-status.log (rotated past 1 MiB,
# keeping .1 and .2). A watchdog stops the push after 60s: it signals the
# script's direct children first (when pkill is available), then the script
# itself, and escalates to SIGKILL if the script is still alive a second
# later. Always returns 0 so a failed/timed-out push never aborts the cycle.
_loop_push_status_snapshot() {
  local meta_dir
  meta_dir=$(config_get roll_meta_dir "")
  # Unconfigured → complete no-op, zero output (preserves prior behaviour).
  [ -z "$meta_dir" ] && return 0
  if [ ! -d "$meta_dir" ]; then
    echo "WARNING: roll_meta_dir '${meta_dir}' does not exist; skipping status push" >&2
    return 0
  fi
  local script="${meta_dir}/ops/push-loop-status.sh"
  # Configured dir exists but the push script isn't there (e.g. older roll-meta
  # checkout) → silent skip; nothing to push.
  [ -f "$script" ] || return 0

  local logf="${_SHARED_ROOT:-${HOME}/.shared/roll}/push-status.log"
  mkdir -p "$(dirname "$logf")" 2>/dev/null || true
  # Rotate once the log exceeds 1 MiB: .1 → .2, live → .1, fresh empty live.
  local _sz
  _sz=$(stat -f%z "$logf" 2>/dev/null || stat -c%s "$logf" 2>/dev/null || echo 0)
  # Non-numeric size output counts as 0 so the comparison cannot error.
  case "$_sz" in ''|*[!0-9]*) _sz=0 ;; esac
  if [ "$_sz" -gt 1048576 ]; then
    rm -f "${logf}.2"
    if [ -f "${logf}.1" ]; then
      mv "${logf}.1" "${logf}.2"
    fi
    mv "$logf" "${logf}.1"
    touch "$logf"
  fi

  # Background, best-effort. A portable watchdog stops the push after 60s
  # (`timeout` is GNU-only and absent on stock macOS) so a hung git push can
  # never stall the next cycle.
  (
    bash "$script" "$meta_dir" >> "$logf" 2>&1 &
    local _push_pid=$!
    local _waited=0
    while kill -0 "$_push_pid" 2>/dev/null; do
      if [ "$_waited" -ge 60 ]; then
        # Signal the script's direct children first: killing only the wrapper
        # bash would orphan a hung child and leave it running.
        if command -v pkill >/dev/null 2>&1; then
          pkill -TERM -P "$_push_pid" 2>/dev/null || true
        fi
        # `|| true`: the process may have exited between the check and the
        # kill; that must not stop the timeout from being logged.
        kill -TERM "$_push_pid" 2>/dev/null || true
        echo "$(date '+%Y-%m-%dT%H:%M:%S%z') push-loop-status timeout (>60s); killed pid ${_push_pid}" >> "$logf" || true
        # Short grace period, then escalate if TERM was ignored.
        sleep 1
        if kill -0 "$_push_pid" 2>/dev/null; then
          kill -KILL "$_push_pid" 2>/dev/null || true
        fi
        break
      fi
      sleep 1
      _waited=$((_waited + 1))
    done
  ) &
}

# FIX-050: assemble a PATH value that survives launchd/cron's bare-env launch.
# Setup-time companion to the runtime assembly snippet embedded in runner
# scripts.
# Candidate order: `brew --prefix`/bin (when brew is on PATH), then
# /opt/homebrew/bin, /usr/local/bin, /opt/local/bin, ~/.local/bin and
# ~/.kimi-code/bin (FIX-129: kimi-code installs there, launchd misses it) —
# each only when the directory exists — followed unconditionally by
# /usr/bin, /bin, /usr/sbin, /sbin.
# Outputs: the de-duplicated, colon-joined list on stdout (no trailing newline).
_detect_path_prepend() {
  local candidates=() joined="" brew_prefix="" dir

  if command -v brew >/dev/null 2>&1; then
    brew_prefix=$(brew --prefix 2>/dev/null || true)
    if [[ -n "$brew_prefix" && -d "$brew_prefix/bin" ]]; then
      candidates+=("$brew_prefix/bin")
    fi
  fi

  # Optional tool directories: included only when present on this machine.
  for dir in /opt/homebrew/bin /usr/local/bin /opt/local/bin \
             "$HOME/.local/bin" "$HOME/.kimi-code/bin"; do
    if [[ -d "$dir" ]]; then
      candidates+=("$dir")
    fi
  done

  # System directories are always appended, existence is not checked.
  candidates+=("/usr/bin" "/bin" "/usr/sbin" "/sbin")

  # First occurrence wins; later duplicates are dropped.
  for dir in "${candidates[@]}"; do
    if [[ ":$joined:" == *":$dir:"* ]]; then
      continue
    fi
    joined="${joined:+$joined:}$dir"
  done

  printf '%s' "$joined"
}

# Build the launchd job label for a service/project pair.
# Arguments: $1 - service name, $2 - project path.
# Outputs: "com.roll.<service>.<slug>" (no trailing newline), where <slug> is
# whatever _project_slug prints for the project path.
_launchd_label() {
  local service="$1" project_path="$2"
  local prefix="com.roll.${service}"
  printf '%s.%s' "$prefix" "$(_project_slug "$project_path")"
}

# FIX-097: central skip predicate consulted by every launchctl invocation that
# operates on a plist path Roll wrote.
# Returns 0 (skip the launchd registry) when either:
#   - explicit: _LAUNCHD_SKIP_REGISTRY is exactly "1" (tests, future opt-out)
#   - implicit: _LAUNCHD_DIR is _SHARED_ROOT itself or lives beneath it
#     (auto-sandbox active)
# Returns 1 (do not skip) otherwise, i.e. in production.
#
# History: FIX-090 introduced the same logic INSIDE _install_launchd_plists.
# FIX-097 hoisted it into this helper because the bootstrap call there was not
# the only leak: _loop_on / _loop_off / _loop_pause / _loop_resume each had
# bare `launchctl load/unload/enable` calls that bypassed the gate.
_launchd_should_skip_registry() {
  if [[ "${_LAUNCHD_SKIP_REGISTRY:-}" == "1" ]]; then
    return 0
  fi

  # Trailing slash on the candidate lets "<root>" itself match "<root>/*".
  local launchd_dir="${_LAUNCHD_DIR:-}/"
  # An unset/empty root falls back to a path no real directory sits under.
  local sandbox_root="${_SHARED_ROOT:-/nonexistent}"
  if [[ "$launchd_dir" == "$sandbox_root"/* ]]; then
    return 0
  fi

  return 1
}

# Print the plist file path for a service/project pair.
# Arguments: $1 - service name, $2 - project path.
# Globals:   _LAUNCHD_DIR (read) - directory that holds the plists.
# Outputs:   "<_LAUNCHD_DIR>/<label>.plist" (no trailing newline), where
#            <label> is the output of _launchd_label for the same arguments.
_launchd_plist_path() {
  local service="$1" project_path="$2"
  local plist_dir="$_LAUNCHD_DIR"
  local label
  # `|| true` keeps the original contract: the path is printed even if the
  # label helper reports a non-zero status.
  label=$(_launchd_label "$service" "$project_path") || true
  printf '%s/%s.plist' "$plist_dir" "$label"
}

# Escape text for use inside an XML element body: & → &amp;, < → &lt;,
# > → &gt;. Arguments: $1 - raw text. Outputs: escaped text on stdout.
_plist_xml_escape() {
  printf '%s' "${1:-}" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Write (or leave untouched, when already identical) a launchd plist.
# Arguments:
#   $1 plist_path    - destination file
#   $2 label         - launchd Label
#   $3 project_path  - WorkingDirectory
#   $4 period        - interval in minutes (used when $6 is empty)
#   $5 offset        - fire minute for daily services (calendar opt-in only)
#   $6 hour          - non-empty marks a daily service
#   $7 runner_script - script run via `/bin/bash -l`
# Returns: 1 when the FIX-087 tripwire refuses the write, otherwise the status
#   of the final write (0 when the file already had the same content).
# Label, runner script, PATH and working directory are XML-escaped so a path
# containing & < > still yields a well-formed plist.
_write_launchd_plist() {
  local plist_path="$1" label="$2" project_path="$3"
  local period="$4" offset="$5" hour="$6" runner_script="$7"

  # FIX-087 tripwire: last line of defense if some caller explicitly set
  # _LAUNCHD_DIR back to the real path (or built plist_path manually) while
  # in a test context. Refuse rather than pollute the user's launchd domain.
  # Skipped when HOME itself has been redirected to a sandbox dir — then
  # $HOME/Library/LaunchAgents IS the sandbox, not prod.
  case "${HOME:-}" in
    /tmp/*|/private/tmp/*|*/var/folders/*|*/tmp.*) ;;
    *)
      # ${HOME} is quoted inside the pattern so glob characters in it are
      # matched literally when stripping the prefix.
      if [ -n "${HOME:-}" ] && [ "${plist_path#"${HOME}"/Library/LaunchAgents/}" != "$plist_path" ]; then
        case "${BATS_TEST_FILENAME:-}${PWD}" in
          */tmp.*|*/var/folders/*|/tmp/*|/private/tmp/*|*.bats)
            echo "[FIX-087] refusing prod plist write: $plist_path (test context)" >&2
            return 1
            ;;
        esac
      fi
      ;;
  esac

  # FIX-050: bake PATH into the plist so launchd-spawned bash can find tmux,
  # claude, node, etc. The runner script also re-asserts PATH at runtime as
  # a second layer (covers stale plists where brew was installed after setup).
  local path_value; path_value=$(_detect_path_prepend)

  # FIX-148 (owner decision B) / FIX-105 / US-LOOP-035: on macOS 26.x launchd
  # SILENTLY refuses to FIRE a StartCalendarInterval that carries Hour+Minute, so
  # daily services (dream + brief) never run. The FIX-105 known-good workaround is
  # to force daily services to StartInterval=86400; that is the verified default.
  # US-LOOP-035's *array-style* StartCalendarInterval (one-element array of a dict)
  # is UNVERIFIED on macOS 26.x, so it is an explicit OPT-IN only: set
  # ROLL_DREAM_CALENDAR=1 to emit the array-style Hour+Minute schedule.
  # US-LOOP-032: non-daily loop services use StartInterval = period * 60.
  local schedule_xml
  if [[ -n "$hour" ]]; then
    if [[ "${ROLL_DREAM_CALENDAR:-}" == "1" ]]; then
      # OPT-IN (unverified on macOS 26.x): array-style StartCalendarInterval.
      # Daily services carry the fire minute in $offset ($5); $hour is $6.
      local cal_minute="${offset:-0}"
      schedule_xml="  <key>StartCalendarInterval</key>
  <array>
    <dict>
      <key>Hour</key>
      <integer>${hour}</integer>
      <key>Minute</key>
      <integer>${cal_minute}</integer>
    </dict>
  </array>"
    else
      # DEFAULT (FIX-105 known-good): StartInterval=86400 so the service fires.
      schedule_xml="  <key>StartInterval</key>
  <integer>86400</integer>"
    fi
  else
    # US-LOOP-032: StartInterval in seconds = period_minutes * 60
    local interval=$(( period * 60 ))
    schedule_xml="  <key>StartInterval</key>
  <integer>${interval}</integer>"
  fi

  # Escape every free-text value before it is embedded in the XML document.
  local x_label x_runner x_path x_project
  x_label=$(_plist_xml_escape "$label")
  x_runner=$(_plist_xml_escape "$runner_script")
  x_path=$(_plist_xml_escape "$path_value")
  x_project=$(_plist_xml_escape "$project_path")

  local content
  content="<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">
<plist version=\"1.0\">
<dict>
  <key>Label</key>
  <string>${x_label}</string>
  <key>ProgramArguments</key>
  <array>
    <string>/bin/bash</string>
    <string>-l</string>
    <string>${x_runner}</string>
  </array>
  <key>EnvironmentVariables</key>
  <dict>
    <key>PATH</key>
    <string>${x_path}</string>
  </dict>
${schedule_xml}
  <key>WorkingDirectory</key>
  <string>${x_project}</string>
</dict>
</plist>"

  # Skip the write when the file on disk already matches, so an unchanged
  # plist is not rewritten.
  if [[ -f "$plist_path" ]] && [[ "$(cat "$plist_path")" == "$content" ]]; then
    return 0
  fi
  printf '%s\n' "$content" > "$plist_path"
}

# Generate a minimal two-line runner script and mark it executable.
# Arguments:
#   $1 - path of the script to create (parent directories are created)
#   $2 - directory the script changes into before running
#   $3 - command line, embedded verbatim (not quoted) after `cd … &&`
#   $4 - log file that receives the command's stdout and stderr (appended)
_write_runner_script() {
  local script_path="$1" project_path="$2" cmd="$3" log_path="$4"
  mkdir -p "$(dirname "$script_path")"
  {
    # Line 1: login-shell shebang. Line 2: cd, run, append output to the log.
    printf '%s\n' '#!/bin/bash -l'
    printf 'cd "%s" && %s >> "%s" 2>&1\n' "$project_path" "$cmd" "$log_path"
  } > "$script_path"
  chmod +x "$script_path"
}

# _write_pr_loop_runner_script <script_path> <project_path> <roll_bin> <log_path>
#   US-AUTO-044 Phase 2: the script the com.roll.pr.<slug> launchd plist runs
#   every 5 min. Lightweight (no agent, no tmux): portable PATH, a single-flight
#   re-entry lock (pid+ts, 15-min staleness so a crashed pass self-heals next
#   tick), then drives the existing _loop_pr_inbox orchestrator via the
#   `roll _loop_pr_inbox` dispatch. _loop_pr_inbox stays sole writer of pr_state
#   (kimi peer-review Q1 — main loop no longer touches it after Phase 2).
#
#   Arguments:
#     $1 script_path  - file to generate (parent dirs are created; made +x)
#     $2 project_path - baked into the script as the `cd` target and as the
#                       parent of the lock file (.roll/loop/.pr-loop.lock)
#     $3 roll_bin     - baked in as the script passed to `bash`
#     $4 log_path     - baked in as the append target for stdout+stderr
#
#   Heredoc quoting: the PRRUNNER delimiter is unquoted, so ${lock},
#   ${project_path}, ${roll_bin} and ${log_path} are expanded NOW (generation
#   time), while every \$-escaped expansion is written out literally and
#   evaluated later, when the generated script runs.
_write_pr_loop_runner_script() {
  local script_path="$1" project_path="$2" roll_bin="$3" log_path="$4"
  mkdir -p "$(dirname "$script_path")"
  # Lock file lives inside the project so each project gets its own guard.
  local lock="${project_path}/.roll/loop/.pr-loop.lock"
  # Everything between the PRRUNNER markers is the generated script's text;
  # do not add comments inside it unless they are meant to appear in the output.
  cat > "$script_path" << PRRUNNER
#!/bin/bash -l
set -o pipefail
# Portable PATH: launchd delivers a bare PATH missing brew/local tools. Idempotent.
for _d in /opt/homebrew/bin /usr/local/bin /opt/local/bin "\$HOME/.local/bin" "\$HOME/.kimi-code/bin"; do
  case ":\$PATH:" in *":\$_d:"*) ;; *) [ -d "\$_d" ] && PATH="\$_d:\$PATH" ;; esac
done
export PATH
# Single-flight re-entry guard: one PR-loop pass at a time. 5-min cadence;
# 15-min (900s) staleness so a crashed/hung pass self-heals on the next tick.
LOCK="${lock}"
mkdir -p "\$(dirname "\$LOCK")"
if [ -f "\$LOCK" ]; then
  _pp=""; _pt=""
  IFS=: read -r _pp _pt < "\$LOCK" 2>/dev/null || true
  _now=\$(date -u +%s)
  if [ -n "\$_pp" ] && [ -n "\$_pt" ] && kill -0 "\$_pp" 2>/dev/null && [ "\$((_now - _pt))" -lt 900 ]; then
    exit 0
  fi
  rm -f "\$LOCK"
fi
printf '%s:%s\n' "\$\$" "\$(date -u +%s)" > "\$LOCK"
trap 'rm -f "\$LOCK"' EXIT
cd "${project_path}" || exit 0
bash "${roll_bin}" _loop_pr_inbox >> "${log_path}" 2>&1 || true
PRRUNNER
  # launchd executes the file directly, so it must carry the executable bit.
  chmod +x "$script_path"
}

# Like _write_runner_script but prepends an active window guard.
# Silently exits when current hour is outside [active_start, active_end).
# When tmux is available, wraps the inner command in a detached tmux session
# named `roll-loop-<slug>` so `roll loop attach` can watch in real time.
# Falls back to headless execution when tmux is not installed.
_write_loop_runner_script() {
  local script_path="$1" project_path="$2" cmd="$3" log_path="$4"
  local active_start="${5:-10}" active_end="${6:-18}"
  # FIX-134: skill md path. When set, the inner script rebuilds the agent
  # command at runtime from the routed cycle agent; when empty it falls back to
  # the baked command (backwards compatible with callers that omit it).
  local skill_path="${7:-}"
  # FIX-054: terminal preference detection removed. Popup is hard-coded to
  # macOS Terminal.app; the 7th positional arg, if any, is ignored for
  # backwards compatibility with existing callers.
  mkdir -p "$(dirname "$script_path")"

  local inner_path="${script_path%.sh}-inner.sh"
  # Use stream-json + formatter: --verbose alone does nothing in -p mode;
  # stream-json enables realtime streaming; loop-fmt.py humanizes the events.
  local fmt_script="${ROLL_PKG_DIR}/lib/loop-fmt.py"
  # US-LOOP-026: post-cycle single-shot usage writer for non-claude agents.
  # pi -p text mode prints no usage, so we recover it from pi's session jsonl
  # exactly once per cycle (loop-fmt passthrough is display-only).
  # FIX-154: kimi-code's `-p` mode also writes nothing to stdout but persists
  # usage to wire.jsonl; kimi_emit covers that path. bin/roll dispatches by
  # agent (pi/deepseek → pi_emit, kimi → kimi_emit).
  local pi_emit_script="${ROLL_PKG_DIR}/lib/agent_usage/pi_emit.py"
  local kimi_emit_script="${ROLL_PKG_DIR}/lib/agent_usage/kimi_emit.py"
  local roll_bin="${ROLL_PKG_DIR}/bin/roll"
  # US-EVAL-002: pure-function rubric scorer (US-EVAL-001). Baked in at
  # generation time so the inner runner can compute result_eval at cycle finish.
  local eval_script="${ROLL_PKG_DIR}/lib/loop_result_eval.py"
  # FIX-041: loop cycle is autonomous — permission prompts and sandbox path
  # restrictions only cause the cycle to burn turns asking for approvals
  # it cannot receive. Bypass all permission checks for the inner claude
  # invocation. Worktree isolation contains the blast radius.
  local cmd_verbose="${cmd/claude -p/claude -p --verbose --dangerously-skip-permissions --output-format stream-json}"
  # US-AUTO-037: strip leading `cd "<path>" && ` (callers like
  # _install_launchd_plists prepend it). The runner now manages cwd itself
  # — pointing at the worktree when isolation succeeds, project_path otherwise.
  local agent_cmd; agent_cmd="${cmd_verbose#cd \"*\" && }"
  # FIX-048: Claude Code resolves project root from the worktree's .git file to
  # the main repo, placing worktree absolute paths outside its sandbox. Inject
  # --add-dir "$WT" so the worktree directory is explicitly allowed. Only applies
  # to claude (the --output-format stream-json flag is exclusive to claude runs).
  if [[ "$agent_cmd" == *"--output-format stream-json"* ]]; then
    agent_cmd="${agent_cmd/--output-format stream-json/--output-format stream-json --add-dir \"\$WT\"}"
  fi
  local slug; slug=$(_project_slug "$project_path")
  # FIX-134: emit a runtime command-builder line when skill_path is known, so
  # the cycle agent is resolved live (routing-aware). Otherwise leave _CYCLE_CMD
  # empty and the inner script uses the baked fallback command below.
  local cycle_cmd_line
  if [[ -n "$skill_path" ]]; then
    cycle_cmd_line="_CYCLE_CMD=\$(_loop_cycle_agent_cmd \"${skill_path}\" \"\$CYCLE_AGENT\" \"\$WT\" 2>/dev/null || true)"
  else
    cycle_cmd_line="_CYCLE_CMD="
  fi
  cat > "$inner_path" << INNER
#!/bin/bash -l
set -o pipefail
# FIX-050: portable PATH assembly — launchd/cron deliver a bare PATH that
# misses brew-installed tools (tmux, claude, node, …). Iterate candidate
# dirs; only prepend when present and not already in PATH. Idempotent.
for _d in /opt/homebrew/bin /usr/local/bin /opt/local/bin "\$HOME/.local/bin" "\$HOME/.kimi-code/bin"; do
  case ":\$PATH:" in *":\$_d:"*) ;; *) [ -d "\$_d" ] && PATH="\$_d:\$PATH" ;; esac
done
export PATH
# FIX-031: inner-level LOCK (PID + start-ts) — outer runner.sh LOCK can be
# bypassed (recovery / retry / direct invocation); this guards the actual
# claude invocation so a second session can't run under the same project.
INNER_LOCK="\$(dirname "\$0")/.INNER-LOCK-\$(basename "\$0" -inner.sh | sed 's/^run-//')"
if [ -f "\$INNER_LOCK" ]; then
  _prev_pid=""; _prev_ts=""
  IFS=: read -r _prev_pid _prev_ts < "\$INNER_LOCK" 2>/dev/null || true
  _now=\$(date -u +%s)
  if [ -n "\$_prev_pid" ] && [ -n "\$_prev_ts" ] \\
     && kill -0 "\$_prev_pid" 2>/dev/null \\
     && [ "\$((_now - _prev_ts))" -lt 14400 ]; then
    echo "[\$(date '+%Y-%m-%dT%H:%M:%S%z')] inner loop already running (PID \$_prev_pid), skipping"
    trap '_inner_cleanup' EXIT
    exit 0
  fi
  rm -f "\$INNER_LOCK"
fi
printf '%s:%s\n' "\$\$" "\$(date -u +%s)" > "\$INNER_LOCK"
CURRENT_PHASE=""
# bash 3.2 (macOS /bin/bash) lacks associative arrays — use namespaced
# variables via 'printf -v' + indirect '\${!VAR}' expansion instead.
# _PHASE_START_<name>  stores the unix-second start ts per phase
# _PHASE_DUR_<name>    stores the computed duration in seconds per phase
# _PHASE_NAMES_DONE    space-separated list of completed phase names, in order
_PHASE_NAMES_DONE=""
_phase_begin() {
  local _name="\$1"
  printf -v "_PHASE_START_\${_name}" '%s' "\$(date +%s)"
  CURRENT_PHASE="\$_name"
  # FIX-136: write phase+start_ts to file so forked heartbeat child can read it
  printf '%s %s' "\$_name" "\$(date +%s)" > "\$HEARTBEAT_PHASE_FILE"
  _loop_event phase_start "\$_name" "" "" || true
}
_phase_end() {
  local _name="\$1" _outcome="\${2:-ok}"
  local _start_var="_PHASE_START_\${_name}"
  local _start="\${!_start_var:-\$(date +%s)}"
  local _dur=\$(( \$(date +%s) - _start ))
  [ "\$_dur" -lt 0 ] && _dur=0
  printf -v "_PHASE_DUR_\${_name}" '%s' "\$_dur"
  case " \$_PHASE_NAMES_DONE " in *" \$_name "*) ;; *) _PHASE_NAMES_DONE="\${_PHASE_NAMES_DONE} \$_name" ;; esac
  CURRENT_PHASE=""
  # FIX-136: clear phase file so heartbeat knows no active phase
  echo -n > "\$HEARTBEAT_PHASE_FILE" 2>/dev/null || true
  _loop_event phase_end "\$_name" "\${_dur}s" "\$_outcome" || true
}
_heartbeat_writer() {
  while true; do
    echo "\$(date -u +%s)" > "\$HEARTBEAT_FILE"
    # FIX-136: read phase from file — CURRENT_PHASE is inherited at fork and
    # never updated. The parent writes phase+start_ts on _phase_begin, clears
    # on _phase_end. No phase file = no active phase = skip tick.
    if [ -f "\$HEARTBEAT_PHASE_FILE" ]; then
      read -r _hb_phase _hb_start_ts < "\$HEARTBEAT_PHASE_FILE" 2>/dev/null || true
      if [ -n "\$_hb_phase" ] && [ -n "\$_hb_start_ts" ]; then
        local _el=\$(( \$(date +%s) - _hb_start_ts ))
        _loop_event phase_tick "\$_hb_phase" "\${_el}s elapsed" "" 2>/dev/null || true
      fi
    fi
    sleep 60
  done
}
# FIX-138: _heartbeat_writer is started AFTER sourcing bin/roll below, NOT here.
# Backgrounding a function forks a subshell snapshot of the current shell;
# _loop_event (defined in bin/roll, sourced ~240 lines later) is undefined in
# that snapshot, so every phase_tick call silently failed (2>/dev/null || true)
# -> zero heartbeat across the whole cycle. The publish_wait_merge ticks seen
# were its own 30s poll loop, not this writer. Starting post-source fixes it.
# FIX-057: cycle hard timeout — 45 minute SLA per loop cycle. If a cycle runs
# longer, kill claude / loop-fmt.py / all backgrounded children, mark the
# in-progress backlog item Blocked (caller decides), and exit cleanly so the
# next cron tick can proceed. Overridable via env for tests.
LOOP_CYCLE_TIMEOUT_SEC="\${ROLL_LOOP_CYCLE_TIMEOUT_SEC:-2700}"
_CYCLE_TIMED_OUT=0
# IDEA-028 / FIX-066: track whether cycle_end has been emitted via any of the
# explicit completion paths (publish/merge_back/orphan-push/claude-failed).
# When zero, the EXIT trap emits a fallback so cycle_start never orphans the
# dashboard into a phantom "still running" row.
_CYCLE_END_WRITTEN=0
_on_sigterm() { _CYCLE_TIMED_OUT=1; }
trap '_on_sigterm' TERM
# US-LOOP-005: idempotent runs.jsonl writer shared by normal exit, timeout
# trap, and worktree-setup-failure early exit. Guards on jq + run_id dedupe so
# multiple callers in the same cycle are safe.
# US-LOOP-008: build a JSON object {"<phase>": <duration_sec>, ...} from
# the ordered list of completed phases. Returns "{}" if no phases ran.
_phases_to_json() {
  command -v jq >/dev/null 2>&1 || { printf '{}'; return 0; }
  local _name _var _dur _args="" _filter="{}"
  local _first=1
  for _name in \$_PHASE_NAMES_DONE; do
    [ -z "\$_name" ] && continue
    _var="_PHASE_DUR_\${_name}"
    _dur="\${!_var:-0}"
    if [ "\$_first" -eq 1 ]; then
      _filter="{\\"\${_name}\\": \\\$d_\${_name}}"
      _first=0
    else
      _filter="\${_filter} + {\\"\${_name}\\": \\\$d_\${_name}}"
    fi
    _args="\${_args} --argjson d_\${_name} \${_dur}"
  done
  if [ "\$_first" -eq 1 ]; then
    printf '{}'
  else
    eval "jq -nc \${_args} '\${_filter}'" 2>/dev/null || printf '{}'
  fi
}

# US-LOOP-008: print phase breakdown panel sorted by duration desc.
# Idle/failed/aborted cycles only list phases that actually entered — no
# placeholder rows. Panel is best-effort (skips silently if no phases).
_print_phase_breakdown() {
  [ -n "\$_PHASE_NAMES_DONE" ] || return 0
  local _name _var _dur _total=0 _rows=""
  for _name in \$_PHASE_NAMES_DONE; do
    [ -z "\$_name" ] && continue
    _var="_PHASE_DUR_\${_name}"
    _dur="\${!_var:-0}"
    _total=\$(( _total + _dur ))
    _rows="\${_rows}\${_dur} \${_name}\n"
  done
  [ "\$_total" -le 0 ] && _total=1
  printf '\\n─── Cycle %s Phase Breakdown ───\\n' "\${CYCLE_ID:-unknown}"
  printf '%b' "\$_rows" | sort -rn | while read -r _d _n; do
    [ -z "\$_n" ] && continue
    local _pct=\$(( (_d * 1000) / _total ))
    local _pct_str
    _pct_str=\$(printf '%d.%d%%' \$(( _pct / 10 )) \$(( _pct % 10 )))
    local _bar="" _bar_len=\$(( (_d * 20) / _total ))
    [ "\$_bar_len" -gt 0 ] && _bar=\$(printf '█%.0s' \$(seq 1 \$_bar_len))
    printf '  %-22s %6ds  (%6s)  %s\\n' "\$_n" "\$_d" "\$_pct_str" "\$_bar"
  done
  printf '  %s\\n' "──────────────────────────────────────"
  printf '  %-22s %6ds\\n\\n' "Total" "\$_total"
}

_runs_append() {
  local _status="\$1"; local _tcr="\${2:-0}"; local _built="\${3:-[]}"
  # bash parameter expansion \${4:-{}} stops at the first \} so the default
  # leaks a trailing \} into a real 4th arg ("{...}}"). Test explicit empty.
  local _phases_json="\${4:-}"
  [ -z "\$_phases_json" ] && _phases_json="{}"
  # US-LOOP-020: write the cycle row to the project-local runs.jsonl so each
  # project owns its own cycle history. Falls back to the shared root only for
  # transient slugs where _LOOP_RT_DIR could not be resolved at startup.
  local _runs_dst
  if [ -n "\${_LOOP_RT_DIR:-}" ]; then
    _runs_dst="\${_LOOP_RT_DIR}/runs.jsonl"
  else
    _runs_dst="\${_SHARED_ROOT:-\${HOME}/.shared/roll}/loop/runs.jsonl"
  fi
  command -v jq >/dev/null 2>&1 || return 0
  local _cid="\${CYCLE_ID:-pre-cycle-\$\$}"
  local _rid="loop-\${_cid%-*}"
  grep -qF "\"run_id\":\"\$_rid\"" "\$_runs_dst" 2>/dev/null && return 0
  mkdir -p "\$(dirname "\$_runs_dst")"
  # FIX-123: clean stale .tmp files from dead PIDs on entry.
  # A .tmp residue means a previous atomic write was interrupted.
  # Only clean files whose PID (extracted from suffix) is dead.
  local _tmp_dir; _tmp_dir="\$(dirname "\$_runs_dst")"
  for _stale in "\$_tmp_dir"/runs.jsonl.tmp.*; do
    [ -f "\$_stale" ] || continue
    local _stale_pid="\${_stale##*.tmp.}"
    kill -0 "\$_stale_pid" 2>/dev/null || rm -f "\$_stale"
  done
  local _ts_now; _ts_now=\$(date -u +%Y-%m-%dT%H:%M:%SZ)
  local _start="\${CYCLE_START:-\$(date -u +%s)}"
  local _dur=\$(( \$(date -u +%s) - _start ))
  [ "\$_dur" -lt 0 ] && _dur=0
  # FIX-123: atomic write — write to .tmp.$$ first, then cat >> to append,
  # then remove. If interrupted between jq and rm, the next call cleans it.
  local _tmp="\$_runs_dst.tmp.\$\$"
  # US-AGENT-005/010: emit agent + story_type so historical hit rates and
  # status-page summaries have data to aggregate. Empty when not routed.
  local _agent_field="\${ROLL_LOOP_ROUTED_AGENT:-\${CYCLE_AGENT:-}}"
  local _story_field="\${ROLL_LOOP_ROUTED_STORY:-}"
  # US-AGENT-023: the routed complexity tier (easy/default/hard). The router
  # writes the tier into ROLL_LOOP_ROUTED_RULE (field 2 of the route line), so
  # surface it as a first-class runs.jsonl column alongside the routed agent.
  local _tier_field="\${ROLL_LOOP_ROUTED_RULE:-}"
  # US-AGENT-024: when a fallback fired, ROLL_LOOP_FALLBACK_FROM holds the
  # original (offline) agent so the run row records the degradation.
  local _fallback_from_field="\${ROLL_LOOP_FALLBACK_FROM:-}"
  local _stype_field=""
  if [ -n "\$_story_field" ]; then
    _stype_field="\${_story_field%%-*}"
  fi
  # US-LOOP-068: target field — roll-meta for roll-meta stories, empty otherwise
  local _target_field=""
  if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
    _target_field="roll-meta"
  fi
  # US-EVAL-002: compute the objective result_eval block from this cycle's
  # facts via the US-EVAL-001 pure-function rubric. Best-effort — when python3
  # or the scorer script is unavailable the row is written WITHOUT result_eval
  # (older rows have no such key; consumers treat its absence as "not scored").
  #
  # Facts adapter: translate the runner's terminal status vocabulary
  # (idle/built/done/orphan/failed/aborted/blocked) into the rubric's facts
  # shape. "done" is the only terminal that means the PR merged into main, so
  # it maps to merged=true; every other terminal is merged=false. status=idle
  # is passed through verbatim so scope_fidelity scores it as 0. An "orphan"
  # terminal flags an orphan so cleanliness scores 0. CI verdict and est_min
  # are not tracked at this layer, so they are intentionally left absent and
  # the rubric records those dimensions as "unknown" (never a silent 0).
  local _result_eval_json=""
  local _eval_bin="\${_EVAL_SCRIPT:-}"
  if [ -n "\$_eval_bin" ] && [ -f "\$_eval_bin" ] && command -v python3 >/dev/null 2>&1; then
    local _merged_flag=false _orphans_json='[]'
    case "\$_status" in
      done) _merged_flag=true ;;
      orphan) _orphans_json='["orphan"]' ;;
    esac
    local _facts_json
    _facts_json=\$(jq -nc \\
      --arg status "\$_status" \\
      --arg routed_story "\$_story_field" \\
      --argjson built "\$_built" \\
      --argjson tcr_count "\$_tcr" \\
      --argjson duration_sec "\$_dur" \\
      --argjson merged "\$_merged_flag" \\
      --argjson orphans "\$_orphans_json" \\
      '{status:\$status, routed_story:\$routed_story, built:\$built,
        tcr_count:\$tcr_count, duration_sec:\$duration_sec,
        merged:\$merged, orphans:\$orphans}' 2>/dev/null) || _facts_json=""
    if [ -n "\$_facts_json" ]; then
      _result_eval_json=\$(printf '%s' "\$_facts_json" | python3 "\$_eval_bin" 2>/dev/null) || _result_eval_json=""
    fi
  fi
  [ -n "\$_result_eval_json" ] || _result_eval_json="null"
  jq -nc \\
    --argjson result_eval "\$_result_eval_json" \\
    --arg ts "\$_ts_now" \\
    --arg project "${slug}" \\
    --arg run_id "\$_rid" \\
    --arg status "\$_status" \\
    --arg cycle_id "\$_cid" \\
    --arg agent "\$_agent_field" \\
    --arg tier "\$_tier_field" \\
    --arg fallback_from "\$_fallback_from_field" \\
    --arg story_type "\$_stype_field" \\
    --arg target "\$_target_field" \\
    --argjson built "\$_built" \\
    --argjson skipped '[]' \\
    --argjson alerts '[]' \\
    --argjson tcr_count "\$_tcr" \\
    --argjson duration_sec "\$_dur" \\
    --argjson phases "\$_phases_json" \\
    '{ts:\$ts, project:\$project, run_id:\$run_id, status:\$status,
      cycle_id:\$cycle_id, agent:\$agent, tier:\$tier, fallback_from:\$fallback_from,
      story_type:\$story_type, built:\$built, skipped:\$skipped, alerts:\$alerts,
      tcr_count:\$tcr_count, duration_sec:\$duration_sec, phases:\$phases}
     + (if \$target == "" then {} else {target:\$target} end)
     + (if \$result_eval == null then {} else {result_eval:\$result_eval} end)' \\
    > "\$_tmp" 2>/dev/null || { rm -f "\$_tmp"; return 0; }
  # FIX-157: ensure target exists before append; missing file silently drops
  # the write in launchd+inner contexts.
  touch "\$_runs_dst" 2>/dev/null || true
  if ! cat "\$_tmp" >> "\$_runs_dst" 2>/dev/null; then
    echo "[loop] _runs_append: failed to append to \$_runs_dst" >&2
  fi
  rm -f "\$_tmp"
}
_inner_cleanup() {
  local _rc=\$?
  # US-LOOP-007: close any CURRENT_PHASE that wasn't ended explicitly
  # (sigterm / set -e fire / aborted exit paths). Marks as fail so the
  # breakdown panel shows where we died.
  if [ -n "\${CURRENT_PHASE:-}" ]; then
    _phase_end "\$CURRENT_PHASE" fail 2>/dev/null || true
  fi
  # Kill heartbeat + every remaining background job (watchdog, orphan
  # loop-fmt.py, publish subshells) — bash's foreground 'wait' for the
  # pipe has already returned by the time the EXIT trap runs.
  kill "\${_HEARTBEAT_PID}" 2>/dev/null
  for _pid in \$(jobs -p); do kill "\$_pid" 2>/dev/null; done
  if [ "\${_CYCLE_TIMED_OUT:-0}" -eq 1 ]; then
    _loop_event cycle_end "\${CYCLE_ID:-unknown}" "\${BRANCH:-}" "blocked" 2>/dev/null || true
    _CYCLE_END_WRITTEN=1
    # US-LOOP-005 T9: timeout path must also write runs.jsonl row so dashboard
    # has a terminal record (cycle_end alone is insufficient — runs.jsonl is
    # the canonical history feed for 'roll loop runs').
    _phases_t=\$(_phases_to_json 2>/dev/null); [ -z "\$_phases_t" ] && _phases_t='{}'
_runs_append "failed" 0 "[]" "\$_phases_t" 2>/dev/null || true
    _worktree_alert "cycle \${CYCLE_ID:-unknown}: \${LOOP_CYCLE_TIMEOUT_SEC}s timeout — claude/python killed; in-progress story marked Blocked" 2>/dev/null || true
  fi
  # FIX-086: aborted-path orphan safety net. When the inner script is killed
  # (e.g. SIGUSR1, parent process death) after claude has committed TCR work
  # but before publish runs, the existing aborted fallback below writes
  # cycle_end aborted and the commits are local-only — if the worktree is
  # later cleaned up, the work is lost. Before falling through to aborted,
  # detect unpushed commits in the worktree and push them as an orphan
  # branch + tag (mirroring FIX-039's PR-publish-failed safety net). On
  # push success → cycle_end orphan; on failure → fall through to aborted
  # path below (no regression).
  # Skip when _CYCLE_TIMED_OUT=1: 45-min hard timeout SIGKILLs claude mid-flight,
  # so commits may not be atomic — keep human-in-loop via blocked path.
  # Guard uses inequality form so the FIX-066 audit anchors on the aborted
  # fallback below, not this block.
  if [ "\${_CYCLE_END_WRITTEN:-0}" != "1" ] \\
     && [ "\${_CYCLE_TIMED_OUT:-0}" = "0" ] \\
     && [ "\${_USE_WORKTREE:-0}" = "1" ] \\
     && [ -n "\${WT:-}" ] \\
     && [ -d "\$WT" ] \\
     && [ -n "\${CYCLE_ID:-}" ]; then
    _unpushed=\$(cd "\$WT" && git rev-list --count "origin/main..HEAD" 2>/dev/null || echo 0)
    if [ "\${_unpushed:-0}" -gt 0 ]; then
      # FIX-091: prefer a real PR so auto-merge lands the work; tag-only is the
      # last-resort because it requires manual cherry-pick. Emit cycle_end "done"
      # (canonical success status the dashboard recognizes) when PR publishes.
      # FIX-099: compute tcr_count + built[] from the worktree (it's still alive
      # at EXIT trap time) so runs.jsonl and ALERT carry truthful data.
      _orphan_tcr=0
      _orphan_built="[]"
      if command -v jq >/dev/null 2>&1; then
        _orphan_tcr=\$(cd "\$WT" && git log --oneline "origin/main..HEAD" 2>/dev/null | grep -c ' tcr:' || echo 0)
        _orphan_built=\$(cd "\$WT" && git log --oneline "origin/main..HEAD" 2>/dev/null \
          | grep ' tcr:' \
          | grep -oE '\b(FIX|US|REFACTOR|CHORE)-[0-9]+\b' \
          | sort -u \
          | jq -R -s 'split("\n") | map(select(length>0))' 2>/dev/null || echo "[]")
      fi
      _slug=""
      if _gh_resolve _slug \\
         && ( cd "\$WT" && _loop_publish_pr "\$BRANCH" "loop cycle \${CYCLE_ID}" ) >/dev/null 2>&1; then
        _loop_event cycle_end "\${CYCLE_ID}" "\${BRANCH:-}" "done" 2>/dev/null || true
        _CYCLE_END_WRITTEN=1
        # FIX-099: pass real tcr_count + built[] instead of 0/"[]"
        _phases_t=\$(_phases_to_json 2>/dev/null); [ -z "\$_phases_t" ] && _phases_t='{}'
        _runs_append "done" "\${_orphan_tcr}" "\${_orphan_built}" "\$_phases_t" 2>/dev/null || true
        # FIX-099: three-field ALERT so callers can distinguish recovered orphan
        # from a cycle's normally-picked story (was: "FIX-091 published as PR"
        # which leaked a hardcoded string regardless of what was actually built).
        _worktree_alert "cycle \${CYCLE_ID}: recovered_from_orphan=yes; tcr_commits=\${_orphan_tcr}; stories=\${_orphan_built}; pr_branch=\${BRANCH:-unknown}" 2>/dev/null || true
      else
        _orphan_tag="loop-orphan-\${CYCLE_ID}"
        if ( cd "\$WT" && git push origin "\$BRANCH" 2>/dev/null \\
             && git tag "\$_orphan_tag" 2>/dev/null \\
             && git push origin "\$_orphan_tag" 2>/dev/null ); then
          _loop_event cycle_end "\${CYCLE_ID}" "\${BRANCH:-}" "orphan" 2>/dev/null || true
          _CYCLE_END_WRITTEN=1
          # FIX-099: pass real tcr_count + built[] for the orphan-tag path too
          _phases_t=\$(_phases_to_json 2>/dev/null); [ -z "\$_phases_t" ] && _phases_t='{}'
          _runs_append "orphan" "\${_orphan_tcr}" "\${_orphan_built}" "\$_phases_t" 2>/dev/null || true
          _worktree_alert "cycle \${CYCLE_ID}: recovered_from_orphan=yes; tcr_commits=\${_orphan_tcr}; stories=\${_orphan_built}; FIX-086 pushed orphan tag \${_orphan_tag}" 2>/dev/null || true
        fi
      fi
    fi
  fi
  # IDEA-028 / FIX-066: catch every other abort (SIGKILL, set -e fire, ALERT
  # poisoning that bypasses the retry budget, etc.). Without this, cycle_start
  # is emitted but cycle_end never is, and dashboard renders the cycle as
  # "still running" until the next successful cycle rolls past it.
  if [ "\${_CYCLE_END_WRITTEN:-0}" -eq 0 ] && [ -n "\${CYCLE_ID:-}" ]; then
    _loop_event cycle_end "\${CYCLE_ID}" "\${BRANCH:-}" "aborted" 2>/dev/null || true
    _phases_t=\$(_phases_to_json 2>/dev/null); [ -z "\$_phases_t" ] && _phases_t='{}'
    _runs_append "aborted" 0 "[]" "\$_phases_t" 2>/dev/null || true
  fi
  # US-LOOP-015: process cycle raw log — strip ANSI, remove CR, rotate
  if [ -n "\${ROLL_CYCLE_LOG_RAW:-}" ] && [ -f "\$ROLL_CYCLE_LOG_RAW" ]; then
    _log_dir="${project_path}/.roll/cycle-logs"
    mkdir -p "\$_log_dir"
    sed -E 's/\x1b\[[0-9;]*[A-Za-z]//g; s/\r$//' "\$ROLL_CYCLE_LOG_RAW" \
      > "\${_log_dir}/\${CYCLE_ID}.log" 2>/dev/null || true
    rm -f "\$ROLL_CYCLE_LOG_RAW"
    # FIX-139: keep ALL per-cycle logs — no rotation cap. Each log is ~2KB,
    # full-year retention is a few MB. Owner: "不做 50 的限制,所有的都要留下来".
  fi
  rm -f "\$INNER_LOCK" "\$HEARTBEAT_FILE" "\$HEARTBEAT_PHASE_FILE"
  exit "\$_rc"
}
# Run _inner_cleanup on every exit path of the generated inner script.
trap '_inner_cleanup' EXIT

# US-AUTO-037: pull in worktree helpers (US-AUTO-036). Sourcing bin/roll is
# safe — its main() only runs when invoked directly (BASH_SOURCE == \$0).
# bin/roll's top-level \`set -euo pipefail\` infects us, so disable -e (the
# retry loop relies on tolerating non-zero exits) while keeping pipefail.
# Only errexit is cleared below: nounset (-u) and pipefail stay in effect for
# the rest of this script, so a possibly-unset variable needs a :- default and
# a pipeline reports failure when any of its stages fails.
source "${roll_bin}"
set +e

# US-LOOP-019: the heartbeat files are resolved here, after bin/roll has been
# sourced, because _loop_runtime_dir is one of its helpers.
_LOOP_RT_DIR=\$(_loop_runtime_dir "${slug}" 2>/dev/null || echo "")
if [ -z "\$_LOOP_RT_DIR" ]; then
  # No runtime dir could be resolved: fall back to the shared loop directory.
  _hb_dir="\${_SHARED_ROOT:-\${HOME}/.shared/roll}/loop"
else
  _hb_dir="\$_LOOP_RT_DIR"
  # FIX-157: make sure the data-plane files exist, so that append-mode writes
  # keep working after archive / reset / new-machine scenarios removed them.
  mkdir -p "\$_LOOP_RT_DIR"
  touch "\${_LOOP_RT_DIR}/events.ndjson" "\${_LOOP_RT_DIR}/runs.jsonl" 2>/dev/null || true
fi
HEARTBEAT_FILE="\${_hb_dir}/.heartbeat-${slug}"
HEARTBEAT_PHASE_FILE="\${_hb_dir}/.phase-${slug}"

# FIX-138: the heartbeat writer is forked only now, once _loop_event exists
# (see the note at its definition above). A fork taken earlier would snapshot
# a shell without _loop_event.
_heartbeat_writer &
_HEARTBEAT_PID=\$!

# US-LOOP-019: control-plane files moved to project-local .roll/loop/.
# FIX-052: bin/roll initializes loop state paths from cwd at source time, but
# the inner script may be launched from anywhere. Override to this project's
# slug (baked at template generation) so helpers like _worktree_alert write
# to the correct project's ALERT-<slug>.md / state-<slug>.yaml / mute-<slug>.
_LOOP_PROJ_SLUG="${slug}"
if [ -n "\$_LOOP_RT_DIR" ]; then
  _loop_ctl_dir="\$_LOOP_RT_DIR"
else
  # Use the same default root as the heartbeat fallback. nounset is still
  # active after sourcing bin/roll, so a bare _SHARED_ROOT expansion would
  # abort the script whenever that variable happens to be unset.
  _loop_ctl_dir="\${_SHARED_ROOT:-\${HOME}/.shared/roll}/loop"
fi
_LOOP_ALERT="\${_loop_ctl_dir}/ALERT-${slug}.md"
_LOOP_STATE="\${_loop_ctl_dir}/state-${slug}.yaml"
_LOOP_MUTE_FILE="\${_loop_ctl_dir}/mute-${slug}"
# US-LOOP-006: ROLL_MAIN_SLUG is the canonical identity for any subprocess —
# claude, loop-fmt.py, _loop_event in arbitrary cwd. _project_slug honors this
# env var first, so writes never fragment into tmp-* / cycle-* phantom slugs.
export ROLL_MAIN_SLUG="${slug}"
# FIX-070: helpers that need to update the main repo's backlog (e.g. when a
# worktree cycle marks a story 🔨 In Progress) read ROLL_MAIN_PROJECT to
# locate it — the cycle's own cwd is the worktree, not main.
export ROLL_MAIN_PROJECT="${project_path}"

# Pre-claude: try to create a per-cycle isolated worktree on origin/main.
# On any failure (no remote, no main, etc.) fall back to running in the
# project's main tree (degraded — no isolation, like pre-037 behavior).
# NOTE(review): the worktree_setup failure branch further down skips the cycle
# instead of running in the main tree, so the fallback sentence above looks
# outdated — confirm and reword.
# CYCLE_ID is a local timestamp plus this shell's PID; it is embedded in both
# the worktree name and the branch name below.
CYCLE_ID="\$(date +%Y%m%d-%H%M%S)-\$\$"
# Cycle start time in epoch seconds.
CYCLE_START=\$(date +%s)
WT="\$(_worktree_path "${slug}" "cycle-\${CYCLE_ID}")"
BRANCH="loop/cycle-\${CYCLE_ID}"
# Stays 0 until the worktree has actually been created.
_USE_WORKTREE=0
# US-LOOP-007: startup phase covers env / lock / heartbeat setup. End it now
# that cycle vars are bound and we're about to do real work.
_phase_begin startup
_phase_end startup ok
_phase_begin preflight
# Best-effort: a failed cd is ignored; the helpers below are handed the project
# path explicitly.
cd "${project_path}" 2>/dev/null || true
# US-INFRA-008: ensure git hooks are wired so TCR pre-commit gate can't be bypassed
_ensure_hooks_path "${project_path}" 2>/dev/null || true
# US-LOOP-056: sync .roll/ meta from roll-meta remote before backlog scan
_loop_sync_meta "${project_path}" || true
# US-SYNC-008: when backlog_sync.on_loop_cycle is true, pull new GitHub issues
# into the backlog before the story scan. Default off; fail-soft (ALERT only).
_loop_backlog_sync_hook "${project_path}" || true
# FIX-104: GC stale merged temp branches at cycle entry — before worktree setup
# and before any early-exit gate (pre-run abort, CI red precheck). The post-claude
# call site doesn't cover those paths, so merged branches accumulated on origin.
_loop_cleanup_stale_cycle_branches "${project_path}" || true
# FIX-040: orphan worktree recovery — scan for worktrees left by previous failed
# cycles (publish failed or inner script was SIGKILL'd). Attempt to publish each
# before starting the new cycle. Glob is chronological via timestamp in name.
# The root uses the same default as the other shared-root fallbacks in this
# script, because nounset is still active after sourcing bin/roll.
for _orphan_wt in "\${_SHARED_ROOT:-\${HOME}/.shared/roll}/worktrees/${slug}-cycle-"*; do
  [ -d "\$_orphan_wt" ] || continue
  # Confirm it's a real worktree directory (not glob literal when no matches)
  [ -d "\${_orphan_wt}/.git" ] || [ -f "\${_orphan_wt}/.git" ] || continue
  _orphan_branch=\$(cd "\$_orphan_wt" && git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "")
  [ -z "\$_orphan_branch" ] && continue
  # FIX-114: PR for this branch may already be squash-merged externally. In
  # that case origin/main..HEAD still shows commits (squash created new SHA)
  # so the old "needs republish" path tries to recreate the PR and fails.
  # Ask gh first; if MERGED → drop the worktree clean.
  if command -v gh >/dev/null 2>&1; then
    _orphan_pr_state=\$(gh pr view "\$_orphan_branch" --json state -q .state 2>/dev/null || echo "")
    if [ "\$_orphan_pr_state" = "MERGED" ]; then
      echo "[loop] FIX-114: orphan worktree \$_orphan_wt branch \$_orphan_branch already merged remotely; cleaning up"
      _worktree_cleanup "\$_orphan_wt" "\$_orphan_branch"
      continue
    fi
  fi
  # Count the commits that are not on origin/main. A count that cannot be read
  # (rev-list failed, e.g. origin/main does not resolve) must NOT be treated as
  # zero: the zero branch below deletes the worktree, which would destroy
  # unpublished commits. Preserve the worktree and move on instead.
  _orphan_commits=\$(cd "\$_orphan_wt" && git rev-list --count origin/main..HEAD 2>/dev/null || echo "")
  case "\$_orphan_commits" in
    ''|*[!0-9]*)
      echo "[loop] FIX-040: cannot count commits for orphan worktree \$_orphan_wt (branch \$_orphan_branch) — leaving preserved"
      continue
      ;;
  esac
  if [ "\$_orphan_commits" -gt 0 ]; then
    echo "[loop] FIX-040: recovering orphan worktree \$_orphan_wt (branch \$_orphan_branch, \${_orphan_commits} commits)"
    # FIX-045: rebase onto origin/main before publishing — avoids BEHIND state on GitHub
    if ! ( cd "\$_orphan_wt" && git fetch origin main 2>/dev/null && git rebase origin/main 2>/dev/null ); then
      echo "[loop] FIX-045: orphan \$_orphan_branch rebase failed — skipping recovery (conflict or network error)"
      continue
    fi
    # Doc-only changes and code changes are published through different helpers.
    _orphan_ok=0
    if ( cd "\$_orphan_wt" && _loop_is_doc_only_change ); then
      ( cd "\$_orphan_wt" && _loop_publish_doc_pr "\$_orphan_branch" "doc: recover orphan \${_orphan_branch}" ) && _orphan_ok=1
    else
      ( cd "\$_orphan_wt" && _loop_publish_pr "\$_orphan_branch" "recover orphan \${_orphan_branch}" ) && _orphan_ok=1
    fi
    if [ "\$_orphan_ok" -eq 1 ]; then
      # US-LOOP-068: if orphan contains a roll-meta worktree, clean it first
      if [ -d "\${_orphan_wt}/.roll/.git" ]; then
        _loop_roll_meta_worktree_cleanup "\$_orphan_wt" "\$_orphan_branch" "${project_path}" 2>/dev/null || true
      fi
      _worktree_cleanup "\$_orphan_wt" "\$_orphan_branch"
      echo "[loop] FIX-040: orphan recovered and cleaned: \$_orphan_branch"
    else
      echo "[loop] FIX-040: orphan recovery publish failed for \$_orphan_branch — leaving preserved"
    fi
  else
    # Verified zero commits ahead of origin/main: nothing to lose, reclaim it.
    echo "[loop] FIX-040: orphan worktree \$_orphan_wt has no commits; cleaning up"
    # US-LOOP-068: if orphan contains a roll-meta worktree, clean it first
    if [ -d "\${_orphan_wt}/.roll/.git" ]; then
      _loop_roll_meta_worktree_cleanup "\$_orphan_wt" "\$_orphan_branch" "${project_path}" 2>/dev/null || true
    fi
    _worktree_cleanup "\$_orphan_wt" "\$_orphan_branch"
  fi
done
# US-AUTO-038: snapshot orphan claude/* branches before claude runs so the
# post-claude cleanup can diff and delete only this session's additions.
CLAUDE_BRANCH_SNAPSHOT="\$(_claude_remote_snapshot "${project_path}")"
_phase_end preflight ok
# worktree_setup phase: create the isolated per-cycle worktree, pick and route
# the next story, decide whether it targets roll-meta, and only then swap in
# the roll-meta worktree. When the worktree cannot be created the cycle is
# recorded as failed and skipped.
_phase_begin worktree_setup
if _worktree_fetch_origin main \\
   && _worktree_create "\$WT" "\$BRANCH" "origin/main"; then
  _USE_WORKTREE=1
  _worktree_submodule_init "\$WT" 2>/dev/null || true
  # FIX-069: copy .roll/ meta (backlog, skills, conventions) into the
  # worktree as a read-only reference. Without this the cycle no-ops
  # because .roll/ is gitignored and the clean clone has no backlog
  # for Claude to read or skill entry points to dispatch to.
  _worktree_sync_meta "\$WT" 2>/dev/null || true
  echo "[loop] cycle \${CYCLE_ID}: worktree \$WT on \$BRANCH"
  _loop_event cycle_start "\${CYCLE_ID}" "" "" || true
  # US-AGENT-006: per-story routing — pick the next Todo, route to an agent
  # based on its Agent profile + agent-routes.yaml, fall back to
  # _project_agent when no story is pickable or routing returns nothing.
  ROLL_LOOP_ROUTED_STORY=""
  ROLL_LOOP_ROUTED_AGENT=""
  ROLL_LOOP_ROUTED_RULE=""
  ROLL_LOOP_ROUTE_RATIONALE=""
  ROLL_LOOP_FALLBACK_FROM=""
  ROLL_LOOP_ROUTED_STORY=\$( (cd "\$WT" 2>/dev/null && _loop_pick_next_story 2>/dev/null) || echo "" )
  # FIX-146: close TOCTOU window — if the picked story became ineligible between
  # pick and agent handoff, re-pick once rather than idling on a stale route.
  if [ -n "\$ROLL_LOOP_ROUTED_STORY" ]; then
    if ! (cd "\$WT" 2>/dev/null && _loop_story_is_eligible "\$ROLL_LOOP_ROUTED_STORY" 2>/dev/null); then
      echo "[loop] story \${ROLL_LOOP_ROUTED_STORY} stale — became ineligible after pick; re-picking"
      _loop_event story_stale "\${CYCLE_ID}" "\${ROLL_LOOP_ROUTED_STORY}" "" || true
      ROLL_LOOP_ROUTED_STORY=\$( (cd "\$WT" 2>/dev/null && _loop_pick_next_story 2>/dev/null) || echo "" )
    fi
  fi
  # FIX-161: hard assertion before handoff — if the story is still not 📋 Todo
  # after all re-picks, clear it so the cycle idles rather than routing garbage.
  if [ -n "\$ROLL_LOOP_ROUTED_STORY" ]; then
    if ! (cd "\$WT" 2>/dev/null && _loop_story_is_eligible "\$ROLL_LOOP_ROUTED_STORY" 2>/dev/null); then
      echo "[loop] FIX-161: story \${ROLL_LOOP_ROUTED_STORY} is not Todo after handoff guard — clearing"
      _loop_event story_stale "\${CYCLE_ID}" "\${ROLL_LOOP_ROUTED_STORY}" "FIX-161 handoff guard" || true
      ROLL_LOOP_ROUTED_STORY=""
    fi
  fi
  if [ -n "\$ROLL_LOOP_ROUTED_STORY" ]; then
    # The route line is read as: agent, rule, then free-text rationale.
    _route_line=\$( (cd "\$WT" 2>/dev/null && _loop_pick_agent_for_story "\$ROLL_LOOP_ROUTED_STORY" 2>/dev/null) || echo "" )
    if [ -n "\$_route_line" ]; then
      ROLL_LOOP_ROUTED_AGENT=\$(echo "\$_route_line" | awk '{print \$1}')
      ROLL_LOOP_ROUTED_RULE=\$(echo "\$_route_line" | awk '{print \$2}')
      ROLL_LOOP_ROUTE_RATIONALE=\$(echo "\$_route_line" | cut -d' ' -f3-)
      echo "[loop] story \${ROLL_LOOP_ROUTED_STORY} routed to \${ROLL_LOOP_ROUTED_AGENT} via \${ROLL_LOOP_ROUTED_RULE}"
      _loop_event story_routed "\${CYCLE_ID}" "\${ROLL_LOOP_ROUTED_STORY}" "\${ROLL_LOOP_ROUTED_AGENT}|\${ROLL_LOOP_ROUTED_RULE}" || true
      # US-AGENT-024: mechanical fallback — if the routed agent is offline
      # (no PATH / auth / network), swap to the fallback slot agent. When the
      # fallback is also down, _loop_resolve_fallback_agent returns 2 after
      # writing an ALERT; we leave the routed agent as-is so the cycle still
      # records a terminal run (the ALERT is the operator-facing signal).
      _fb_line=\$( (cd "\$WT" 2>/dev/null && _loop_resolve_fallback_agent "\$ROLL_LOOP_ROUTED_AGENT" 2>/dev/null) || echo "" )
      if [ -n "\$_fb_line" ]; then
        _fb_agent=\$(echo "\$_fb_line" | awk '{print \$1}')
        ROLL_LOOP_FALLBACK_FROM=\$(echo "\$_fb_line" | awk '{print \$2}')
        if [ -n "\$ROLL_LOOP_FALLBACK_FROM" ] && [ -n "\$_fb_agent" ]; then
          echo "[loop] agent \${ROLL_LOOP_FALLBACK_FROM} unavailable; cycle falls back to \${_fb_agent}"
          _loop_event agent_fallback "\${CYCLE_ID}" "\${ROLL_LOOP_FALLBACK_FROM}" "\${_fb_agent}" || true
          ROLL_LOOP_ROUTED_AGENT="\$_fb_agent"
        fi
      fi
    fi
  fi
  CYCLE_AGENT="\${ROLL_LOOP_ROUTED_AGENT:-\$(_project_agent)}"
  _loop_event agent_used "\${CYCLE_ID}" "\${CYCLE_AGENT}" "primary" || true
  # US-LOOP-068: detect roll-meta target after routing is complete
  _ROLL_META_TARGET=0
  if [ -n "\$ROLL_LOOP_ROUTED_STORY" ]; then
    if _loop_is_roll_meta_story "\$ROLL_LOOP_ROUTED_STORY" "\${ROLL_MAIN_PROJECT}/.roll/backlog.md" 2>/dev/null; then
      _ROLL_META_TARGET=1
      echo "[loop] story \${ROLL_LOOP_ROUTED_STORY} is roll-meta target"
    fi
  fi
  # US-LOOP-068: for roll-meta stories, replace the synced .roll/ with a
  # real roll-meta git worktree so commits land in roll-meta remote.
  # This must run AFTER the detection above. It used to run before it, where
  # the flag was still unset, so the setup never executed and its failure
  # fallback was overwritten by the later reset to 0.
  if [ "\$_ROLL_META_TARGET" = "1" ]; then
    if _loop_roll_meta_worktree_setup "\$WT" "\$BRANCH" "${project_path}" 2>/dev/null; then
      echo "[loop] cycle \${CYCLE_ID}: roll-meta worktree \${WT}/.roll on \$BRANCH"
    else
      echo "[loop] cycle \${CYCLE_ID}: roll-meta worktree setup failed — falling back to normal"
      _ROLL_META_TARGET=0
    fi
  fi
  export ROLL_LOOP_ROLL_META_TARGET="\${_ROLL_META_TARGET}"
  _phase_end worktree_setup ok
else
  # P3 fix: skip the cycle entirely when worktree isolation fails.
  # --dangerously-skip-permissions is only safe paired with worktree isolation;
  # falling back to the main tree without isolation is unacceptable.
  _worktree_alert "cycle \${CYCLE_ID}: worktree setup failed — skipping cycle to avoid running without isolation"
  echo "[loop] cycle \${CYCLE_ID}: worktree setup failed; skipping cycle (no isolation)"
  _phase_end worktree_setup fail
  # US-LOOP-005 T10: worktree-setup-failed path leaves no commits and never
  # emits cycle_start, but dashboard still needs a runs.jsonl row marking the
  # cycle as failed (otherwise the scheduled tick appears to have vanished).
  _phases_t=\$(_phases_to_json 2>/dev/null); [ -z "\$_phases_t" ] && _phases_t='{}'
  _runs_append "failed" 0 "[]" "\$_phases_t" 2>/dev/null || true
  trap '_inner_cleanup' EXIT
  exit 0
fi

# Formatter script path, baked in when this script is generated.
FMT="${fmt_script}"
# US-EVAL-002: rubric scorer path (US-EVAL-001), also baked at generation
# time; _runs_append picks it up through \${_EVAL_SCRIPT:-} to compute
# result_eval.
_EVAL_SCRIPT="${eval_script}"
# US-LOOP-004: give loop-fmt the slug, cycle id and shared root so that it can
# append one per-cycle 'usage' event (tokens / cost / model / duration) to
# events-<slug>.ndjson. The reader (roll loop status) consumes that event
# rather than scraping the overwritten cron.log.
LOOP_PROJECT_SLUG="${slug}"
LOOP_CYCLE_ID="\${CYCLE_ID}"
LOOP_SHARED_ROOT="\${_SHARED_ROOT:-\$HOME/.shared/roll}"
export LOOP_PROJECT_SLUG LOOP_CYCLE_ID LOOP_SHARED_ROOT
# US-LOOP-010: loop-fmt.py branches its rendering on the running agent:
# claude goes through the stream-json parser, everything else is passed
# through untouched.
# US-AGENT-006: the per-story routed agent wins over the project default.
ROLL_LOOP_AGENT="\${CYCLE_AGENT:-\$(_project_agent)}"
export ROLL_LOOP_AGENT ROLL_LOOP_ROUTED_STORY ROLL_LOOP_ROUTED_AGENT ROLL_LOOP_ROUTED_RULE ROLL_LOOP_FALLBACK_FROM
_phase_begin agent_invoke
# FIX-136: non-claude agents (pi/deepseek/kimi) buffer their stdout when it is
# a pipe. Running them under script(1) hands them a pseudo-TTY, so loop-fmt.py's
# passthrough sees output as it is produced; otherwise tmux stays black for
# the whole phase.
if [ "\$ROLL_LOOP_AGENT" = "claude" ]; then
  _AGENT_PTY_PREFIX=""
else
  _AGENT_PTY_PREFIX="script -q /dev/null"
fi
# Agent invocation: at most three attempts. Each attempt arms a fresh watchdog,
# runs the agent inside the worktree (piped through the formatter when that
# file exists), then disarms the watchdog. The loop stops on a timeout or on a
# zero exit status; otherwise it sleeps 30s before the next attempt.
for _attempt in 1 2 3; do
  # FIX-068: defensive reset before each attempt — _CYCLE_TIMED_OUT carries
  # the SIGTERM result of the previous attempt and would otherwise force an
  # immediate break on a clean retry.
  _CYCLE_TIMED_OUT=0
  # FIX-057 + FIX-068: watchdog — fires SIGTERM at the inner script (and its
  # direct children) when the cycle exceeds LOOP_CYCLE_TIMEOUT_SEC, then
  # escalates to SIGKILL after a 5s grace period for any claude process
  # still alive. claude lives inside a pipeline subshell, so pkill -P \$\$
  # alone only catches the subshell, not claude itself; matching by the
  # worktree path (which appears in claude's --add-dir arg, FIX-048) targets
  # the cycle's claude uniquely without touching other projects' processes.
  ( sleep "\$LOOP_CYCLE_TIMEOUT_SEC" && {
      kill -TERM \$\$ 2>/dev/null
      pkill -TERM -P \$\$ 2>/dev/null
      pkill -TERM -f "\$WT" 2>/dev/null
      sleep 5
      pkill -KILL -P \$\$ 2>/dev/null
      pkill -KILL -f "\$WT" 2>/dev/null
    } ) &
  _WATCHDOG_PID=\$!
  # The next line is a template slot filled in when this script is generated.
  # NOTE(review): presumably it assigns _CYCLE_CMD, which the branches below
  # test — confirm against the generator.
  ${cycle_cmd_line}
  # FIX-134: prefer the runtime-rebuilt command (routing-aware); fall back to
  # the baked command (project agent at \`roll loop on\` time) when empty.
  if [ -f "\$FMT" ]; then
    if [ -n "\$_CYCLE_CMD" ]; then ( cd "\$WT" && eval \$_AGENT_PTY_PREFIX "\$_CYCLE_CMD" ) | python3 "\$FMT"
    else ( cd "\$WT" && \$_AGENT_PTY_PREFIX ${agent_cmd} ) | python3 "\$FMT"; fi
  else
    if [ -n "\$_CYCLE_CMD" ]; then ( cd "\$WT" && eval \$_AGENT_PTY_PREFIX "\$_CYCLE_CMD" )
    else ( cd "\$WT" && \$_AGENT_PTY_PREFIX ${agent_cmd} ); fi
  fi
  # Status of the attempt. pipefail is inherited from bin/roll, so in the piped
  # form a failing agent stage is reported even when the formatter exits 0.
  _exit=\$?
  # Disarm this attempt's watchdog and reap it so it cannot fire afterwards.
  kill "\$_WATCHDOG_PID" 2>/dev/null
  wait "\$_WATCHDOG_PID" 2>/dev/null
  # A timeout is never retried; a clean exit needs no retry.
  [ "\$_CYCLE_TIMED_OUT" -eq 1 ] && break
  [ "\$_exit" -eq 0 ] && break
  if [ "\$_attempt" -lt 3 ]; then
    # NOTE(review): the message says "claude" even when another agent ran.
    echo "[loop] claude exited \$_exit (attempt \$_attempt/3) — retrying in 30s..."
    sleep 30
  fi
done

# Close the agent_invoke phase with the outcome of the last attempt.
if [ "\$_CYCLE_TIMED_OUT" -eq 1 ] || [ "\$_exit" -ne 0 ]; then
  _phase_end agent_invoke fail
else
  _phase_end agent_invoke ok
fi

# US-LOOP-026 + FIX-154: in -p text mode the non-claude agents (pi / deepseek /
# kimi) print no usage. Token and cost figures are therefore recovered once per
# cycle from the agent's session jsonl and appended as a single authoritative
# usage event. This happens here, not in loop-fmt's per-attempt passthrough, so
# retries cannot write N duplicate events that the dashboard's same-label SUM
# would inflate. It runs ahead of the timeout-abort exit so that partial cycles
# still get whatever usage the session recorded. The events path is resolved
# the same way _loop_event does it (runtime dir first, shared fallback), so the
# emitter appends to the file the reader consumes.
# FIX-164: the emitter is chosen from the per-cycle ROUTED agent (CYCLE_AGENT),
# not from the project default (_project_agent). Dispatching on the project
# default made a kimi-default project run kimi_emit for EVERY cycle, so
# deepseek/pi cycles emitted no usage event at all (kimi_emit finds no kimi
# session for a pi cycle) and showed up with empty token/cost. CYCLE_AGENT is
# the agent that actually ran, so each emitter reads its own session format
# (pi.usage_from_session vs kimi.usage_from_session).
_emit_agent="\${CYCLE_AGENT:-\$(_project_agent)}"
if [ "\$_emit_agent" != "claude" ]; then
  # Where to append: the runtime dir when it resolves, else the shared file.
  _pi_rt=\$(_loop_runtime_dir "${slug}" 2>/dev/null || echo "")
  if [ -z "\$_pi_rt" ]; then
    _pi_evfile="\${_SHARED_ROOT:-\$HOME/.shared/roll}/loop/events-${slug}.ndjson"
  else
    _pi_evfile="\${_pi_rt}/events.ndjson"
  fi
  # Which emitter: kimi has its own script; every other agent uses the pi one.
  case "\$_emit_agent" in
    kimi) _emit_script="${kimi_emit_script}" ;;
    *)    _emit_script="${pi_emit_script}" ;;
  esac
  # Best-effort: a missing script or a failing emitter never fails the cycle.
  if [ -f "\$_emit_script" ]; then
    python3 "\$_emit_script" --cwd "\$WT" --cycle "\${CYCLE_ID}" \\
      --slug "${slug}" --events "\$_pi_evfile" 2>/dev/null || true
  fi
fi

# FIX-057: timed out — skip publish; EXIT trap writes cycle_end blocked + ALERT.
# Nothing below (data capture, branch cleanup, publish) runs for a timed-out
# cycle; the worktree is left in place, as the message says.
if [ "\$_CYCLE_TIMED_OUT" -eq 1 ]; then
  echo "[loop] cycle \${CYCLE_ID}: \${LOOP_CYCLE_TIMEOUT_SEC}s timeout — aborting cycle (worktree preserved at \$WT)"
  # Re-arm the EXIT trap right before exiting, as every early-exit path in this
  # script does. NOTE(review): presumably defensive against the trap having
  # been replaced since it was first installed — confirm.
  trap '_inner_cleanup' EXIT
  exit 0
fi

# FIX-044: capture cycle data from worktree before cleanup removes it
#   _cycle_status  idle (default) | failed (agent exited non-zero) | built
#   _cycle_tcr     number of commits ahead of origin/main whose subject has ' tcr:'
#   _cycle_built   JSON array of story ids on '✅ Done' lines of the backlog diff
_cycle_tcr=0
_cycle_status="idle"
_cycle_built="[]"
if [ "\$_USE_WORKTREE" = "1" ]; then
  if [ "\$_exit" -ne 0 ]; then
    _cycle_status="failed"
  else
    # US-LOOP-068: for roll-meta stories the commits and the backlog live in
    # the roll-meta worktree at WT/.roll; otherwise in the product worktree.
    if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
      _cycle_repo="\$WT/.roll"
      _cycle_backlog="backlog.md"
    else
      _cycle_repo="\$WT"
      _cycle_backlog=".roll/backlog.md"
    fi
    _cycle_commits_pre=\$(cd "\$_cycle_repo" && git rev-list --count origin/main..HEAD 2>/dev/null || echo 0)
    if [ "\$_cycle_commits_pre" -gt 0 ]; then
      _cycle_status="built"
      # grep -c already prints 0 when nothing matches, but exits 1. A trailing
      # "|| echo 0" would then print a second 0 and leave a two-line value.
      # Swallow the status instead and default only when the output is empty.
      _cycle_tcr=\$(cd "\$_cycle_repo" && git log --oneline origin/main..HEAD -- 2>/dev/null | grep -c ' tcr:' || true)
      [ -n "\$_cycle_tcr" ] || _cycle_tcr=0
      if command -v jq >/dev/null 2>&1; then
        # Same trap here: with pipefail, a diff without Done rows makes the grep
        # stages fail while jq still prints []. A trailing fallback echo would
        # duplicate it, so default only when jq produced nothing.
        _cycle_built=\$(cd "\$_cycle_repo" && git diff origin/main -- "\$_cycle_backlog" 2>/dev/null | grep '✅ Done' | grep -oE '\[[A-Z]+-[0-9]+\]' | sed 's/^.//;s/.\$//' | jq -R -s 'split("\n") | map(select(length>0))' 2>/dev/null || true)
        [ -n "\$_cycle_built" ] || _cycle_built="[]"
      fi
    fi
  fi
fi

# US-AUTO-038: diff snapshot vs current and delete any claude/* branches this
# session pushed to origin. Runs regardless of claude's exit code (cleanup is
# orthogonal to success/failure) and is silent on non-GitHub / unreachable.
# The snapshot was taken before the agent ran, at the end of preflight.
_claude_cleanup_new_branches "\$CLAUDE_BRANCH_SNAPSHOT" "${project_path}" || true
# REFACTOR-011: also prune local .claude/worktrees/ entries whose branch has
# been merged to main (remote-branch cleanup above doesn't touch local worktrees).
# Both calls are best-effort: their exit status is ignored.
_claude_cleanup_stale_worktrees "${project_path}" || true

# Post-claude: publish cycle branch. Doc-only changes (BACKLOG/docs) merge
# immediately via --admin; code changes use auto-merge (CI gate required).
# When \`gh\` is unavailable, fall back to the legacy ff-merge path.
if [ "\$_USE_WORKTREE" = "1" ]; then
  if [ "\$_exit" -eq 0 ]; then
    # Idle cycle — no commits ahead of origin/main means nothing was built;
    # skip publish and reclaim the worktree immediately.
    # US-LOOP-068: for roll-meta stories, count commits in roll-meta worktree
    if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
      _cycle_commits=\$(cd "\$WT/.roll" && git rev-list --count origin/main..HEAD 2>/dev/null || echo 0)
    else
      _cycle_commits=\$(cd "\$WT" && git rev-list --count origin/main..HEAD 2>/dev/null || echo 0)
    fi
    if [ "\$_cycle_commits" -eq 0 ]; then
      # US-LOOP-068: clean up roll-meta worktree before product worktree
      if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
        _loop_roll_meta_worktree_cleanup "\$WT" "\$BRANCH" "${project_path}" 2>/dev/null || true
      fi
      _worktree_cleanup "\$WT" "\$BRANCH"
      _loop_event idle "\${CYCLE_ID}" "" "" || true
      # FIX-F (2026-05-25): explicitly write the terminal "idle" cycle_end +
      # runs row here, otherwise the EXIT trap's catch-all fallback (which
      # writes "aborted" when _CYCLE_END_WRITTEN is still 0) will reclassify
      # this successful no-op as a failure on the dashboard.
      _loop_event cycle_end "\${CYCLE_ID}" "" "idle" || true
      _CYCLE_END_WRITTEN=1
      # US-OBS-014: idle cycles push too — keeps the remote heartbeat fresh so
      # remote-watch can tell "loop online but no work" apart from "loop offline".
      _loop_push_status_snapshot || true
      _phases_idle=\$(_phases_to_json 2>/dev/null); [ -z "\$_phases_idle" ] && _phases_idle='{}'
      _runs_append "idle" 0 "[]" "\$_phases_idle" 2>/dev/null || true
      echo "[loop] cycle \${CYCLE_ID}: idle (no new commits); worktree cleaned"
    else
      _is_doc_only=0
      # US-LOOP-068: for roll-meta stories, doc-only check runs in roll-meta worktree
      if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
        ( cd "\$WT/.roll" && _loop_is_doc_only_change ) && _is_doc_only=1
      else
        ( cd "\$WT" && _loop_is_doc_only_change ) && _is_doc_only=1
      fi
      # US-LOOP-069: roll-meta boundary guard — abort if a roll-meta story touched product files
      if ! _loop_guard_roll_meta_boundary "\$WT" "\$ROLL_LOOP_ROUTED_STORY"; then
        _loop_event cycle_end "\${CYCLE_ID}" "" "failed" || true; _CYCLE_END_WRITTEN=1
        _phases_guard=\$(_phases_to_json 2>/dev/null); [ -z "\$_phases_guard" ] && _phases_guard='{}'
        _runs_append "failed" 0 "[]" "\$_phases_guard" 2>/dev/null || true
        echo "[loop] cycle \${CYCLE_ID}: US-LOOP-069 blocked — roll-meta story touched product files; worktree preserved at \$WT"
        trap '_inner_cleanup' EXIT
        exit 0
      fi
      # US-LOOP-068: roll-meta test gate — run roll-meta tests when ops/ changed
      if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
        if ! _loop_roll_meta_test_gate "\$WT"; then
          _loop_event cycle_end "\${CYCLE_ID}" "" "failed" || true; _CYCLE_END_WRITTEN=1
          _phases_test=\$(_phases_to_json 2>/dev/null); [ -z "\$_phases_test" ] && _phases_test='{}'
          _runs_append "failed" 0 "[]" "\$_phases_test" 2>/dev/null || true
          echo "[loop] cycle \${CYCLE_ID}: roll-meta test gate failed; worktree preserved at \$WT"
          trap '_inner_cleanup' EXIT
          exit 0
        fi
      fi
      _phase_begin publish_push
      # US-LOOP-068: roll-meta stories publish to roll-meta remote
      if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
        _loop_roll_meta_publish "\$WT" "\$BRANCH" "loop cycle \${CYCLE_ID}"
      else
        if [ "\$_is_doc_only" -eq 1 ]; then
          ( cd "\$WT" && _loop_publish_doc_pr "\$BRANCH" "doc: loop cycle \${CYCLE_ID}" )
        else
          ( cd "\$WT" && _loop_publish_pr "\$BRANCH" "loop cycle \${CYCLE_ID}" )
        fi
      fi
      _publish_status=\$?
      if [ "\$_publish_status" -eq 0 ]; then
        _phase_end publish_push ok
        # US-AUTO-044 Phase 2: the main loop no longer blocks on merge (this
        # replaces FIX-047's synchronous _loop_wait_pr_merge, now redundant).
        # The PR is handed to the dedicated PR Loop (com.roll.pr.<slug>, every
        # 5 min) which merges / rebases / closes it asynchronously. The story is
        # not re-picked meanwhile via the open-PR eligibility gate
        # (_loop_story_is_eligible, FIX-146); its ✅ Done rides the PR diff and
        # lands on main when the PR Loop merges. No false-Done risk: with
        # worktree isolation the Done lives only in the unmerged PR, never on the
        # loop's main checkout, so FIX-140's revert is no longer needed.
        echo "[loop] cycle \${CYCLE_ID}: PR published (\${BRANCH}); merge handed to PR Loop"
        _phase_begin cleanup
        # US-VIEW-011: emit terminal PR state (merged/closed/open) before cycle_end
        # so dashboard renders #NN ✓/↩/… correctly. Must run while branch ref
        # is still resolvable on remote — gh pr view <branch> needs the head ref.
        _loop_emit_pr_final "\$BRANCH" 2>/dev/null || true
        # US-LOOP-068: clean up roll-meta worktree before product worktree
        if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
          _loop_roll_meta_worktree_cleanup "\$WT" "\$BRANCH" "${project_path}" 2>/dev/null || true
        fi
        _worktree_cleanup "\$WT" "\$BRANCH"
        _loop_event cycle_end "\${CYCLE_ID}" "" "done" || true; _CYCLE_END_WRITTEN=1
        # US-OBS-014: normal-completion path — push a fresh status snapshot.
        _loop_push_status_snapshot || true
        _phase_end cleanup ok
        echo "[loop] cycle \${CYCLE_ID}: published; worktree cleaned"
      elif [ "\$_publish_status" -eq 2 ]; then
        # US-LOOP-068: for roll-meta, merge_back doesn't apply — skip straight to
        # the orphan-push safety net (roll-meta commits live on a different remote).
        _merged_back=0
        if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
          :
        elif ( cd "${project_path}" && _worktree_merge_back "\$BRANCH" ); then
          _worktree_cleanup "\$WT" "\$BRANCH"
          # US-LOOP-005 T3: gh unavailable + ff merge_back OK → cycle_end done
          _loop_event cycle_end "\${CYCLE_ID}" "" "done" || true; _CYCLE_END_WRITTEN=1
          echo "[loop] cycle \${CYCLE_ID}: gh unavailable; merged via ff and cleaned up"
          _merged_back=1
        fi
        # FIX-039: gh unavailable + merge_back failed — push orphan branch+tag to origin
        # as final safety net so code is never local-only before worktree cleanup.
        # Skip entirely when merge_back already finalized the cycle (the worktree is
        # gone, so re-pushing from \$WT would spuriously fail and raise a false alert).
        # US-LOOP-068: for roll-meta, push from roll-meta worktree
        if [ "\$_merged_back" -ne 1 ]; then
        _orphan_tag="loop-orphan-\${CYCLE_ID}"
        if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
          if ( cd "\$WT/.roll" && git push origin "\$BRANCH" 2>/dev/null \
               && git tag "\$_orphan_tag" 2>/dev/null \
               && git push origin "\$_orphan_tag" 2>/dev/null ); then
            _loop_roll_meta_worktree_cleanup "\$WT" "\$BRANCH" "${project_path}" 2>/dev/null || true
            _worktree_cleanup "\$WT" "\$BRANCH"
            # US-LOOP-005 T4: gh unavailable + orphan push OK → cycle_end orphan
            _loop_event cycle_end "\${CYCLE_ID}" "" "orphan" || true; _CYCLE_END_WRITTEN=1
            _worktree_alert "cycle \${CYCLE_ID}: gh+merge_back failed; FIX-039 pushed orphan+tag \${_orphan_tag}; worktree cleaned"
            echo "[loop] cycle \${CYCLE_ID}: FIX-039: orphan branch+tag \${_orphan_tag} pushed; worktree cleaned"
          else
            # US-LOOP-005 T5: gh unavailable + all failed → cycle_end failed
            _loop_event cycle_end "\${CYCLE_ID}" "" "failed" || true; _CYCLE_END_WRITTEN=1
            _worktree_alert "cycle \${CYCLE_ID}: gh+merge_back+push all failed; worktree preserved at \$WT"
            echo "[loop] cycle \${CYCLE_ID}: all publish paths failed; worktree preserved at \$WT"
          fi
        else
          if ( cd "\$WT" && git push origin "\$BRANCH" 2>/dev/null \
               && git tag "\$_orphan_tag" 2>/dev/null \
               && git push origin "\$_orphan_tag" 2>/dev/null ); then
            _worktree_cleanup "\$WT" "\$BRANCH"
            # US-LOOP-005 T4: gh unavailable + orphan push OK → cycle_end orphan
            _loop_event cycle_end "\${CYCLE_ID}" "" "orphan" || true; _CYCLE_END_WRITTEN=1
            _worktree_alert "cycle \${CYCLE_ID}: gh+merge_back failed; FIX-039 pushed orphan+tag \${_orphan_tag}; worktree cleaned"
            echo "[loop] cycle \${CYCLE_ID}: FIX-039: orphan branch+tag \${_orphan_tag} pushed; worktree cleaned"
          else
            # US-LOOP-005 T5: gh unavailable + all failed → cycle_end failed
            _loop_event cycle_end "\${CYCLE_ID}" "" "failed" || true; _CYCLE_END_WRITTEN=1
            _worktree_alert "cycle \${CYCLE_ID}: gh+merge_back+push all failed; worktree preserved at \$WT"
            echo "[loop] cycle \${CYCLE_ID}: all publish paths failed; worktree preserved at \$WT"
          fi
        fi
        fi
      else
        # FIX-039: PR publish failed — push orphan branch+tag to origin as safety net.
        # (_loop_publish_pr may have already pushed the branch; git push is idempotent.)
        # US-LOOP-068: for roll-meta, push from roll-meta worktree
        _orphan_tag="loop-orphan-\${CYCLE_ID}"
        if [ "\${_ROLL_META_TARGET:-0}" = "1" ]; then
          if ( cd "\$WT/.roll" && git push origin "\$BRANCH" 2>/dev/null \
               && git tag "\$_orphan_tag" 2>/dev/null \
               && git push origin "\$_orphan_tag" 2>/dev/null ); then
            _loop_roll_meta_worktree_cleanup "\$WT" "\$BRANCH" "${project_path}" 2>/dev/null || true
            _worktree_cleanup "\$WT" "\$BRANCH"
            # US-LOOP-005 T6: PR publish failed + orphan push OK → cycle_end orphan
            _loop_event cycle_end "\${CYCLE_ID}" "" "orphan" || true; _CYCLE_END_WRITTEN=1
            _worktree_alert "cycle \${CYCLE_ID}: PR publish failed; FIX-039 pushed orphan+tag \${_orphan_tag}; worktree cleaned"
            echo "[loop] cycle \${CYCLE_ID}: FIX-039: orphan branch+tag \${_orphan_tag} pushed; worktree cleaned"
          else
            # US-LOOP-005 T7: PR publish failed + orphan push failed → cycle_end failed
            _loop_event cycle_end "\${CYCLE_ID}" "" "failed" || true; _CYCLE_END_WRITTEN=1
            _worktree_alert "cycle \${CYCLE_ID}: PR publish failed; worktree preserved at \$WT (branch \$BRANCH)"
            echo "[loop] cycle \${CYCLE_ID}: PR publish failed; worktree preserved at \$WT"
          fi
        else
          if ( cd "\$WT" && git push origin "\$BRANCH" 2>/dev/null \
               && git tag "\$_orphan_tag" 2>/dev/null \
               && git push origin "\$_orphan_tag" 2>/dev/null ); then
            _worktree_cleanup "\$WT" "\$BRANCH"
            # US-LOOP-005 T6: PR publish failed + orphan push OK → cycle_end orphan
            _loop_event cycle_end "\${CYCLE_ID}" "" "orphan" || true; _CYCLE_END_WRITTEN=1
            _worktree_alert "cycle \${CYCLE_ID}: PR publish failed; FIX-039 pushed orphan+tag \${_orphan_tag}; worktree cleaned"
            echo "[loop] cycle \${CYCLE_ID}: FIX-039: orphan branch+tag \${_orphan_tag} pushed; worktree cleaned"
          else
            # US-LOOP-005 T7: PR publish failed + orphan push failed → cycle_end failed
            _loop_event cycle_end "\${CYCLE_ID}" "" "failed" || true; _CYCLE_END_WRITTEN=1
            _worktree_alert "cycle \${CYCLE_ID}: PR publish failed; worktree preserved at \$WT (branch \$BRANCH)"
            echo "[loop] cycle \${CYCLE_ID}: PR publish failed; worktree preserved at \$WT"
          fi
        fi
      fi
    fi
  else
    # US-LOOP-005 T8: claude session failed after retry budget → cycle_end failed
    _loop_event cycle_end "\${CYCLE_ID}" "" "failed" || true; _CYCLE_END_WRITTEN=1
    _worktree_alert "cycle \${CYCLE_ID}: claude exited \$_exit; worktree preserved at \$WT (branch \$BRANCH)"
    echo "[loop] cycle \${CYCLE_ID}: claude failed (exit \$_exit); worktree preserved at \$WT"
  fi
fi

# US-LOOP-008: cycle Phase Breakdown panel — printed to stdout (tmux readers)
# before runs.jsonl is appended, so the user sees timings even if jq is missing.
_print_phase_breakdown 2>/dev/null || true
_phases_for_runs=\$(_phases_to_json 2>/dev/null || echo '{}')
# FIX-044 / Step 5: Write loop cycle run summary to runs.jsonl
# Deterministic — runs in shell regardless of whether agent executes SKILL.md Step 5.
# US-LOOP-005: now routed through _runs_append so timeout/worktree-setup-fail
# share the same write logic. _runs_append is idempotent on run_id.
_runs_append "\$_cycle_status" "\$_cycle_tcr" "\$_cycle_built" "\$_phases_for_runs" 2>/dev/null || true
# US-EVAL-004: after the scored row lands, surface any repeated low-score
# pattern as a candidate draft (deduped, never auto-activated). Quiet so the
# cycle log stays clean; failures here never affect the cycle outcome.
_loop_signals --quiet 2>/dev/null || true
trap '_inner_cleanup' EXIT
INNER
  chmod +x "$inner_path"

  cat > "$script_path" << SCRIPT
#!/bin/bash -l
# FIX-050: portable PATH assembly before any brew-tool lookup (tmux, caffeinate
# on some systems, claude). Mirrors the inner script's bootstrap so even when
# launchd's plist EnvironmentVariables is stale, the runner self-repairs.
for _d in /opt/homebrew/bin /usr/local/bin /opt/local/bin "\$HOME/.local/bin" "\$HOME/.kimi-code/bin"; do
  case ":\$PATH:" in *":\$_d:"*) ;; *) [ -d "\$_d" ] && PATH="\$_d:\$PATH" ;; esac
done
export PATH
# caffeinate: prevent idle sleep from killing claude during cycles
caffeinate -i -w \$\$ &
# Active-window check — skipped when ROLL_LOOP_FORCE is set (manual 'roll loop now')
if [ -z "\$ROLL_LOOP_FORCE" ]; then
  h=\$(printf '%d' "\$(date +%H)")
  if [ "\$h" -lt ${active_start} ] || [ "\$h" -ge ${active_end} ]; then exit 0; fi
fi
# US-LOOP-019: migrate legacy control-plane files to project-local .roll/loop/.
# Then resolve the runtime dir for all subsequent path lookups.
_loop_migrate_legacy_paths "${slug}"
# US-LOOP-020: one-shot split of the legacy machine-wide runs.jsonl into each
# project's .roll/loop/runs.jsonl, then retire the legacy file. Idempotent.
_loop_migrate_legacy_runs 2>/dev/null || true
# US-AGENT-028: one-shot migrate legacy v1 agent-routes.yaml → v3 agents.yaml.
# Idempotent + atomic — the running loop only sees the new config next cycle.
_agents_migrate_v1_to_v3 2>/dev/null || true
_LOOP_RT_DIR=\$(_loop_runtime_dir "${slug}" 2>/dev/null || echo "")

# Pause check — 'roll loop pause' creates this marker to suspend scheduling
if [ -n "\$_LOOP_RT_DIR" ]; then
  PAUSE="\${_LOOP_RT_DIR}/PAUSE-${slug}"
else
  PAUSE="\${_SHARED_ROOT:-\${HOME}/.shared/roll}/loop/PAUSE-${slug}"
fi
if [ -z "\$ROLL_LOOP_FORCE" ] && [ -f "\$PAUSE" ]; then exit 0; fi
# FIX-037: orphan state detection & self-heal — if state.yaml says running
# but no LOCK process or tmux session exists, the previous cycle was killed
# (e.g. SIGKILL / sleep / terminal close). Heal state to idle so the next
# cycle can proceed normally; write ALERT for transparency.
# FIX-038: heartbeat is the primary liveness signal (avoids PID reuse race);
# LOCK pid check is secondary fallback for backward compatibility.
HEARTBEAT_TIMEOUT="\${ROLL_HEARTBEAT_TIMEOUT:-1800}"
# US-LOOP-019 / FIX-052: per-project STATE_FILE now in project-local .roll/loop/.
if [ -n "\$_LOOP_RT_DIR" ]; then
  STATE_FILE="\${_LOOP_RT_DIR}/state-${slug}.yaml"
else
  STATE_FILE="\${_SHARED_ROOT:-\${HOME}/.shared/roll}/loop/state-${slug}.yaml"
fi
if [ -f "\$STATE_FILE" ]; then
  _state=\$(grep '^status:' "\$STATE_FILE" | awk '{print \$2}' 2>/dev/null || echo "")
  if [ "\$_state" = "running" ]; then
    _still_active=false
    # FIX-038: heartbeat is primary signal
    if [ -n "\$_LOOP_RT_DIR" ]; then
      _heartbeat_file="\${_LOOP_RT_DIR}/.heartbeat-${slug}"
    else
      _heartbeat_file="\${_SHARED_ROOT:-\${HOME}/.shared/roll}/loop/.heartbeat-${slug}"
    fi
    if [ -f "\$_heartbeat_file" ]; then
      _hb_ts=\$(cat "\$_heartbeat_file" 2>/dev/null || echo "0")
      _now=\$(date -u +%s)
      _hb_age=\$(( _now - _hb_ts ))
      if [ "\$_hb_age" -lt "\$HEARTBEAT_TIMEOUT" ]; then
        _still_active=true
      fi
    fi
    # Fallback: LOCK pid check (for cycles without heartbeat, e.g. pre-FIX-038)
    if [ "\$_still_active" = false ]; then
      _lock_file="\$(dirname "\$0")/.LOCK-\$(basename "\$0" .sh | sed 's/^run-//')"
      if [ -f "\$_lock_file" ]; then
        _lock_pid=\$(head -1 "\$_lock_file" 2>/dev/null || echo "")
        [ -n "\$_lock_pid" ] && kill -0 "\$_lock_pid" 2>/dev/null && _still_active=true
      fi
    fi
    # Final: tmux session check
    if [ "\$_still_active" = false ]; then
      command -v tmux >/dev/null 2>&1 && tmux has-session -t "roll-loop-\$(basename "\$0" .sh | sed 's/^run-//')" 2>/dev/null && _still_active=true
    fi
    if [ "\$_still_active" = false ]; then
      echo "[\$(date '+%Y-%m-%dT%H:%M:%S%z')] FIX-037: orphan state detected (status=running, heartbeat stale or missing) — healing to idle" >> "\$LOG"
      echo "status: idle" > "\${STATE_FILE}.tmp" && mv "\${STATE_FILE}.tmp" "\$STATE_FILE"
      rm -f "\$_lock_file" 2>/dev/null || true
      # US-LOOP-019 / FIX-052: per-project ALERT file in project-local .roll/loop/.
      if [ -n "\$_LOOP_RT_DIR" ]; then
        _alert_file="\${_LOOP_RT_DIR}/ALERT-${slug}.md"
      else
        _alert_file="\$(dirname "\$0")/ALERT-${slug}.md"
      fi
      echo "\$(date '+%Y-%m-%dT%H:%M:%S%z') | FIX-037 auto-heal | Orphan state detected and cleared (status=running → idle)" >> "\$_alert_file" 2>/dev/null || true
      echo "[\$(date '+%Y-%m-%dT%H:%M:%S%z')] FIX-037: healed to idle, ALERT written" >> "\$LOG"
    fi
  fi
fi
LOCK="\$(dirname "\$0")/.LOCK-\$(basename "\$0" .sh | sed 's/^run-//')"
SESSION="roll-loop-\$(basename "\$0" .sh | sed 's/^run-//')"
INNER_SCRIPT="${inner_path}"
# FIX-139: machine/ops log path is supplied by the caller (now project-local
# .roll/loop/cron.log — see the _write_loop_runner_script call site). Ensure its
# directory exists before the early LOCK-skip echoes append to it.
LOG="${log_path}"
mkdir -p "\$(dirname "\$LOG")" 2>/dev/null || true
# US-LOOP-019: ensure .roll/loop/ is gitignored (idempotent).
if [ -n "\$_LOOP_RT_DIR" ] && [ -f "${project_path}/.gitignore" ]; then
  grep -qFx '.roll/loop/' "${project_path}/.gitignore" 2>/dev/null || \
    echo '.roll/loop/' >> "${project_path}/.gitignore"
fi
if [ -f "\$LOCK" ]; then
  prev_pid=\$(head -1 "\$LOCK" 2>/dev/null || echo "")
  if [ -n "\$prev_pid" ] && kill -0 "\$prev_pid" 2>/dev/null; then
    echo "[\$(date '+%Y-%m-%dT%H:%M:%S%z')] loop already running (PID \$prev_pid), skipping" >> "\$LOG"
    exit 0
  fi
  rm -f "\$LOCK"
fi
# Guard against stale-LOCK case: if the tmux session is already alive,
# a previous runner's LOCK was removed (e.g. parent terminal closed) but
# the work is still in progress — don't kill it.
if command -v tmux >/dev/null 2>&1 && tmux has-session -t "\$SESSION" 2>/dev/null; then
  echo "[\$(date '+%Y-%m-%dT%H:%M:%S%z')] tmux session \$SESSION still active, skipping" >> "\$LOG"
  exit 0
fi
echo "\$\$" > "\$LOCK"
trap 'rm -f "\$LOCK"' EXIT
if command -v tmux >/dev/null 2>&1; then
  tmux list-sessions -F "#{session_name}" 2>/dev/null | grep "^roll-loop-${slug}\$" | while read _s; do
    tmux kill-session -t "\$_s" 2>/dev/null || true
  done
  # FIX-132: syntax-check the inner script before spawning the tmux session.
  # A heredoc quoting regression or mid-cycle regeneration can silently produce
  # a syntactically broken script; catching it here prevents the session from
  # starting in a corrupted state and logging a misleading "exited 0, retrying".
  if ! bash -n "\$INNER_SCRIPT" 2>>"\$LOG"; then
    echo "[\$(date '+%Y-%m-%dT%H:%M:%S%z')] ABORT: inner script failed syntax check — cycle skipped (see log: \$LOG)" >> "\$LOG"
    exit 1
  fi
  # FIX-130: export ROLL_CYCLE_LOG_RAW BEFORE spawning the tmux session so
  # the inner script inherits it (env vars are inherited at spawn time, not
  # retroactively — exporting after new-session means inner never sees it and
  # _inner_cleanup skips log archiving, leaving only orphan .pipe-*.raw files).
  mkdir -p "${project_path}/.roll/cycle-logs"
  # Clean orphan .pipe-*.raw files from previous crashed cycles
  find "${project_path}/.roll/cycle-logs" -name '.pipe-*.raw' -delete 2>/dev/null || true
  CYCLE_LOG_RAW="${project_path}/.roll/cycle-logs/.pipe-\$\$.raw"
  export ROLL_CYCLE_LOG_RAW="\$CYCLE_LOG_RAW"
  tmux new-session -d -s "\$SESSION" -x 200 -y 50 "bash \"\$INNER_SCRIPT\""
  # FIX-139: pane output goes ONLY to the per-cycle raw (-> <CYCLE_ID>.log),
  # no cumulative tee into LOG — per-cycle logs are the single source, all kept
  # (no cap). Machine/ops events still append to LOG directly.
  tmux pipe-pane -t "\$SESSION" "cat >> \"\$ROLL_CYCLE_LOG_RAW\""
  # Auto-attach popup: when not muted, spawn a Terminal.app window attached
  # to the tmux session so the user can watch the loop work in real time.
  # FIX-054: terminal selection removed — fixed to macOS Terminal.app for
  # predictability (per-user detection silently failed on Ghostty upgrades).
  # Uses \`open -g\` so the window appears in the background and does not steal
  # focus. Replaces a prior osascript capture-frontmost / restore-focus dance
  # that triggered LaunchServices "where is <app>" prompts when the active
  # process name differed from its .app bundle name (e.g. MSTeams vs
  # Microsoft Teams.app).
  if [ -z "\${ROLL_LOOP_NO_POPUP:-}" ] && [ -z "\${BATS_TEST_NUMBER:-}" ] && [ ! -f "\${_LOOP_RT_DIR:-\${_SHARED_ROOT:-\$HOME/.shared/roll}/loop}/mute-${slug}" ] && [ "\$(uname)" = "Darwin" ]; then
    _attach_cmd="\${_LOOP_RT_DIR:-\${_SHARED_ROOT:-\$HOME/.shared/roll}/loop}/attach-\$SESSION.command"
    # Drop \`exec\` so the wrapping shell survives \`tmux attach\` exiting,
    # then \`read\` to hold the Terminal open until the user has had a
    # chance to scroll back through the cycle's output. Without this the
    # window closes the instant the tmux session ends (cycle_end kills
    # the session) and the entire scrollback disappears with it; the
    # cron-<slug>.log file still has the full transcript as a fallback.
    # FIX-131: after tmux session ends, open the cron log with less so the
    # user can scroll through the full cycle output instead of seeing nothing.
    # FIX-139: after the session ends, show ONLY this cycle's per-cycle log
    # (newest in .roll/cycle-logs/), not the global cumulative transcript.
    # US-LOOP-040: before the 'press enter to close' prompt, render this
    # cycle's exit summary (built/ci/todo/phases/alerts) so the user sees what
    # happened without scrolling tmux scrollback or opening the cron log. The
    # renderer is silent-fallback (python3 missing / broken JSON → no output,
    # exit 0) so it can never block the prompt below.
    printf '#!/bin/bash\\ntmux attach -t %s 2>/dev/null\\nLOGFILE=\$(ls -t "%s"/.roll/cycle-logs/*.log 2>/dev/null | head -1)\\necho\\nif [ -n "\$LOGFILE" ] && [ -f "\$LOGFILE" ]; then\\n  echo "================================================================"\\n  echo "  Cycle ended  —  showing this cycle log (arrows to scroll, q to close)"\\n  echo "================================================================"\\n  less -R +G "\$LOGFILE"\\nelse\\n  echo "================================================================"\\n  echo "  Cycle ended. Try: roll loop log"\\n  echo "================================================================"\\n  bash %s _loop_render_exit_summary %s 2>/dev/null || true\\n  echo\\n  echo "  press enter to close."\\n  read _\\nfi\\n' \\
      "\$SESSION" "${project_path}" "${roll_bin}" "${slug}" > "\$_attach_cmd" 2>/dev/null || true
    chmod +x "\$_attach_cmd" 2>/dev/null || true
    open -g -a Terminal "\$_attach_cmd" >/dev/null 2>&1 || true
  fi
  _OUTER_TIMEOUT=\$(( \${ROLL_LOOP_CYCLE_TIMEOUT_SEC:-2700} + 300 ))
  _outer_wait_start=\$(date +%s)
  while tmux has-session -t "\$SESSION" 2>/dev/null; do
    sleep 5
    if (( \$(date +%s) - _outer_wait_start > _OUTER_TIMEOUT )); then
      echo "[\$(date '+%Y-%m-%dT%H:%M:%S%z')] FIX-057: outer timeout (\${_OUTER_TIMEOUT}s) — killing tmux session \$SESSION" >> "\$LOG"
      tmux kill-session -t "\$SESSION" 2>/dev/null || true
      break
    fi
  done
else
  bash "\$INNER_SCRIPT" >> "\$LOG" 2>&1
fi
SCRIPT
  chmod +x "$script_path"
}

_launchd_is_loaded() {
  # Usage: _launchd_is_loaded <label>
  # Exit status is that of `launchctl print gui/<uid>/<label>`; all launchctl
  # output (stdout and stderr) is discarded.
  #
  # FIX-098: the probe deliberately targets the live launchd registry through
  # `launchctl print` and NOT `launchctl print-disabled`. The disabled-overrides
  # DB only records labels the user explicitly enabled/disabled, so after
  # `roll loop off` (bootout) followed by `roll update` the label is simply
  # absent from it and the former grep-based check reported a false-positive
  # "loaded". `launchctl print` exits 0 only while the agent is registered in
  # the current launchd session; any non-zero status means launchd does not
  # know the label.
  local service_target="gui/$(id -u)/$1"
  launchctl print "$service_target" >/dev/null 2>&1
}

# FIX-101 tripwire: refuse to mutate the host's launchd session when
# _LAUNCHD_DIR has been sandboxed (i.e. is not the canonical
# ${HOME}/Library/LaunchAgents). Tests that auto-sandbox _LAUNCHD_DIR for
# isolation (FIX-087) may still forget to set _LAUNCHD_SKIP_REGISTRY=1 or
# stub the launchctl binary; without this defensive layer the production
# label's plist path can get overwritten with a transient sandbox path,
# leading to launchd EX_CONFIG (exit 78) when the tmp dir is later cleaned
# and the next scheduled fire can't find the plist. Read-only ops (print*,
# list, version) are always allowed since they have no side effects.
_launchctl_safe() {
  # Usage: _launchctl_safe <launchctl-subcommand> [args...]
  # Forwards the call to `launchctl` and yields its exit status, except when
  # the sandbox gate below trips — then nothing is executed and 0 is returned.
  local verb="${1:-}" gate="on"
  case "$verb" in
    print | print-disabled | list | version | dumpstate | examine)
      # Read-only sub-commands never mutate host launchd state: no gate.
      gate="off"
      ;;
    *)
      # When `launchctl` is a shell function (the usual bats stub that records
      # calls), forward to it unconditionally: a stub cannot reach the host's
      # launchd, and tests such as
      # `_install_launchd_plists: bootout targets gui/<uid>/<label>` expect the
      # literal call to land in their captured log.
      [[ "$(type -t launchctl 2>/dev/null)" != "function" ]] || gate="off"
      ;;
  esac

  if [[ "$gate" == "on" ]]; then
    # FIX-101 defensive layer: a mutating call aimed at the real launchctl
    # binary is silently dropped when _LAUNCHD_DIR is anything other than the
    # canonical ${HOME}/Library/LaunchAgents, so a sandboxed test that forgot
    # to stub launchctl cannot overwrite a live label's plist path.
    local agents_dir="${HOME}/Library/LaunchAgents"
    if [[ "${_LAUNCHD_DIR:-$agents_dir}" != "$agents_dir" ]]; then
      return 0
    fi
  fi

  launchctl "$@"
}

_launchd_svc_state() {
  # Usage: _launchd_svc_state <service> <project_path>
  # Prints exactly one state word for the service's launchd job (FIX-098):
  #   enabled       — the label is registered in launchd (checked first; the
  #                   plist file is not consulted in this case)
  #   stale         — not registered, but the plist file exists on disk
  #   not-installed — not registered and no plist file
  # The older "installed-off" state is never printed by this function.
  local svc="$1" project_path="$2"
  local label plist state="not-installed"
  label=$(_launchd_label "$svc" "$project_path")
  plist=$(_launchd_plist_path "$svc" "$project_path")

  if _launchd_is_loaded "$label"; then
    state="enabled"
  elif [[ -f "$plist" ]]; then
    state="stale"
  fi
  echo "$state"
}

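# Install launchd plist files (disabled by default) and runner scripts for
# a given project path. Idempotent — skips unchanged files. Does NOT load a
# service that is not already loaded; when a plist's content changed and its
# label is already loaded, the label is re-registered (bootout + bootstrap).
# Schedule times are read from ~/.roll/config.yaml; missing fields are
# auto-derived from the project path hash so different projects don't contend.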
_install_launchd_plists() {
  # Usage: _install_launchd_plists <project_path>
  # For each service (loop, dream, pr) writes the runner script under
  # ${_SHARED_ROOT}/<svc>/run-<slug>.sh and the launchd plist, then compares
  # the plist content before/after the write. Only for plists whose content
  # changed (and only when _launchd_should_skip_registry says the registry may
  # be touched):
  #   - label already loaded → bootout + bootstrap to pick up the new plist
  #   - plist did not exist  → mark the label disabled
  # Globals read: ROLL_HOME, ROLL_PKG_DIR, _SHARED_ROOT, _LAUNCHD_DIR.
  # Output: a summary line via ok/msg (number of plists updated, or up to date).
  local project_path="$1"
  local sd="${ROLL_HOME}/skills"
  local shared="${_SHARED_ROOT}"

  mkdir -p "$_LAUNCHD_DIR"
  # FIX-194/FIX-195: brief/ci/alert loops retired — only loop/dream/pr remain.
  mkdir -p "${shared}/loop" "${shared}/dream" "${shared}/pr"

  local active_start active_end dream_hour dream_minute loop_period loop_offset
  local _aw; _aw=$(_loop_read_active_window "$project_path")
  active_start="${_aw%% *}"; active_end="${_aw##* }"
  # US-LOOP-012: use _loop_schedule_spec instead of raw loop_minute
  local loop_spec; loop_spec=$(_loop_schedule_spec "$project_path")
  loop_period="${loop_spec%% *}"
  loop_offset="${loop_spec##* }"
  dream_hour=$(_config_read_int "loop_dream_hour" "3")
  dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")

  # FIX-054: terminal preference removed — runner always uses Terminal.app.

  # US-AUTO-044: "pr" is the third service — a 5-min PR Loop (period=5, empty
  # hour → StartInterval=300). No skill (it drives _loop_pr_inbox, not an agent).
  # The five arrays below are parallel: index N describes services[N].
  local services=("loop" "dream" "pr")
  local skill_names=("roll-loop" "roll-.dream" "")
  local periods=("$loop_period" "60" "5")
  local offsets=("$loop_offset" "$dream_minute" "0")
  local hours=("" "$dream_hour" "")

  local updated=0
  # Keep the loop index function-local: bash scoping is dynamic, so an
  # undeclared `i` here would overwrite a caller's own `i`.
  local i
  local slug; slug=$(_project_slug "$project_path")
  # FIX-058: after FIX-056 introduced realpath normalization, the slug for an
  # existing project may have changed.  Migrate state before creating new files.
  _slug_migrate_from_legacy "$slug" "${shared}/loop"
  # US-OBS-010: when slug changed from path-based to remote-based, merge
  # old records into the new identity (dedup, atomic, .bak backup)
  _slug_migrate_to_remote "$project_path" "${shared}/loop"
  for i in "${!services[@]}"; do
    local svc="${services[$i]}"
    local skill="${skill_names[$i]}"
    local period="${periods[$i]}"
    local offset="${offsets[$i]}"
    local hour="${hours[$i]}"
    local label; label=$(_launchd_label "$svc" "$project_path")
    local plist; plist=$(_launchd_plist_path "$svc" "$project_path")
    local runner="${shared}/${svc}/run-${slug}.sh"
    # Falls back to "roll loop now" when the skill command cannot be built.
    local cmd; cmd=$(_agent_skill_cmd "${sd}/${skill}/SKILL.md" 2>/dev/null || echo "roll loop now")

    if [[ "$svc" == "loop" ]]; then
      # FIX-139: loop machine/ops log is project-local (.roll/loop/cron.log),
      # not the global cron-<slug>.log. Reader (FIX-137) reads it project-local;
      # per-cycle transcripts live in .roll/cycle-logs/ (single source, no dup).
      local loop_log="${project_path}/.roll/loop/cron.log"
      _write_loop_runner_script "$runner" "$project_path" "cd \"${project_path}\" && ${cmd}" "$loop_log" "$active_start" "$active_end" "${sd}/${skill}/SKILL.md"
    elif [[ "$svc" == "pr" ]]; then
      # US-AUTO-044 Phase 2: lightweight PR Loop runner — drives _loop_pr_inbox
      # every 5 min (no agent, no tmux). It stays sole owner of pr_state (Q1).
      local pr_log="${project_path}/.roll/loop/pr.log"
      mkdir -p "${project_path}/.roll/loop"
      _write_pr_loop_runner_script "$runner" "$project_path" "${ROLL_PKG_DIR}/bin/roll" "$pr_log"
    else
      # dream cron log is project-local, mirroring loop (FIX-139).
      local log="${project_path}/.roll/${svc}/cron.log"
      mkdir -p "${project_path}/.roll/${svc}"
      _write_runner_script "$runner" "$project_path" "cd \"${project_path}\" && ${cmd}" "$log"
    fi

    # Snapshot the plist before and after the write; an empty "before" means
    # the plist did not exist (or was empty) prior to this run.
    local before=""
    [[ -f "$plist" ]] && before=$(cat "$plist")
    _write_launchd_plist "$plist" "$label" "$project_path" "$period" "$offset" "$hour" "$runner"
    local after; after=$(cat "$plist")
    if [[ "$before" != "$after" ]]; then
      updated=$((updated + 1))
      # FIX-090/FIX-097: gate launchctl writes via central helper so a
      # sandboxed plist never gets registered into the user's REAL gui/<uid>
      # domain. See _launchd_should_skip_registry for the predicate rules.
      if ! _launchd_should_skip_registry; then
        if _launchd_is_loaded "$label"; then
          # FIX-027: use bootout/bootstrap so we don't disturb the label's
          # enabled flag in the launchd overrides db (which legacy
          # unload/load no-`-w` wipes on macOS Sonoma+, causing
          # `roll loop status` to falsely report off after `roll update`).
          local uid; uid=$(id -u)
          _launchctl_safe bootout "gui/${uid}/${label}" 2>/dev/null || true
          _launchctl_safe bootstrap "gui/${uid}" "$plist" 2>/dev/null || true
        elif [[ -z "$before" ]]; then
          # FIX-059: brand-new plist — macOS FSEvents auto-bootstraps any new
          # file dropped in ~/Library/LaunchAgents/, so projects never enabled
          # via 'roll loop on' would fire every hour. Immediately mark disabled
          # in the overrides db to block that auto-load.
          local uid; uid=$(id -u)
          _launchctl_safe disable "gui/${uid}/${label}" 2>/dev/null || true
        fi
      fi
    fi
  done

  if [[ $updated -gt 0 ]]; then
    ok "$(msg agent.launchd_plists_installed_updated_launchagents "${updated}")"
    echo "$(msg agent.run_roll_loop_on_to_activate)"
  else
    ok "$(msg agent.launchd_plists_up_to_date_launchagents)"
  fi
}

_agent_skill_cmd() {
  # Usage: _agent_skill_cmd <skill_path> [agent]
  # Prints one shell command line (meant to be eval'd / embedded in a runner)
  # that invokes the agent with a prompt made of an autonomous-mode directive
  # followed by the skill file's body with its leading `---` front-matter block
  # stripped by awk at run time.
  # Returns 1 (after an error message) when _agent_argv rejects the agent.
  local skill_path="$1"
  # FIX-134: accept an explicit agent (loop routing passes the resolved cycle
  # agent); default to the project agent for non-routed callers.
  local agent="${2:-$(_project_agent)}"
  # The path is embedded between single quotes inside the generated command, so
  # any single quote in it must be rewritten as '\'' or the command would no
  # longer parse. Paths without a single quote are emitted exactly as before.
  local sq="'" sq_escaped="'\\''"
  local quoted_path="${skill_path//$sq/$sq_escaped}"
  local strip="awk 'NR==1 && /^---$/{skip=1;next} skip && /^---$/{skip=0;next} !skip{print}' '${quoted_path}'"
  _agent_argv "$agent" plain "__PROMPT__" || {
    err "Unknown agent '${agent}'. Run: roll agent use <claude|kimi|deepseek|pi|openai|codex|opencode|qwen|antigravity>"
    return 1
  }
  # Cron-installed skills (dream / brief / loop) run autonomously and need to
  # Edit files (.roll/dream/, .roll/briefs/, BACKLOG, etc.). Claude Code 2.1.x's
  # pre-write approval UX silently blocks `claude -p` from applying edits in
  # non-interactive pipe mode — bypass it for the cron context.
  _agent_bypass_claude_perms
  # In cron context, use absolute claude path so a fresh shell can find it.
  [[ "$agent" == "claude" ]] && _AGENT_ARGV[0]="$(command -v claude 2>/dev/null || echo claude)"
  # Drop the prompt sentinel (always last), re-emit head args + quoted $(strip).
  local out="${_AGENT_ARGV[0]}" i prompt_idx=$((${#_AGENT_ARGV[@]} - 1))
  for ((i = 1; i < prompt_idx; i++)); do
    out+=" ${_AGENT_ARGV[i]}"
  done
  # FIX-152: prepend an explicit autonomous-execution directive ahead of the bare
  # SKILL.md body. Without it, conversational `-p` agents (notably kimi-code) read
  # the skill doc as pasted context and reply "what would you like me to do?",
  # returning in seconds with zero output → the cycle ends idle, no delivery.
  # pi/deepseek/claude tolerate the bare doc, but the directive is agent-agnostic
  # and hardens every autonomous cron skill (loop/dream/brief share this chokepoint).
  # Must stay free of " $ ` \ so it survives the later `eval` of the cycle command.
  local _autorun='[roll 自主模式] 你正在无人值守的自动化循环中运行,这不是对话。请立即、完整地执行下面这份技能文档描述的工作流,直到完成交付或写出 ALERT 为止;严禁反问、严禁等待确认、严禁只复述或总结而不动手。技能文档如下: '
  # printf (not echo) so backslashes in the escaped path are never reinterpreted.
  printf '%s\n' "${out} \"${_autorun}\$(${strip})\""
}

# FIX-134: build the full per-cycle agent command at RUNTIME, routing-aware.
# The loop inner script calls this with the resolved cycle agent (CYCLE_AGENT =
# ROLL_LOOP_ROUTED_AGENT or project agent) so routing actually switches the
# executed binary — instead of running a constant baked at `roll loop on` time.
# Reproduces the claude-only verbose / stream-json / add-dir enhancements that
# _write_loop_runner_script previously baked into the runner.
_loop_cycle_agent_cmd() {
  # Usage: _loop_cycle_agent_cmd <skill_path> [agent] [worktree]
  # Prints (no trailing newline) the command from _agent_skill_cmd, extended for
  # claude: the first "claude -p" gains
  # "--verbose --dangerously-skip-permissions --output-format stream-json", and
  # the first "--output-format stream-json" is followed by --add-dir "<worktree>".
  # Commands containing neither marker are printed unchanged.
  # Defaults: agent = _project_agent, worktree = $WT.
  # Returns 1 when skill_path is empty or _agent_skill_cmd fails.
  local skill_path="$1" agent="${2:-$(_project_agent)}" wt="${3:-$WT}"
  [ -n "$skill_path" ] || return 1
  local cmd; cmd=$(_agent_skill_cmd "$skill_path" "$agent") || return 1
  cmd="${cmd/claude -p/claude -p --verbose --dangerously-skip-permissions --output-format stream-json}"
  local marker="--output-format stream-json"
  if [[ "$cmd" == *"$marker"* ]]; then
    # Splice around the first marker instead of using ${cmd/pat/rep}: with
    # bash's patsub_replacement option an `&` inside the worktree path would be
    # expanded to the matched text and corrupt the --add-dir argument.
    local head="${cmd%%"$marker"*}" tail="${cmd#*"$marker"}"
    cmd="${head}${marker} --add-dir \"${wt}\"${tail}"
  fi
  printf '%s' "$cmd"
}

cmd_loop() {
  # Usage: cmd_loop [subcommand] [args...]
  # Dispatcher for `roll loop`. The sub-command (default: status) is consumed
  # by the single `shift` below; every arm then sees only the remaining
  # arguments in "$@" / "$1". Arms must NOT shift again — a second shift drops
  # the caller's first real argument (e.g. `gc --dry-run`) and, with no
  # arguments left, fails outright.
  # Unknown sub-commands print the help text to stdout and exit 1.
  local subcmd="${1:-status}"; shift || true
  case "$subcmd" in
    on)      _loop_on ;;
    off)     _loop_off ;;
    now)     _loop_now ;;
    test)    _loop_test "$@" ;;
    status)  _loop_status "$@" ;;
    eval)    _loop_eval "$@" ;;
    signals) _loop_signals "$@" ;;
    monitor) _loop_monitor "${1:-3}" ;;
    runs)    _loop_runs "$@" ;;
    log)     _loop_log "$@" ;;
    story)   _loop_story "$@" ;;
    events)  _loop_event_log "${1:-20}" ;;
    attach)  _loop_attach ;;
    mute)    _loop_mute ;;
    unmute)  _loop_unmute ;;
    pause)   _loop_pause ;;
    resume)  _loop_resume ;;
    reset)   _loop_reset ;;
    gc)      _loop_gc "$@" ;;
    notify)               _notify "${1:-roll}" "${2:-}" ;;
    enforce-tcr)          _loop_enforce_tcr "${1:-}" "${2:-}" ;;
    precheck-ci)          _loop_precheck_ci ;;
    hotfix-head-context)  _loop_hotfix_head_context "${1:-}" ;;
    branches)             _loop_branches "$(pwd -P)" ;;
    agent-routes)         _loop_agent_routes "${1:-show}" "${@:2}" ;;
    test-quality-check)   _loop_test_quality_check "$@" ;;
    *)  cat <<'HELP'
Usage: roll loop <on|off|now|test|status|monitor|runs|log|story|events|attach|mute|unmute|pause|resume|reset|gc|branches>

  on            Install launchd scheduler (loop + dream + pr)
  off           Remove launchd scheduler
  now           Run one cycle immediately
  test          Quick smoke test (tmux/popup/stream chain)
  status        Show scheduler state and current loop state
  eval [N]      Result-eval trend over the last N scored cycles (default 14)
  signals       Surface repeated low-score patterns as candidate drafts (📋 待人确认)
  monitor       Live dashboard: launchd status, queue, recent runs
  runs [N]      Show last N run summaries (default 10)
  log [id]      Show per-cycle log (default: latest; optional cycle-id or prefix)
  story <ID>    Show per-story rollup (cycles, duration, tokens, cost, PRs)
  events [N]    Show last N cycle events (default 20)
  attach        Attach to running loop tmux session (Ctrl-B D to detach)
  mute          Suppress auto-attach popup
  unmute        Re-enable auto-attach popup
  pause         Pause scheduling (keep plist, skip execution)
  resume        Resume scheduling after pause
  reset         Clear loop state (start fresh on next fire)
  gc [--dry-run] [--keep-days N]
                Garbage-collect orphan slugs, tmp debris, expired backups
  branches      List loop-related branches
  agent-routes  Show / lint agent routing config (.roll/agent-routes.yaml)

Internal (called by roll-loop SKILL):
  notify        Send macOS notification
  enforce-tcr   Verify TCR commit count for a completed story
  precheck-ci   Check HEAD CI status before scanning BACKLOG

Schedule is configured per-project in .roll/local.yaml:

  loop_schedule:
    period_minutes: 30   # 1-1440 (any minute interval)
    offset_minute: 7     # 0 – (period_minutes - 1)

See guide/en/loop.md for full documentation.
See 'roll config --help' for schedule configuration.
HELP
      exit 1 ;;
  esac
}

# Enable scheduled runs for the project in the current directory.
#
# macOS: regenerates the launchd plists, then enables + bootstraps every
#        service (loop, dream, pr) that is not loaded yet.
# Linux: appends one hourly loop entry and one daily dream entry to the user's
#        crontab, each tagged "${_LOOP_TAG}:<project path>".
# Prints the effective schedule and agent. Returns 0 both when the loop was
# just enabled and when it was already enabled.
_loop_on() {
  local project_path; project_path=$(pwd -P)
  local agent; agent=$(_project_agent)

  local active_start active_end loop_minute dream_hour dream_minute
  local _aw; _aw=$(_loop_read_active_window "$project_path")
  active_start="${_aw%% *}"; active_end="${_aw##* }"
  # US-LOOP-011: read schedule spec from project or global config
  local loop_spec loop_period loop_offset
  loop_spec=$(_loop_schedule_spec "$project_path")
  loop_period="${loop_spec%% *}"
  loop_offset="${loop_spec##* }"
  # Keep loop_minute for Linux crontab backward compat (only supports hourly)
  loop_minute="$loop_offset"
  local loop_sched_en loop_sched_zh
  loop_sched_en=$(_loop_schedule_desc "$loop_period" "$loop_offset" en)
  loop_sched_zh=$(_loop_schedule_desc "$loop_period" "$loop_offset" zh)
  dream_hour=$(_config_read_int "loop_dream_hour" "3")
  dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")

  if [[ "$(uname)" == "Darwin" ]]; then
    _install_launchd_plists "$project_path" >/dev/null

    # FIX-098: use launchctl bootstrap/enable instead of load -w.
    # `load -w` writes to the disabled-overrides DB which causes FIX-027's
    # re-source to break after `roll update`. bootstrap is idempotent and
    # does not disturb the overrides DB.
    local uid; uid=$(id -u)
    local all_loaded=true
    local svc
    for svc in loop dream pr; do
      local label; label=$(_launchd_label "$svc" "$project_path")
      local plist; plist=$(_launchd_plist_path "$svc" "$project_path")
      if ! _launchd_is_loaded "$label"; then
        all_loaded=false
        # FIX-097 guard: skip real launchctl when _LAUNCHD_DIR was auto-sandboxed.
        _launchd_should_skip_registry && continue
        # FIX-098 semantic: enable+bootstrap pair (better than load -w).
        # enable clears any disable-override; bootstrap registers with launchd.
        # FIX-101 wrapper additionally tripwire-gates each call so a sandboxed
        # _LAUNCHD_DIR can't accidentally touch host launchd state.
        _launchctl_safe enable "gui/${uid}/${label}" 2>/dev/null || true
        _launchctl_safe bootstrap "gui/${uid}" "$plist" 2>/dev/null || true
      fi
    done

    if $all_loaded; then
      warn "$(msg loop.loop_already_enabled_for_this_project)"; return 0
    fi

    ok "$(msg loop.loop_enabled)"
    msg loop.roll_loop_s_active_02d_00 \
      "$loop_sched_en" "$active_start" "$active_end" "$loop_sched_zh" "$active_start" "$active_end"
    msg loop.roll_dream_daily_at_02d_02d "$dream_hour" "$dream_minute" "$dream_hour" "$dream_minute"
    echo "  • Agent: ${agent}  (change: roll agent use <name>)"
    return 0
  fi

  # Linux: crontab
  local sd="${ROLL_HOME}/skills"
  local cron_tag="${_LOOP_TAG}:${project_path}"
  # `crontab -l` exits non-zero for a user who has no crontab yet. Read it once
  # and treat that case as "empty" so set -e cannot abort the install below.
  local current_cron; current_cron=$(crontab -l 2>/dev/null || true)
  # -F: the tag embeds a filesystem path and must be matched literally.
  if grep -qF -- "$cron_tag" <<<"$current_cron"; then
    warn "$(msg loop.loop_already_enabled_for_this_project_2)"; return 0
  fi

  mkdir -p "${_SHARED_ROOT}/loop" "${_SHARED_ROOT}/dream"

  # FIX-052: per-project cron logs so concurrent projects don't interleave.
  local slug; slug=$(_project_slug "$project_path")
  local loop_cmd dream_cmd
  loop_cmd="cd \"${project_path}\" && $(_agent_skill_cmd "${sd}/roll-loop/SKILL.md") >> ${_SHARED_ROOT}/loop/cron-${slug}.log 2>&1"
  # IDEA-051: dream cron log is project-local, mirroring loop (FIX-139).
  mkdir -p "${project_path}/.roll/dream"
  dream_cmd="cd \"${project_path}\" && $(_agent_skill_cmd "${sd}/roll-.dream/SKILL.md") >> ${project_path}/.roll/dream/cron.log 2>&1"

  # Existing entries (if any) first, then the two tagged roll entries.
  {
    if [[ -n "$current_cron" ]]; then
      printf '%s\n' "$current_cron"
    fi
    printf "%d * * * * %s %s:%s\n" "$loop_minute" "$loop_cmd" "$_LOOP_TAG" "$project_path"
    printf "%d %d * * * %s %s:%s\n" "$dream_minute" "$dream_hour" "$dream_cmd" "$_LOOP_TAG" "$project_path"
  } | crontab -

  ok "$(msg loop.loop_enabled_2)"
  msg loop.roll_loop_s_active_02d_00_2 \
    "$loop_sched_en" "$active_start" "$active_end" "$loop_sched_zh" "$active_start" "$active_end"
  msg loop.roll_dream_daily_at_02d_02d_2 "$dream_hour" "$dream_minute" "$dream_hour" "$dream_minute"
  echo "  • Agent: ${agent}  (change: roll agent use <name>)"
}

# Disable scheduled runs for the project in the current directory.
#
# macOS: unloads every loaded service (loop, dream, pr), removes the generated
#        runner scripts, and clears the launchctl disable-override per label.
# Linux: removes every crontab line tagged "${_LOOP_TAG}:<project path>".
# Returns 0 both when the loop was disabled and when it was not enabled.
_loop_off() {
  local project_path; project_path=$(pwd -P)

  if [[ "$(uname)" == "Darwin" ]]; then
    local any_loaded=false
    local _skip_off; _launchd_should_skip_registry && _skip_off=1 || _skip_off=0
    local svc
    for svc in loop dream pr; do
      local label; label=$(_launchd_label "$svc" "$project_path")
      if _launchd_is_loaded "$label"; then
        any_loaded=true
        # FIX-097: skip real launchctl in sandbox to avoid touching the user's
        # real launchd registry.
        [[ "$_skip_off" == "1" ]] && continue
        _launchctl_safe unload -w "$(_launchd_plist_path "$svc" "$project_path")" 2>/dev/null || true
      fi
    done
    if ! $any_loaded; then
      warn "$(msg loop.loop_not_enabled_for_this_project)"; return 0
    fi
    local slug; slug=$(_project_slug "$project_path")
    local uid; uid=$(id -u)
    for svc in loop dream pr; do
      rm -f "${_SHARED_ROOT}/${svc}/run-${slug}.sh"
      # FIX-081: reverse the FIX-059 auto-bootstrap guard. `_install_launchd_plists`
      # writes `launchctl disable gui/<UID>/<label>` for every brand-new plist
      # to block macOS FSEvents from auto-bootstrapping it. That write lands in
      # the host's /private/var/db/com.apple.xpc.launchd/disabled.<UID>.plist —
      # it ignores any HOME sandbox. Without a symmetric `enable` on teardown,
      # every short-lived project leaves 3 permanent ghost labels in the host's
      # disable list, polluting `launchctl print-disabled` forever even after
      # the project dir, plists, and ~/.roll are gone.
      local label; label=$(_launchd_label "$svc" "$project_path")
      # FIX-097: same gate — never touch host launchctl from a sandbox.
      [[ "$_skip_off" == "1" ]] && continue
      _launchctl_safe enable "gui/${uid}/${label}" 2>/dev/null || true
    done
    ok "$(msg loop.loop_disabled)"
    return 0
  fi

  # Linux: crontab
  local cron_tag="${_LOOP_TAG}:${project_path}"
  # A missing crontab makes `crontab -l` exit non-zero; treat it as empty.
  local current_cron; current_cron=$(crontab -l 2>/dev/null || true)
  # -F: the tag embeds a filesystem path and must be matched literally.
  if ! grep -qF -- "$cron_tag" <<<"$current_cron"; then
    warn "$(msg loop.loop_not_enabled_for_this_project_2)"; return 0
  fi
  # grep -v exits 1 when no line survives the filter (crontab held only roll
  # entries). Under pipefail + set -e that would abort before the confirmation,
  # so that exit status is deliberately ignored.
  { grep -vF -- "$cron_tag" <<<"$current_cron" || true; } | crontab -
  ok "$(msg loop.loop_disabled_2)"
}

# Liveness probe for one project's loop cycle (FIX-037 heal, `roll loop now`).
#
# Arguments: $1 - project slug (required).
# Returns:   0 as soon as any signal says the cycle is alive, 1 otherwise.
# Signals, in order:
#   1. heartbeat file younger than ROLL_HEARTBEAT_TIMEOUT seconds (default
#      1800, matching the outer heredoc's threshold) — primary, FIX-038;
#   2. LOCK file whose first line is the PID of a live process;
#   3. an existing tmux session named roll-loop-<slug>.
_loop_is_active() {
  local slug="${1:?slug required}"
  local max_age="${ROLL_HEARTBEAT_TIMEOUT:-1800}"
  local loop_root="${_SHARED_ROOT}/loop"

  local beat_file="${loop_root}/.heartbeat-${slug}"
  if [[ -f "$beat_file" ]]; then
    local stamp
    stamp=$(cat "$beat_file" 2>/dev/null || echo "")
    # Only a purely numeric epoch counts as a heartbeat.
    if [[ "$stamp" =~ ^[0-9]+$ ]]; then
      if [[ $(( $(date -u +%s) - stamp )) -lt $max_age ]]; then
        return 0
      fi
    fi
  fi

  local lock_file="${loop_root}/.LOCK-${slug}"
  if [[ -f "$lock_file" ]]; then
    local owner_pid
    owner_pid=$(head -1 "$lock_file" 2>/dev/null || echo "")
    if [[ -n "$owner_pid" ]] && kill -0 "$owner_pid" 2>/dev/null; then
      return 0
    fi
  fi

  if command -v tmux >/dev/null 2>&1 \
     && tmux has-session -t "roll-loop-${slug}" 2>/dev/null; then
    return 0
  fi
  return 1
}

# Manually trigger one loop cycle for the project in the current directory.
#
# Heals a stale "running" state first (state says running but no liveness
# signal), then runs the same generated runner script the scheduler uses.
# Returns: 0 if a live cycle is already running; 1 if the runner script is
#          missing; otherwise the runner's own exit status.
_loop_now() {
  local project_path; project_path=$(pwd -P)
  local slug; slug=$(_project_slug "$project_path")
  # Manual `roll loop now` must not bypass FIX-037 heal: if state says running
  # but no live signal exists, this trigger is the canonical recovery point.
  if [[ -f "$_LOOP_STATE" ]] && grep -q "^status: running" "$_LOOP_STATE" 2>/dev/null; then
    if _loop_is_active "$slug"; then
      warn "$(msg loop.loop_already_running_loop)"; return 0
    fi
    info "$(msg loop.stale_running_state_detected_healing_before)"
    printf "status: idle\n" > "$_LOOP_STATE"
    rm -f "${_SHARED_ROOT}/loop/.LOCK-${slug}" 2>/dev/null || true
  fi
  # Invoke the SAME runner script that launchd would invoke — same tmux,
  # same --verbose, same LOCK, same auto-attach popup. ROLL_LOOP_FORCE
  # bypasses only the active-window check (manual triggers aren't time-gated).
  local runner="${_SHARED_ROOT}/loop/run-${slug}.sh"
  if [[ ! -f "$runner" ]]; then
    err "Runner script not found: ${runner}"
    err "Run 'roll setup' or 'roll loop on' first to generate it."
    return 1
  fi
  info "$(msg loop.starting_new_loop_cycle)"
  # Capture the status instead of letting set -e abort here: the cleanup below
  # exists precisely for cycles that exit abnormally.
  local runner_rc=0
  ROLL_LOOP_FORCE=1 bash "$runner" || runner_rc=$?
  # Reset stale running state if the cycle exited without cleanup (e.g. API error, SIGKILL)
  if [[ -f "$_LOOP_STATE" ]] && grep -q "^status: running" "$_LOOP_STATE" 2>/dev/null; then
    printf "status: idle\n" > "$_LOOP_STATE"
  fi
  return "$runner_rc"
}

# Smoke-test the loop runner plumbing for the project in the current directory.
#
# Generates a throwaway runner (run-<slug>-test.sh) around a trivial agent
# command and executes it with ROLL_LOOP_FORCE=1, then reports pass/fail with
# the elapsed time.
# Options:
#   --agent <name>  agent label (default "claude"); non-claude agents get a
#                   mock echo command (US-LOOP-010)
#   --cmd <command> explicit agent command, overriding the default
# Returns: 0 on success; 1 when the real runner is missing or the test failed.
_loop_test() {
  local project_path; project_path=$(pwd -P)
  local slug; slug=$(_project_slug "$project_path")
  local runner="${_SHARED_ROOT}/loop/run-${slug}.sh"
  if [[ ! -f "$runner" ]]; then
    err "Runner not found: ${runner}"
    err "Run 'roll loop on' first to generate it."
    return 1
  fi
  # FIX-052: per-project log so test cycle output doesn't mix with other projects'.
  local log="${_SHARED_ROOT}/loop/cron-${slug}.log"
  local test_runner="${_SHARED_ROOT}/loop/run-${slug}-test.sh"

  # US-LOOP-010: --agent <name> lets integration tests exercise the
  # multi-agent passthrough path without needing a real pi binary.
  local agent="claude"
  local agent_cmd=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --agent|--cmd)
        # A flag given as the last word has no value: fall back to the default
        # instead of tripping set -u on $2 or set -e on `shift 2`.
        if [[ "$1" == "--agent" ]]; then
          agent="${2:-claude}"
        else
          agent_cmd="${2:-}"
        fi
        if [[ $# -ge 2 ]]; then shift 2; else shift; fi
        ;;
      *) shift ;;
    esac
  done
  if [[ -z "$agent_cmd" ]]; then
    if [[ "$agent" == "claude" ]]; then
      agent_cmd='claude -p "Reply with a single word: hello"; sleep 10'
    else
      agent_cmd="echo 'mock ${agent} output line 1'; echo 'mock ${agent} output line 2'"
    fi
  fi

  # FIX-054: terminal preference removed — runner always uses Terminal.app.
  local active_start active_end
  local _aw; _aw=$(_loop_read_active_window "$project_path")
  active_start="${_aw%% *}"; active_end="${_aw##* }"

  info "$(msg loop.generating_test_runner_agent "${agent}")"
  _write_loop_runner_script "$test_runner" "$project_path" \
    "${agent_cmd}" \
    "$log" "$active_start" "$active_end"

  info "$(msg loop.starting_smoke_test_agent "${agent}")"
  info "Watch for: tmux session + terminal popup + stream-json events flowing"
  info "$(msg loop.observing_tmux_session_terminal_popup_stream)"

  local start_time; start_time=$(date +%s)
  # `|| exit_code=$?` keeps set -e from aborting before the failure is reported.
  local exit_code=0
  ROLL_LOOP_FORCE=1 bash "$test_runner" || exit_code=$?
  local elapsed=$(( $(date +%s) - start_time ))

  if [[ $exit_code -eq 0 ]]; then
    ok "$(msg loop.smoke_test_passed_s_agent_smoke "${elapsed}" "${agent}")"
  else
    err "$(msg loop.smoke_test_failed_exit_s_agent "${exit_code}" "${elapsed}" "${agent}")"
    return 1
  fi
}

# US-EVAL-003: `roll loop eval [N]` — result-eval trend over the last N scored
# cycles (objective score, distinct from the skill self-score).
#
# Arguments: $1 - optional N (digits only), or -h/--help.
# Validates N and hands off to the Python view in --eval mode, which does the
# aggregation. Returns 0 after printing help, 1 on a non-numeric N, otherwise
# the Python view's exit status.
_loop_eval() {
  case "${1:-}" in
    -h|--help)
      cat <<'HELP'
Usage: roll loop eval [N]

  Result-eval trend over the last N scored cycles (default 14).
  Reads each runs.jsonl record's result_eval block and reports the mean and
  minimum cycle score (1..10), each rubric dimension's hit-rate, and a trend
  arrow. Cycles without a result_eval (older schema) are skipped. With fewer
  than 3 scored cycles, prints an "(n/a) need 3" notice.

  近 N 轮 cycle 的结果评分趋势（默认 14）。
  读取每条 runs.jsonl 的 result_eval，输出均分 / 最低分 / 各维度命中率 / 趋势箭头。
  无 result_eval 的旧记录跳过；样本不足 3 个时提示 (n/a) need 3。

Examples:
  roll loop eval
  roll loop eval 30
HELP
      return 0
      ;;
  esac

  local window="${1:-}"
  local view="${ROLL_PKG_DIR}/lib/roll-loop-status.py"

  # No N given: let the view apply its own default.
  if [[ -z "$window" ]]; then
    python3 "$view" --eval
    return
  fi

  if ! [[ "$window" =~ ^[0-9]+$ ]]; then
    err "roll loop eval: N must be a positive integer (got '$window')"
    return 1
  fi
  python3 "$view" --eval "$window"
}

# US-EVAL-004: self-evolution signals — surface repeated low-score patterns.
#
# Reads the project's scored runs.jsonl, asks the rubric's detector
# (lib/loop_result_eval.py --signals) for any dimension that has been low for
# N cycles in a row, then for each *fresh* signal (deduped on its stable key
# against <runtime dir>/signals-seen-<slug>) appends a CANDIDATE backlog draft
# to .roll/signals/candidates.md marked "📋 待人确认". It never edits the real
# backlog, never activates a story, never touches code — it only exposes.
#
# Surfaced candidates feed the brief's improvement-signal section; the brief
# skill reads candidates.md. `roll loop signals` runs this on demand and the
# cycle-finish hook calls it once per cycle so standing patterns get raised
# exactly once.
#
# Options: --streak N (default 3), --quiet, -h/--help.
# Returns: always 0 (missing runs file / missing jq are reported, not fatal).
_loop_signals() {
  local quiet=false streak=3
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        cat <<'HELP'
Usage: roll loop signals [--streak N] [--quiet]

  Detect repeated low-score patterns in the cycle result-eval history and
  expose them as improvement signals: each fresh pattern appends a CANDIDATE
  backlog draft (IDEA/FIX, marked 📋 待人确认) to .roll/signals/candidates.md.
  Deduped per pattern, so a standing issue is raised once, not every cycle.
  Never edits the real backlog, activates a story, or changes code.

  检测 cycle 结果评分中反复出现的低分模式，暴露成改善信号：每个新模式向
  .roll/signals/candidates.md 追加一条候选 backlog 草稿（IDEA/FIX，标 📋 待人确认）。
  按模式去重，同一问题只提一次；绝不改真实 backlog / 激活故事 / 改代码。

Options:
  --streak N   consecutive low cycles required to fire (default 3)
  --quiet      suppress the "no new signals" line (used by the cycle hook)
HELP
        return 0 ;;
      --streak)
        streak="${2:-3}"
        # `shift 2` fails (fatal under set -e) when --streak is the last word.
        if [[ $# -ge 2 ]]; then shift 2; else shift; fi ;;
      --streak=*) streak="${1#--streak=}"; shift ;;
      --quiet) quiet=true; shift ;;
      *) shift ;;
    esac
  done

  # US-LOOP-020: read the project-local runs.jsonl.
  local runs_src; runs_src=$(_loop_runs_file)
  if [[ ! -f "$runs_src" ]] || [[ ! -s "$runs_src" ]]; then
    $quiet || echo "$(msg loop.no_loop_runs_yet 2>/dev/null || echo 'no loop runs yet')"
    return 0
  fi
  command -v jq >/dev/null 2>&1 || { $quiet || err "jq required for roll loop signals"; return 0; }

  # Prefer the main project dir (inner runner runs from a worktree, so pwd is
  # the wrong tree — candidates/dedup must land in the canonical .roll/).
  local project_path; project_path="${ROLL_MAIN_PROJECT:-$(pwd -P)}"
  local project_slug; project_slug=$(_project_slug "$project_path")

  # Records for this project, oldest→newest, as a JSON array for the detector.
  local records
  records=$(jq -s -c --arg p "$project_slug" \
    '[ .[] | select(.project == $p) ]' "$runs_src" 2>/dev/null) || records="[]"

  local signals
  signals=$(printf '%s' "$records" \
    | python3 "${ROLL_PKG_DIR}/lib/loop_result_eval.py" --signals --streak "$streak" 2>/dev/null) \
    || signals="[]"
  [[ -n "$signals" ]] || signals="[]"

  # Dedup store + candidate drafts both live with the project (roll-meta side).
  local rt_dir="${_LOOP_RT_DIR:-${project_path}/.roll/loop}"
  local seen_file="${rt_dir}/signals-seen-${project_slug}"
  local cand_file="${project_path}/.roll/signals/candidates.md"
  mkdir -p "$rt_dir" "$(dirname "$cand_file")" 2>/dev/null || true
  touch "$seen_file" 2>/dev/null || true

  # next candidate id (CAND-NNN) — scan existing candidates so re-runs keep counting up.
  local last_id=0
  if [[ -f "$cand_file" ]]; then
    # `|| true`: a candidates file without any CAND- id makes grep exit 1,
    # which pipefail + set -e would otherwise turn into an abort.
    last_id=$(grep -oE 'CAND-[0-9]+' "$cand_file" 2>/dev/null \
      | grep -oE '[0-9]+' | sort -n | tail -1 || true)
    [[ -n "$last_id" ]] || last_id=0
    # Ids are zero-padded ("008"); force base 10 so they are not read as octal.
    last_id=$(( 10#$last_id ))
  fi

  local new_count=0
  local keys; keys=$(printf '%s' "$signals" | jq -r '.[].key' 2>/dev/null)
  local key
  while IFS= read -r key; do
    [[ -n "$key" ]] || continue
    # Dedup: same standing pattern is surfaced once.
    if grep -qxF "$key" "$seen_file" 2>/dev/null; then
      continue
    fi
    local kind summary dim streak_n
    kind=$(printf '%s' "$signals" | jq -r --arg k "$key" '.[] | select(.key==$k) | .kind' 2>/dev/null)
    summary=$(printf '%s' "$signals" | jq -r --arg k "$key" '.[] | select(.key==$k) | .summary' 2>/dev/null)
    dim=$(printf '%s' "$signals" | jq -r --arg k "$key" '.[] | select(.key==$k) | .dim' 2>/dev/null)
    streak_n=$(printf '%s' "$signals" | jq -r --arg k "$key" '.[] | select(.key==$k) | .streak' 2>/dev/null)
    [[ -n "$kind" && "$kind" != "null" ]] || kind="IDEA"

    last_id=$(( last_id + 1 ))
    local cand_id; cand_id=$(printf 'CAND-%03d' "$last_id")
    local ts; ts=$(date -u +%Y-%m-%dT%H:%M:%SZ)
    {
      printf '\n## %s — %s (%s) 📋 待人确认\n' "$cand_id" "$dim" "$kind"
      printf -- '- Detected: %s\n' "$ts"
      printf -- '- Pattern: %s\n' "$key"
      printf -- '- Signal: %s\n' "$summary"
      printf -- '- 信号：result-eval 维度 %s 连续 %s 轮低分；候选 %s，待人确认后再激活。\n' \
        "$dim" "$streak_n" "$kind"
    } >> "$cand_file"

    printf '%s\n' "$key" >> "$seen_file"
    new_count=$(( new_count + 1 ))
    $quiet || echo "signal: $cand_id $dim → candidate $kind ($summary)"
  done <<< "$keys"

  if [[ "$new_count" -eq 0 ]]; then
    $quiet || echo "no new improvement signals (result-eval patterns)"
    return 0
  fi
  $quiet || echo "${new_count} candidate draft(s) → ${cand_file} (📋 待人确认, not activated)"
  return 0
}

# `roll loop status` entry point: picks the renderer and forwards all arguments.
#
# ROLL_UI=v2 (the default when unset) uses the redesigned Python view; any
# other value (e.g. ROLL_UI=v1) falls back to the legacy bash implementation.
# Returns the chosen renderer's exit status.
_loop_status() {
  # FIX-060: backfill merged PRs before rendering — independent of cycle ticks,
  # so the dashboard reflects merges that happened while the loop was paused.
  # Best-effort: output and failures are discarded.
  _loop_backfill_merged >/dev/null 2>&1 || true

  case "${ROLL_UI:-v2}" in
    v2) python3 "${ROLL_PKG_DIR}/lib/roll-loop-status.py" "$@" ;;
    *)  _legacy_loop_status "$@" ;;
  esac
}

# `roll loop story <STORY-ID> [--days N] [--json]` — per-story rollup.
#
# Prints usage when called without a story id or with -h/--help; otherwise
# forwards all arguments to lib/roll-loop-story.py.
# Returns: 0 after an explicit help request, 1 when the story id is missing,
#          otherwise the Python script's exit status.
_loop_story() {
  local first="${1:-}"
  if [[ -z "$first" || "$first" == "-h" || "$first" == "--help" ]]; then
    cat <<'HELP'
Usage: roll loop story <STORY-ID> [--days N] [--json]

  Show a per-story rollup across cycles: count, span, duration, tokens,
  cost, model, PR landings, and the last 3 cycles. Story ID is case-
  insensitive (us-loop-004 == US-LOOP-004).

Examples:
  roll loop story US-LOOP-004
  roll loop story us-loop-004 --days 90
  roll loop story US-LOOP-004 --json | jq .cost
HELP
    # Asking for help is a success (as in `roll loop eval --help`); only a
    # missing STORY-ID is a usage error.
    if [[ -n "$first" ]]; then
      return 0
    fi
    return 1
  fi
  python3 "${ROLL_PKG_DIR}/lib/roll-loop-story.py" "$@"
}

# Legacy (ROLL_UI=v1) bash rendering of `roll loop status`.
#
# Prints, for the project in the current directory: scheduler/service state
# (launchd services on macOS, crontab entry elsewhere), whether the loop is
# paused, the auto-attach mute state, and — when present — the alert file and
# the raw loop state file.
_legacy_loop_status() {
  local project_path; project_path=$(pwd -P)
  local agent; agent=$(_project_agent)
  local _is_paused=false
  [[ -f "$_LOOP_STATE" ]] && grep -q "^status: paused" "$_LOOP_STATE" 2>/dev/null && _is_paused=true
  echo ""
  if [[ "$(uname)" == "Darwin" ]]; then
    echo -e "  Services   Agent: ${CYAN}${agent}${NC}"
    local svc
    for svc in loop dream pr; do
      local state; state=$(_launchd_svc_state "$svc" "$project_path")
      if [[ "$svc" == "loop" ]] && $_is_paused; then
        local _paused_at; _paused_at=$(grep '^paused_at:' "$_LOOP_STATE" 2>/dev/null | awk '{print $2}' | tr -d '"')
        local _dur=""
        if [[ -n "$_paused_at" ]]; then
          local _pe _ne _sec
          # paused_at is a UTC ("…Z") timestamp. GNU date understands the Z
          # suffix; the BSD fallback needs -u, otherwise the value is parsed as
          # local time and the duration is off by the UTC offset.
          _pe=$(date -d "$_paused_at" +%s 2>/dev/null || date -j -u -f "%Y-%m-%dT%H:%M:%SZ" "$_paused_at" +%s 2>/dev/null || echo 0)
          # 0 means "could not parse": show no duration rather than a bogus one.
          if [[ "$_pe" -gt 0 ]]; then
            _ne=$(date +%s); _sec=$(( _ne - _pe ))
            _dur="  ($(( _sec / 3600 ))h $(( (_sec % 3600) / 60 ))m ago)"
          fi
        fi
        echo -e "    ${YELLOW}loop     ⏸ paused${NC}${_dur}   run: roll loop resume"
      else
        # Only the pr service shows the age of its last tick.
        local _tick_age=""
        case "$svc" in pr)
          _tick_age=$(_loop_tick_age "$svc")
          [ -n "$_tick_age" ] && _tick_age="  tick ${_tick_age}"
        esac
        case "$state" in
          enabled)       echo -e "    ${GREEN}${svc}     ● enabled${NC}${_tick_age}" ;;
          stale|installed-off) echo -e "    ${YELLOW}${svc}     ⚠ STALE — plist present but not loaded${NC}   run: roll loop on" ;;
          not-installed) echo -e "    ${RED}${svc}     ○ not installed${NC}   run: roll setup" ;;
        esac
      fi
    done
  else
    local loop_enabled=false
    crontab -l 2>/dev/null | grep -q "${_LOOP_TAG}:${project_path}" && loop_enabled=true
    if $_is_paused; then
      echo -e "  Scheduler  ${YELLOW}⏸ paused${NC}   run: roll loop resume"
    elif $loop_enabled; then
      echo -e "  Scheduler  ${GREEN}● enabled${NC}   Agent: ${CYAN}${agent}${NC}"
    else
      echo -e "  Scheduler  ${YELLOW}○ disabled${NC}   run: roll loop on"
    fi
  fi
  echo ""
  if [[ -f "$_LOOP_MUTE_FILE" ]]; then
    echo -e "  Auto-attach  ${YELLOW}muted${NC}   run: roll loop unmute"
  else
    echo -e "  Auto-attach  ${GREEN}live${NC}    run: roll loop mute"
  fi
  [[ -f "$_LOOP_ALERT" ]] && { echo ""; echo -e "  ${RED}⚠ ALERT${NC}  (${CYAN}roll alert${NC} to manage)"; sed 's/^/    /' "$_LOOP_ALERT"; }
  [[ -f "$_LOOP_STATE" ]] && { echo ""; echo "  State:"; sed 's/^/    /' "$_LOOP_STATE"; }
  echo ""
}

# Pause scheduling for the project in the current directory.
#
# macOS: unloads the loop service (nothing to do, with a warning, when it is
#        not loaded).
# Linux: drops a PAUSE-<slug> marker in the runtime dir reported by
#        _loop_runtime_dir, or in ${_SHARED_ROOT}/loop when none is reported.
# Finally records "status: paused" with a UTC paused_at timestamp in the loop
# state file.
_loop_pause() {
  local project_path paused_at
  project_path=$(pwd -P)
  paused_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  case "$(uname)" in
    Darwin)
      local label
      label=$(_launchd_label "loop" "$project_path")
      if ! _launchd_is_loaded "$label"; then
        warn "$(msg loop.loop_not_enabled_nothing_to_pause)"
        return 0
      fi
      # FIX-097: never touch host launchctl from a sandboxed plist path.
      if ! _launchd_should_skip_registry; then
        _launchctl_safe unload -w "$(_launchd_plist_path "loop" "$project_path")" 2>/dev/null || true
      fi
      ;;
    *)
      local slug marker_dir
      slug=$(_project_slug "$project_path")
      marker_dir=$(_loop_runtime_dir "$slug" 2>/dev/null || echo "")
      # No runtime dir reported: use the shared loop dir for the marker.
      if [ -z "$marker_dir" ]; then
        marker_dir="${_SHARED_ROOT}/loop"
      fi
      mkdir -p "$marker_dir"
      touch "${marker_dir}/PAUSE-${slug}"
      ;;
  esac

  mkdir -p "$(dirname "$_LOOP_STATE")"
  printf 'status: paused\npaused_at: "%s"\npaused_reason: manual\n' "$paused_at" > "$_LOOP_STATE"
  ok "$(msg loop.loop_paused)"
}

# Resume the loop for the project in the current directory.
#
# Two modes, chosen from the loop state file:
#   * scheduler resume — state is "paused" (set by `roll loop pause`): reload
#     the launchd service (macOS) or remove the PAUSE markers (Linux), then
#     mark the state idle;
#   * interrupt resume — state exists and is neither paused nor running: hand
#     over to the roll-loop skill.
# Without a state file, or when the state says running, only a warning is
# printed and 0 is returned.
_loop_resume() {
  local project_path
  project_path=$(pwd -P)

  # No state at all: neither a pause nor an interrupted story to pick up.
  if [[ ! -f "$_LOOP_STATE" ]]; then
    warn "$(msg loop.no_loop_state_found_nothing_to)"
    return 0
  fi

  # Scheduler resume: loop was manually paused via `roll loop pause`
  if grep -q "^status: paused" "$_LOOP_STATE" 2>/dev/null; then
    case "$(uname)" in
      Darwin)
        local label plist
        label=$(_launchd_label "loop" "$project_path")
        plist=$(_launchd_plist_path "loop" "$project_path")
        # FIX-097: never touch host launchctl from a sandboxed plist path.
        if [[ -f "$plist" ]] && ! _launchd_should_skip_registry; then
          _launchctl_safe load -w "$plist" 2>/dev/null || true
        fi
        ;;
      *)
        local slug rt_dir
        slug=$(_project_slug "$project_path")
        rt_dir=$(_loop_runtime_dir "$slug" 2>/dev/null || echo "")
        # The marker may sit in the runtime dir, the shared dir, or both.
        if [ -n "$rt_dir" ]; then
          rm -f "${rt_dir}/PAUSE-${slug}"
        fi
        rm -f "${_SHARED_ROOT}/loop/PAUSE-${slug}"
        ;;
    esac
    printf "status: idle\n" > "$_LOOP_STATE"
    ok "$(msg loop.loop_resumed)"
    return 0
  fi

  # Interrupt resume: loop was running a Story and crashed
  if grep -q "status: running" "$_LOOP_STATE" 2>/dev/null; then
    warn "$(msg loop.loop_already_running_loop_2)"
    return 0
  fi
  info "$(msg loop.resuming_loop_from_last_state)"
  _agent_run_skill "roll-loop"
}

# Clear loop state so the next fire starts fresh: removes the loop state file
# (reporting whether there was one) and always removes the directory reported
# by _loop_heal_dir.
_loop_reset() {
  if [[ ! -f "$_LOOP_STATE" ]]; then
    info "$(msg loop.no_loop_state_to_clear)"
  else
    rm -f "$_LOOP_STATE"
    ok "$(msg loop.loop_state_cleared_will_start_fresh)"
  fi

  local heal_dir
  heal_dir="$(_loop_heal_dir)"
  rm -rf "$heal_dir"
}

# Suppress the auto-attach popup by creating the (empty) mute marker file.
# While the marker exists, runner scripts skip the osascript Terminal-popup
# step on their next fire. Loop output still goes to tmux + log; users can run
# `roll loop attach` manually.
_loop_mute() {
  local marker_dir
  marker_dir="$(dirname "$_LOOP_MUTE_FILE")"
  mkdir -p "$marker_dir"
  # Create or truncate: only the file's existence matters.
  printf '' > "$_LOOP_MUTE_FILE"
  ok "$(msg loop.muted_auto_attach_disabled)"
}

# Re-enable the auto-attach popup by deleting the mute marker file. Succeeds
# whether or not the marker existed.
_loop_unmute() {
  local marker="$_LOOP_MUTE_FILE"
  rm -f "$marker"
  ok "$(msg loop.unmuted_auto_attach_live)"
}

# Attach the terminal to the tmux session (roll-loop-<slug>) that a running
# loop iteration of the current project writes to.
# Returns 1 when tmux is not installed or no such session exists; on success
# the shell is replaced by `tmux attach` (exec), so nothing runs afterwards.
_loop_attach() {
  local project_path slug session
  project_path=$(pwd -P)
  slug=$(_project_slug "$project_path")
  session="roll-loop-${slug}"

  command -v tmux >/dev/null 2>&1 || {
    warn "$(msg loop.tmux_not_installed_install_with_brew)"
    return 1
  }

  tmux has-session -t "$session" 2>/dev/null || {
    info "$(msg loop.no_running_loop_session_for_this)"
    info "Wait for next scheduled fire, or run: roll loop now"
    return 1
  }

  exec tmux attach -t "$session"
}

# FIX-125: report whether we are running inside a loop cycle.
# Cycle context is signalled by either of two env vars being non-empty:
# ROLL_LOOP_AGENT (exported by the cycle runner) or ROLL_CYCLE_LOG_RAW
# (exported by the outer cycle script). Callers that touch the canonical
# ${HOME}/Library/LaunchAgents directly (_loop_gc, cmd_offboard) use this to
# refuse host-loop mutations from inside a cycle. Read-only ops are unaffected.
# Returns 0 inside a cycle, 1 otherwise.
_loop_in_cycle() {
  if [[ -z "${ROLL_LOOP_AGENT:-}" && -z "${ROLL_CYCLE_LOG_RAW:-}" ]]; then
    return 1
  fi
  return 0
}

# Helper for _loop_gc: expire files older than a cutoff.
# Arguments: $1 - dry-run flag (true|false)
#            $2 - cutoff epoch; files with mtime below it are expired
#            $3 - label used in the "[DRY-RUN] <label>: <file>" line
#            $4… - candidate paths (an unmatched glob is passed literally and
#                  skipped by the -f check)
# Increments the caller's gc_count (bash dynamic scoping) once per expired file.
_loop_gc_expire() {
  local dry_run="$1" cutoff="$2" label="$3"
  shift 3
  local f mtime
  for f in "$@"; do
    [[ -f "$f" ]] || continue
    mtime=$(_file_mtime "$f")
    if [[ "$mtime" -lt "$cutoff" ]]; then
      if $dry_run; then
        echo "[DRY-RUN] ${label}: $(basename "$f")"
      else
        rm -f "$f"
      fi
      gc_count=$((gc_count + 1))
    fi
  done
}

# US-LOOP-021: garbage-collect orphan slugs, tmp debris, and expired backups.
# Usage: _loop_gc [--dry-run] [--keep-days N]
# Retention (days) for *.bak backups, highest priority first:
#   --keep-days N > ROLL_LOOP_GC_RETENTION_DAYS > .roll/local.yaml
#   loop_gc.retention_days > 30.
# backup-before-merge-*.tgz and *.migrated-* use fixed 5- and 7-day windows.
# Returns 1 when refused inside a loop cycle or when keep-days is invalid.
_loop_gc() {
  # FIX-125: refuse from inside a loop cycle. Phase 1 below scans/mutates
  # ${HOME}/Library/LaunchAgents directly — running it from a cycle would let
  # one project's tick remove another project's plist under the host's launchd
  # domain. Read-only ops (status, runs) are unaffected; only the GC mutator
  # is gated.
  if _loop_in_cycle; then
    echo "roll loop gc: refusing — cycle-context tripwire (FIX-125)" >&2
    echo "  This command scans ~/Library/LaunchAgents directly. Running it" >&2
    echo "  from inside a loop cycle is a known host-state corruption path." >&2
    return 1
  fi

  local dry_run=false
  local keep_days=30
  local keep_days_from_cli=false

  # Parse arguments
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run) dry_run=true; shift ;;
      --keep-days)
        if [[ $# -lt 2 ]]; then
          echo "roll loop gc: --keep-days requires a value" >&2
          return 1
        fi
        keep_days="$2"; keep_days_from_cli=true; shift 2 ;;
      *) shift ;;
    esac
  done

  # Retention config: an explicit --keep-days wins; otherwise env > local.yaml
  # > default.
  if ! $keep_days_from_cli; then
    if [[ -n "${ROLL_LOOP_GC_RETENTION_DAYS:-}" ]]; then
      keep_days="$ROLL_LOOP_GC_RETENTION_DAYS"
    elif [[ -f .roll/local.yaml ]]; then
      local yaml_val; yaml_val=$(_config_read_int "loop_gc.retention_days" "" 2>/dev/null || true)
      [[ -n "$yaml_val" ]] && keep_days="$yaml_val"
    fi
  fi

  # keep_days feeds shell arithmetic below; accept plain digits only.
  if ! [[ "$keep_days" =~ ^[0-9]+$ ]]; then
    echo "roll loop gc: keep-days must be a non-negative integer (got '${keep_days}')" >&2
    return 1
  fi
  keep_days=$(( 10#$keep_days ))  # base 10 even with leading zeros

  local loop_dir="${_SHARED_ROOT:-${HOME}/.shared/roll}/loop"
  # Always use canonical LaunchAgents — auto-sandbox only redirects writes.
  local plist_dir="${HOME}/Library/LaunchAgents"
  local now_epoch; now_epoch=$(date +%s)
  local gc_count=0

  # ── Phase 1: orphan slug detection ──
  local slug plist proj f
  for plist in "$plist_dir"/com.roll.loop.*.plist; do
    [[ -f "$plist" ]] || continue
    local fname; fname=$(basename "$plist" .plist)
    # Extract slug: com.roll.loop.<slug> → <slug>
    slug="${fname#com.roll.loop.}"
    [[ -z "$slug" ]] && continue

    # Resolve project path from plist WorkingDirectory key
    # (inlined from _loop_resolve_project_path — US-LOOP-018)
    proj=$(awk '/<key>WorkingDirectory<\/key>/{f=1;next} f{gsub(/^[[:space:]]*<string>|<\/string>[[:space:]]*$/,"");print;exit}' "$plist" 2>/dev/null)

    if [[ -z "$proj" || ! -d "$proj" ]]; then
      # Expired slug — project directory doesn't exist
      local ts; ts=$(date +%Y%m%dT%H%M%S)
      local archive_dir="${loop_dir}/archived/${slug}-${ts}"

      if $dry_run; then
        echo "[DRY-RUN] orphan slug: $slug (project not found)"
        gc_count=$((gc_count + 1))
        continue
      fi

      echo "gc: archiving orphan slug $slug"
      mkdir -p "$archive_dir"

      # Move runner scripts
      for f in "${loop_dir}/run-${slug}.sh" \
               "${loop_dir}/run-${slug}-inner.sh" \
               "${loop_dir}/attach-roll-loop-${slug}.command"; do
        [[ -f "$f" ]] && mv "$f" "$archive_dir/"
      done

      # Move plist (it's already unloaded since project dir is gone)
      [[ -f "$plist" ]] && mv "$plist" "$archive_dir/"

      # Clean up data files for this orphan slug (FIX-143)
      for f in "${loop_dir}/state-${slug}.yaml" \
               "${loop_dir}/runs-${slug}.jsonl" \
               "${loop_dir}/events-${slug}.ndjson" \
               "${loop_dir}/cron-${slug}.log"; do
        [[ -f "$f" ]] && rm -f "$f"
      done

      gc_count=$((gc_count + 1))
    fi
  done

  # ── Phase 2: tmp debris cleanup ──

  # runs.jsonl.tmp.* — always safe to remove (FIX-123 cleans stale on entry,
  # but files from cycles that hard-crashed without the FIX-123 trap may remain)
  for f in "$loop_dir"/runs.jsonl.tmp.*; do
    [[ -f "$f" ]] || continue
    if $dry_run; then
      echo "[DRY-RUN] tmp debris: $(basename "$f")"
    else
      rm -f "$f"
    fi
    gc_count=$((gc_count + 1))
  done

  # backup-before-merge-*.tgz older than 5 days
  _loop_gc_expire "$dry_run" "$((now_epoch - 5 * 86400))" "old backup" \
    "$loop_dir"/backup-before-merge-*.tgz

  # *.migrated-* older than 7 days
  _loop_gc_expire "$dry_run" "$((now_epoch - 7 * 86400))" "old migrated" \
    "$loop_dir"/*.migrated-*

  # *.bak files (events/runs backups) older than keep_days (FIX-143)
  _loop_gc_expire "$dry_run" "$((now_epoch - keep_days * 86400))" "old .bak" \
    "$loop_dir"/*.bak

  if $dry_run; then
    echo "gc: dry-run complete ($gc_count items would be cleaned)"
  else
    echo "gc: $gc_count items cleaned, keep-days=$keep_days"
  fi
}

# (Durations are pretty-printed as "Xs" / "Ym" / "Yh Zm" by _loop_runs_dur,
# defined further below.)
#
# US-VIEW-019: compute the slowest phase and its share of the cycle from one
# runs.jsonl line's `phases` object.
# Arguments: $1 - a single JSON record.
# Outputs:   "<abbr> <pct>%" (no trailing newline), or nothing when jq is
#            unavailable or the record has no usable phases data.
# Abbreviations match the AC: agent_invoke→agent, publish_wait_merge→pr-wait,
# publish_push→publish, worktree_setup→worktree; other names are unchanged.
_loop_runs_slowest_phase() {
  local record="$1"
  if ! command -v jq >/dev/null 2>&1; then
    return 0
  fi

  local sum top_key top_val
  sum=$(jq -r '(.phases // {}) | to_entries | map(.value) | add // 0' <<<"$record")
  case "$sum" in
    ""|0|null) return 0 ;;
  esac

  top_key=$(jq -r '(.phases // {}) | to_entries | sort_by(-.value) | .[0].key // ""' <<<"$record")
  top_val=$(jq -r '(.phases // {}) | to_entries | sort_by(-.value) | .[0].value // 0' <<<"$record")
  if [ -z "$top_key" ]; then
    return 0
  fi

  local label
  case "$top_key" in
    agent_invoke)       label="agent" ;;
    publish_wait_merge) label="pr-wait" ;;
    publish_push)       label="publish" ;;
    worktree_setup)     label="worktree" ;;
    *)                  label="$top_key" ;;
  esac

  # Integer percentage, rounded to nearest (half of the divisor added first).
  local share=$(( (top_val * 100 + sum / 2) / sum ))
  printf '%s %d%%' "$label" "$share"
}

# US-VIEW-019: render the full Phase Breakdown panel for one cycle, in the
# same shape the inner runner script prints.
# Arguments: $1 - cycle_id to look up in runs.jsonl.
# Outputs:   a table of phases sorted by duration (descending) with each
#            phase's share (one decimal) and a 20-column bar, then the total.
# Returns:   1 when jq is missing or the cycle is not found; 0 otherwise
#            (including "no runs file yet" and "cycle has no phases data").
_loop_runs_detail() {
  local cycle_id="$1"
  command -v jq >/dev/null 2>&1 || { err "jq required for --detail"; return 1; }
  local runs_src; runs_src=$(_loop_runs_file)
  if [[ ! -f "$runs_src" ]]; then
    echo "$(msg loop.no_runs_jsonl_yet)"
    return 0
  fi
  local row
  # first(...) stops at the first matching record inside jq itself. Piping to
  # `head -1` instead can kill jq with SIGPIPE, which pipefail + set -e would
  # turn into an abort.
  row=$(jq -nc --arg cid "$cycle_id" 'first(inputs | select(.cycle_id == $cid))' "$runs_src")
  if [[ -z "$row" ]]; then
    echo "$(msg loop.cycle_not_found "$cycle_id")"
    return 1
  fi
  local has_phases
  has_phases=$(jq -r '(.phases // {}) | length' <<<"$row")
  if [[ "$has_phases" == "0" ]]; then
    echo "$(msg loop.cycle_has_no_phases_data_pre "$cycle_id")"
    return 0
  fi
  echo ""
  echo "─── Cycle $cycle_id Phase Breakdown ───"
  local total
  total=$(jq -r '(.phases // {}) | to_entries | map(.value) | add // 0' <<<"$row")
  # Guard the divisions below against a zero/negative total.
  [ "$total" -le 0 ] && total=1
  jq -r '(.phases // {}) | to_entries | sort_by(-.value) | .[] | "\(.value) \(.key)"' <<<"$row" \
    | while read -r dur name; do
        [[ -z "$name" ]] && continue
        # Share in tenths of a percent, printed as "<int>.<tenth>%".
        local pct=$(( (dur * 1000) / total ))
        local pct_str
        pct_str=$(printf '%d.%d%%' $((pct / 10)) $((pct % 10)))
        local bar="" bar_len=$(( (dur * 20) / total ))
        [ "$bar_len" -gt 0 ] && bar=$(printf '█%.0s' $(seq 1 "$bar_len"))
        printf '  %-22s %6ds  (%6s)  %s\n' "$name" "$dur" "$pct_str" "$bar"
      done
  echo "  ──────────────────────────────────────"
  printf '  %-22s %6ds\n\n' "Total" "$total"
}

# Render a duration given in seconds as a compact string:
#   under a minute → "<n>s", under an hour → "<n>m" (whole minutes),
#   otherwise "<h>h <m>m". A missing or empty argument counts as 0.
_loop_runs_dur() {
  local secs="${1:-0}"
  if (( secs < 60 )); then
    printf "%ds" "$secs"
  elif (( secs < 3600 )); then
    printf "%dm" "$(( secs / 60 ))"
  else
    local hours=$(( secs / 3600 ))
    local minutes=$(( (secs % 3600) / 60 ))
    printf "%dh %dm" "$hours" "$minutes"
  fi
}

# Format a single JSONL run record for display.
# Arguments: $1 - one JSON object (a runs.jsonl line)
#            $2 - "true" to prefix the summary with "[<project basename>] "
#            $3 - "1" to parse the timestamp with BSD/macOS `date -j`;
#                 any other value uses GNU `date -d`
# Globals:   _LOOP_RUNS_BACKLOG (read, optional) — backlog text used for the
#            ID→description lookup; set by _loop_runs. When unset or empty the
#            built IDs are listed without descriptions.
# Output:    one summary line; "built" records also get one bullet per built ID.
# Returns:   0 (prints "(jq required)" and returns 0 when jq is missing).
_loop_runs_format_line() {
  local line="$1" show_project="$2" is_darwin="$3"
  command -v jq >/dev/null 2>&1 || { echo "  (jq required)"; return 0; }

  local ts status project tcr duration alerts run_id reason built_count skipped_count
  ts=$(jq -r '.ts // ""' <<<"$line")
  status=$(jq -r '.status // ""' <<<"$line")
  project=$(jq -r '.project // ""' <<<"$line")
  tcr=$(jq -r '.tcr_count // 0' <<<"$line")
  duration=$(jq -r '.duration_sec // 0' <<<"$line")
  alerts=$(jq -r '.alerts // 0' <<<"$line")
  run_id=$(jq -r '.run_id // ""' <<<"$line")
  reason=$(jq -r '.reason // ""' <<<"$line")
  built_count=$(jq -r '(.built // []) | length' <<<"$line")
  skipped_count=$(jq -r '(.skipped // []) | length' <<<"$line")

  # Convert the timestamp to local HH:MM. If `date` cannot parse it, fall back
  # to cutting HH:MM straight out of the ISO string.
  local hhmm="" epoch=""
  if [[ "$is_darwin" == "1" ]]; then
    epoch=$(date -j -u -f "%Y-%m-%dT%H:%M:%SZ" "$ts" "+%s" 2>/dev/null) || epoch=""
    [[ -n "$epoch" ]] && hhmm=$(date -j -f "%s" "$epoch" "+%H:%M" 2>/dev/null) || hhmm=""
  else
    hhmm=$(date -d "$ts" "+%H:%M" 2>/dev/null) || hhmm=""
  fi
  [[ -z "$hhmm" ]] && hhmm=$(printf "%s" "$ts" | sed -E 's/.*T([0-9]{2}):([0-9]{2}).*/\1:\2/')
  local prefix=""
  if [[ "$show_project" == "true" ]]; then
    prefix="[$(basename "$project")] "
  fi

  case "$status" in
    built)
      local skipped_note=""
      [[ "$skipped_count" -gt 0 ]] && skipped_note=", ${skipped_count} skipped"
      local items_word; [[ "$built_count" -eq 1 ]] && items_word="item" || items_word="items"
      # US-VIEW-019: append slowest phase summary when phases data is present.
      local slowest_note=""
      local slowest_str; slowest_str=$(_loop_runs_slowest_phase "$line")
      [[ -n "$slowest_str" ]] && slowest_note=", slowest=${slowest_str}"
      printf "  %s  %s✅ built %d %s (%d tcr%s, %s%s)\n" \
        "$hhmm" "$prefix" "$built_count" "$items_word" "$tcr" "$skipped_note" "$(_loop_runs_dur "$duration")" "$slowest_note"
      # ":-" keeps this safe under `set -u` when the function is called
      # without _loop_runs having set the global first.
      local backlog="${_LOOP_RUNS_BACKLOG:-}"
      local id desc
      while IFS= read -r id; do
        [[ -z "$id" ]] && continue
        desc=""
        if [[ -n "$backlog" ]]; then
          # Find the first "|"-separated row whose second field is the ID
          # (bare or as "[ID]...") and take its third field as the description.
          desc=$(printf "%s\n" "$backlog" | awk -F'|' -v id="$id" '
            NF >= 3 {
              cell = $2; gsub(/^[[:space:]]+|[[:space:]]+$/, "", cell)
              if (cell == id || cell ~ "^\\[" id "\\]") {
                gsub(/^[[:space:]]+|[[:space:]]+$/, "", $3); print $3; exit
              }
            }')
        fi
        if [[ -n "$desc" ]]; then
          # Keep bullets on one line: cap descriptions at 72 characters.
          [[ ${#desc} -gt 72 ]] && desc="${desc:0:69}..."
          printf "    • %-14s %s\n" "$id" "$desc"
        else
          printf "    • %s\n" "$id"
        fi
      done < <(jq -r '(.built // []) | .[]' <<<"$line")
      ;;
    idle)
      printf "  %s  %s○ idle — no Todo items\n" "$hhmm" "$prefix"
      ;;
    failed)
      local msg="${reason:-unknown}"
      printf "  %s  %s✗ FAILED — %s\n" "$hhmm" "$prefix" "$msg"
      ;;
    *)
      printf "  %s  %s? %s\n" "$hhmm" "$prefix" "$status"
      ;;
  esac
}

# `roll loop runs [N] [--all] [--detail <cycle-id>]` — show recent loop
# iteration summaries, newest first.
# Arguments: N                 - how many records to show (default 10)
#            --all             - aggregate every project's runs
#            --detail <id>     - print the Phase Breakdown panel for one cycle
#            --detail=<id>     - same, single-argument form
#            anything else is ignored
# Globals:   _LOOP_RUNS_BACKLOG (set while formatting, unset afterwards)
# Returns:   0 on success or when there is nothing to show; 1 when jq is
#            missing or --detail is given without a cycle id; with --detail,
#            the status of _loop_runs_detail.
_loop_runs() {
  local n=10 all_flag=false detail_cycle=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --all) all_flag=true; shift ;;
      --detail)
        # `shift 2` fails when --detail is the last word: under `set -e` that
        # aborts silently, without it the loop never advances. Check first.
        if [[ $# -lt 2 ]]; then
          err "--detail requires a cycle id"
          return 1
        fi
        detail_cycle="$2"
        shift 2
        ;;
      --detail=*) detail_cycle="${1#--detail=}"; shift ;;
      [0-9]*) n="$1"; shift ;;
      *) shift ;;
    esac
  done

  # US-VIEW-019: --detail <cycle_id> prints the Phase Breakdown panel.
  if [[ -n "$detail_cycle" ]]; then
    _loop_runs_detail "$detail_cycle"
    return $?
  fi

  # US-LOOP-020: read the current project's local runs.jsonl. --all aggregates
  # every project's file (enumerated via launchd plists), merged + time-sorted.
  local runs_src; runs_src=$(_loop_runs_file)

  if ! command -v jq >/dev/null 2>&1; then
    err "$(msg loop.jq_required_for_roll_loop_runs)"
    return 1
  fi

  local project_path; project_path=$(pwd -P)
  local project_slug; project_slug=$(_project_slug "$project_path")
  local filtered
  if $all_flag; then
    # US-LOOP-020: cross-project view — enumerate plists, traverse each
    # project's .roll/loop/runs.jsonl, merge and sort by ts (oldest→newest).
    # Backfill is intentionally skipped here: it would require resolving and
    # querying gh for every project's repo (slow + N network calls). The
    # single-project path below backfills, so per-project views stay fresh.
    filtered=$(_loop_runs_aggregate_all)
  else
    # FIX-060: refresh merged status before reading, so paused-window merges show up.
    _loop_backfill_merged "$runs_src" >/dev/null 2>&1 || true
    if [[ ! -f "$runs_src" ]] || [[ ! -s "$runs_src" ]]; then
      echo "$(msg loop.no_loop_runs_yet)"
      return 0
    fi
    filtered=$(jq -c --arg p "$project_slug" 'select(.project == $p)' "$runs_src")
  fi

  if [[ -z "$filtered" ]]; then
    echo "$(msg loop.no_loop_runs_for_current_project)"
    return 0
  fi

  # Records are stored oldest→newest; reverse them, then keep the first N.
  local reversed; reversed=$(printf "%s\n" "$filtered" | awk '{a[NR]=$0} END{for(i=NR; i>=1; i--) print a[i]}')
  local recent; recent=$(printf "%s\n" "$reversed" | head -n "$n")

  local _is_darwin=""
  [[ "$(uname)" == "Darwin" ]] && _is_darwin="1"

  # Load the backlog once; _loop_runs_format_line reads it for descriptions.
  _LOOP_RUNS_BACKLOG=""
  [[ -f "$project_path/.roll/backlog.md" ]] && _LOOP_RUNS_BACKLOG=$(cat "$project_path/.roll/backlog.md")

  while IFS= read -r line; do
    [[ -z "$line" ]] && continue
    _loop_runs_format_line "$line" "$all_flag" "$_is_darwin"
  done <<<"$recent"
  unset _LOOP_RUNS_BACKLOG
}

# Send a macOS system notification. No-op when muted, non-macOS, or osascript
# unavailable.
# Arguments: $1 - notification title (default "roll")
#            $2 - notification body (default empty)
# Globals:   _LOOP_MUTE_FILE (read) — when this file exists nothing is sent
# Returns:   always 0; a failing osascript is ignored.
_notify() {
  local title="${1:-roll}"
  local body="${2:-}"
  [ "$(uname)" = "Darwin" ] || return 0
  # ":-" keeps the check safe under `set -u` when the mute path is not set.
  [ -f "${_LOOP_MUTE_FILE:-}" ] && return 0
  command -v osascript >/dev/null 2>&1 || return 0
  # Both strings land inside AppleScript double-quoted literals. Escape
  # backslashes and double quotes so story ids, branch names or error text
  # cannot end the literal early or inject script.
  title=$(printf '%s' "$title" | sed 's/[\\"]/\\&/g')
  body=$(printf '%s' "$body" | sed 's/[\\"]/\\&/g')
  osascript -e "display notification \"${body}\" with title \"${title}\"" >/dev/null 2>&1 || true
}

# Print how many commits since <started_at> have a subject starting with
# "tcr:". FIX-D 2026-05-25: `--all` is required so commits that live only on a
# cycle worktree branch (not yet merged into the current HEAD) are counted even
# when the runner has chdir'ed back to the main repo before enforce-tcr runs.
# Arguments: $1 - value for git log --since
# Output:    the count, "0" when git prints nothing.
_loop_tcr_count() {
  local since_ts="$1"
  git log --all --oneline --since="${since_ts}" 2>/dev/null |
    awk 'BEGIN { hits = 0 }
         /^[a-f0-9]+ tcr:/ { hits += 1 }
         END { print hits }'
}

# US-LOOP-016: print one cycle log preceded by a "# cycle <id> · <time>" header.
# The id is the file name without ".log". When it starts with
# YYYYMMDD-HHMM the header shows "YYYY-MM-DD HH:MM"; otherwise it repeats the id.
# Arguments: $1 - path to the log file
_loop_log_show() {
  local file="$1"
  local id; id=$(basename "$file" .log)
  local ts="$id"
  if [[ "$id" =~ ^([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2}) ]]; then
    ts="${BASH_REMATCH[1]}-${BASH_REMATCH[2]}-${BASH_REMATCH[3]} ${BASH_REMATCH[4]}:${BASH_REMATCH[5]}"
  fi
  printf '# cycle %s · %s\n' "$id" "$ts"
  cat "$file"
}

# US-LOOP-016: roll loop log [cycle-id] — view per-cycle logs stored in
# <project>/.roll/cycle-logs.
# Arguments: $1 - optional cycle id or id prefix; without it the latest log
#                 is shown
# Returns:   0 when a log (or a "no logs" notice) was printed;
#            1 when the query matches no log or matches several.
_loop_log() {
  local project_path; project_path=$(pwd -P)
  local logs_dir="${project_path}/.roll/cycle-logs"
  local query="${1:-}"

  # Directory missing or empty — friendly message.
  if [[ ! -d "$logs_dir" ]] || [[ -z "$(ls -A "$logs_dir" 2>/dev/null)" ]]; then
    echo "$(msg loop.no_cycle_logs_found_run_roll)"
    return 0
  fi

  # Candidates are collected with a glob instead of parsing `ls`: an unmatched
  # glob cannot fail a pipeline under `set -e -o pipefail`, and names are never
  # word-split.
  local f
  local -a matches=()

  if [[ -z "$query" ]]; then
    # US-LOOP-020: cron.log is the legacy machine-wide aggregate; steer users to
    # per-cycle search. Notice on stderr so it never pollutes piped log output.
    echo "note: cron.log is a legacy aggregate; use 'roll loop log <cycle-id>' for per-cycle search" >&2
    echo "提示：cron.log 是旧的聚合日志；按周期查看请用 'roll loop log <cycle-id>'" >&2
    # Cycle filenames encode the start timestamp (YYYYMMDD-HHMMSS-PID.log), so
    # the name that sorts last is the most recent cycle. Name order is used
    # rather than mtime, which is unreliable when files are created in the
    # same sub-second window. Glob results are already sorted by name.
    for f in "$logs_dir"/*.log; do
      [[ -e "$f" ]] || continue   # unmatched glob stays literal; skip it
      matches+=("$f")
    done
    if (( ${#matches[@]} == 0 )); then
      echo "$(msg loop.no_cycle_logs_found_run_roll_2)"
      return 0
    fi
    _loop_log_show "${matches[${#matches[@]}-1]}"
    return 0
  fi

  # Exact match first.
  local exact="${logs_dir}/${query}.log"
  if [[ -f "$exact" ]]; then
    _loop_log_show "$exact"
    return 0
  fi

  # Prefix match: query is quoted, so only the trailing *.log is a pattern.
  for f in "$logs_dir/${query}"*.log; do
    [[ -e "$f" ]] || continue
    matches+=("$f")
  done
  local count=${#matches[@]}

  if (( count == 0 )); then
    echo "$(msg loop.no_cycle_log_matching "${query}")"
    return 1
  elif (( count == 1 )); then
    _loop_log_show "${matches[0]}"
    return 0
  else
    echo "$(msg loop.ambiguous_prefix_matches_logs "${query}" "${count}")"
    for f in "${matches[@]}"; do
      echo "  $(basename "$f" .log)"
    done
    return 1
  fi
}

# Print "owner/repo" for the origin remote when it points at github.com.
# Recognised URL forms: git@github.com:…, ssh://git@github.com/…,
# https://github.com/… and http://github.com/…; a trailing ".git" is dropped.
# Returns non-zero when there is no origin, the host is not github.com, or
# nothing is left after stripping.
# Decoupled from `gh` auto-detection so SSH config host rewrites don't break it.
_gh_repo_slug() {
  local url
  url=$(git config --get remote.origin.url 2>/dev/null) || return 1

  local prefix
  for prefix in 'git@github.com:' 'ssh://git@github.com/' 'https://github.com/' 'http://github.com/'; do
    if [[ "$url" == "$prefix"* ]]; then
      url="${url#"$prefix"}"
      url="${url%.git}"
      if [[ -z "$url" ]]; then
        return 1
      fi
      printf "%s\n" "$url"
      return
    fi
  done
  return 1
}

# Succeeds (0) when a `gh` command can be resolved by the shell, fails (1)
# otherwise.
_gh_available() {
  if command -v gh >/dev/null 2>&1; then
    return 0
  fi
  return 1
}

# Store the GitHub "owner/repo" slug of the current repo in the variable whose
# name is passed as $1.
# Returns 0 on success. Returns 1 without touching the variable when gh is
# unavailable or the origin remote is not a GitHub URL — the caller decides how
# to handle that.
_gh_resolve() {
  local _outvar="$1" _slug
  if ! _gh_available; then
    return 1
  fi
  if ! _slug=$(_gh_repo_slug 2>/dev/null); then
    return 1
  fi
  printf -v "$_outvar" '%s' "$_slug"
}

# Poll `gh run list` until the current commit's CI completes.
# Arguments: $1 - timeout in seconds (default 300); polled every 15 seconds
# Returns 0 on success, when gh is missing (graceful skip), or when no runs
# exist and the branch has no open PR.
# Returns 1 on CI failure, timeout, a failing `gh run list`, or when the repo
# cannot be determined.
# NOTE(review): the jq fallbacks below are fail-open — if jq is missing or the
# captured gh output is not valid JSON, "pending" and "failed" both read as 0
# and the gate reports success. Confirm this is intended for a strict gate.
_ci_wait() {
  local timeout="${1:-300}"
  local interval=15
  local elapsed=0

  _gh_available || { warn "$(msg loop.gh_not_installed_skipping_ci_gate)"; return 0; }

  local commit; commit=$(git rev-parse HEAD 2>/dev/null) || { err "$(msg loop.not_a_git_repo)"; return 1; }
  local short; short=$(git rev-parse --short HEAD 2>/dev/null)

  # Resolve owner/repo from git remote so we don't depend on gh's auto-detection,
  # which breaks when ~/.ssh/config rewrites `Host github.com` → IP address.
  local repo_slug; repo_slug=$(_gh_repo_slug) || {
    err "$(msg loop.cannot_determine_github_repo_from_origin)"
    return 1
  }

  # Every value handed to msg is quoted so it arrives as exactly one argument,
  # even when empty or multi-word (the gh error text below is usually both
  # multi-word and multi-line).
  ok "$(msg loop.waiting_for_ci_on "${short}" "${repo_slug}")"

  while (( elapsed < timeout )); do
    local runs
    # stderr is captured too so a failure message can be shown to the user.
    runs=$(gh -R "$repo_slug" run list --commit "$commit" --json status,conclusion 2>&1) || {
      err "$(msg loop.gh_run_list_failed_for_gh "${repo_slug}" "${short}" "${runs}")"
      return 1
    }

    if [[ -z "$runs" || "$runs" == "[]" ]]; then
      # FIX-046: CI only fires on pull_request events — without a PR, runs will never appear.
      # Check if an open PR exists; if not, skip the gate gracefully.
      local _branch; _branch=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "")
      if [[ -n "$_branch" ]]; then
        # On any gh/jq failure the count falls back to 1 ("assume a PR exists")
        # so the gate keeps waiting instead of skipping.
        local _pr_json; _pr_json=$(gh -R "$repo_slug" pr list --head "$_branch" --state open --json number 2>/dev/null || echo "1")
        local _pr_count; _pr_count=$(echo "$_pr_json" | jq 'length' 2>/dev/null || echo "1")
        if [[ "$_pr_count" == "0" ]]; then
          warn "$(msg loop.no_open_pr_for_ci_not "${_branch}")"
          return 0
        fi
      fi
      (( elapsed == 0 )) && echo "$(msg loop.no_ci_runs_found_yet_waiting)"
      sleep "$interval"
      elapsed=$(( elapsed + interval ))
      continue
    fi

    local pending
    pending=$(echo "$runs" | jq -r '[.[] | select(.status != "completed")] | length' 2>/dev/null || echo "0")

    if [[ "$pending" -gt 0 ]]; then
      # The catalog entry is itself a printf format taking the elapsed seconds.
      # shellcheck disable=SC2059
      printf "$(msg loop.ci_running_ds_ci)" "$elapsed" "$elapsed"
      sleep "$interval"
      elapsed=$(( elapsed + interval ))
      continue
    fi

    # All runs completed: anything other than success / skipped / null fails.
    local failed
    failed=$(echo "$runs" | jq -r '[.[] | select(.conclusion != "success" and .conclusion != "skipped" and .conclusion != null)] | length' 2>/dev/null || echo "0")

    if [[ "$failed" -gt 0 ]]; then
      err "$(msg loop.ci_failed_for_ci "${short}")"
      return 1
    fi

    ok "$(msg loop.ci_passed_ci)"
    return 0
  done

  warn "$(msg loop.ci_timed_out_after_s_ci "${timeout}")"
  return 1
}

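# Cycle preflight hooks (meta sync, backlog sync) — run before picking up new
# stories; the pre-run CI health check itself is _loop_precheck_ci further down.
# US-LOOP-056 (implemented by _loop_sync_meta below): sync .roll/ (roll-meta
# private submodule) before each cycle so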
# the cycle always runs against the latest backlog. Fail-soft: any error emits
# a meta_sync event and returns 0 so the cycle continues with stale/existing meta.
#
# Statuses emitted via _loop_event meta_sync:
#   ok      – fetch + reset --hard succeeded
#   stale   – fetch failed; existing .roll/ used as fallback
#   skipped – no git remote configured (not a roll-meta managed project)
#
# US-SYNC-008: when `.roll/local.yaml backlog_sync.on_loop_cycle: true`, pull
# fresh GitHub issues into the backlog at cycle preflight so the upcoming story
# scan sees new issues. Default is OFF — absent file/key/flag is a silent no-op,
# so cycle behaviour is unchanged for projects that haven't opted in. Fail-soft:
# a sync error never blocks the cycle; it writes an ALERT and returns 0. The
# project's main .roll/ is synced (real sync, not dry-run) so the worktree's
# backlog copy (_worktree_sync_meta runs after this) carries the new rows.
_loop_backlog_sync_hook() {
  # Opt-in backlog sync at cycle preflight. $1 is the project root.
  # Always returns 0: missing python3, missing local.yaml, a disabled switch
  # and a failed sync all leave the cycle running.
  local project_path="$1"
  local cfg="${project_path}/.roll/local.yaml"
  local backlog_file="${project_path}/.roll/backlog.md"
  local cycle="${CYCLE_ID:-unknown}"

  if ! command -v python3 >/dev/null 2>&1; then
    return 0
  fi
  if [ ! -f "$cfg" ]; then
    return 0
  fi

  # Ask the helper for the switch; any helper failure counts as "false".
  local sync_tool="${ROLL_PKG_DIR}/lib/github_sync.py"
  local enabled
  enabled=$(python3 "$sync_tool" on-loop-cycle \
              --local-yaml "$cfg" 2>/dev/null || echo "false")
  [ "$enabled" = "true" ] || return 0

  _loop_event backlog_sync "$cycle" "start" "" 2>/dev/null || true

  # Real sync into the project's backlog. No --repo flag: the helper resolves
  # the repo from the persisted backlog_sync.repo setting.
  local outcome="ok"
  python3 "$sync_tool" sync \
    --backlog "$backlog_file" --local-yaml "$cfg" >/dev/null 2>&1 || outcome="failed"

  _loop_event backlog_sync "$cycle" "$outcome" "" 2>/dev/null || true
  if [ "$outcome" = "failed" ]; then
    _worktree_alert "cycle ${cycle}: backlog sync failed (on_loop_cycle=true); cycle continues with existing backlog" 2>/dev/null || true
  fi
  return 0
}

# FIX-160: succeed (0) only when every dirty tracked file reported by
# `git status --porcelain` in <roll_meta> lives under a loop-generated path
# (signals/, notes/, cycle-logs/ or local.yaml). Such a tree can be synced
# sparsely without touching those paths. Any other dirty path returns 1, so
# human edits still trigger the FIX-145 skip-to-protect guard.
# Arguments: $1 - path of the .roll checkout
_loop_meta_sync_is_loop_only_dirty() {
  local roll_meta="$1"
  local entry target
  while IFS= read -r entry; do
    if [ -z "$entry" ]; then
      continue
    fi
    # Drop the two status letters and the separating space.
    target="${entry#???}"
    # Renames are shown as "old -> new"; the destination is what matters.
    if [[ "$target" == *" -> "* ]]; then
      target="${target#*" -> "}"
    fi
    if [[ "$target" == signals/* || "$target" == notes/* \
          || "$target" == cycle-logs/* || "$target" == local.yaml ]]; then
      continue
    fi
    return 1
  done < <(git -C "$roll_meta" status --porcelain --untracked-files=no 2>/dev/null)
  return 0
}

# US-LOOP-056: bring <project>/.roll (its own git checkout) up to origin/main
# before a cycle. Fail-soft: always returns 0 so the cycle continues.
# Arguments: $1 - project root
# Env:       ROLL_LOOP_META_SYNC_TIMEOUT (default 15) — fetch timeout in
#            seconds, applied only when a `timeout` command exists
# Events emitted via `_loop_event meta_sync <cycle> <status> <detail>`:
#   ok     – fetch succeeded and reset --hard (or the FIX-160 sparse checkout) ran
#   dirty  – uncommitted non-loop edits in .roll; sync skipped to protect them
#   stale  – fetch or reset failed; the existing .roll/ is used as-is
# No event is emitted when .roll has no origin remote (not managed).
# US-LOOP-057: consecutive stale results are counted per project slug; from the
# third on, a line is appended to the shared ALERT-<slug>.md file.
_loop_sync_meta() {
  local project_path="$1"
  local roll_meta="${project_path}/.roll"
  local timeout_sec="${ROLL_LOOP_META_SYNC_TIMEOUT:-15}"
  local cid="${CYCLE_ID:-unknown}"
  local slug="${_LOOP_PROJ_SLUG:-$(_project_slug 2>/dev/null || echo unknown)}"
  local shared_dir="${_SHARED_ROOT:-$HOME/.shared/roll}/loop"
  local fail_counter="${shared_dir}/meta-sync-fail-${slug}"

  # Detect remote via the canonical probe point. If .roll/ has no .git or no
  # remote configured, treat as "not managed" and skip silently.
  local remote_url
  remote_url=$(git -C "$roll_meta" remote get-url origin 2>/dev/null || echo "")
  if [ -z "$remote_url" ]; then
    return 0
  fi

  # FIX-145: never `reset --hard` over uncommitted local .roll edits. The local
  # .roll is not only a cache of the roll-meta remote — a human (or a design
  # session) often edits backlog / domain files directly, and a hard reset
  # would silently destroy that work. If tracked files differ from HEAD, skip
  # the sync and emit a `dirty` event so it is visible.
  # Untracked files are safe (`reset --hard` does not touch them), so only
  # tracked modifications are guarded; --untracked-files=no keeps them from
  # blocking the sync forever. A repo with no commits has no HEAD and nothing
  # to protect, so the guard is skipped.
  # `git status --porcelain` is used rather than `git diff --quiet HEAD`:
  # status refreshes the index stat cache and content-checks racy entries, so
  # it reliably sees a just-written edit even on fast machines.
  local _loop_only_dirty=0
  if git -C "$roll_meta" rev-parse --verify -q HEAD >/dev/null 2>&1 \
     && [ -n "$(git -C "$roll_meta" status --porcelain --untracked-files=no 2>/dev/null)" ]; then
    if _loop_meta_sync_is_loop_only_dirty "$roll_meta"; then
      _loop_only_dirty=1
    else
      _loop_event meta_sync "$cid" "dirty" "uncommitted .roll edits — sync skipped to protect local work" 2>/dev/null || true
      return 0
    fi
  fi

  # Attempt fetch with timeout
  local _fetch_ok=0
  if command -v timeout >/dev/null 2>&1; then
    timeout "$timeout_sec" git -C "$roll_meta" fetch --quiet 2>/dev/null && _fetch_ok=1
  else
    git -C "$roll_meta" fetch --quiet 2>/dev/null && _fetch_ok=1
  fi

  local _synced=0
  if [ "$_fetch_ok" -eq 1 ]; then
    if [ "$_loop_only_dirty" -eq 1 ]; then
      # FIX-160: sparse sync — update source-of-truth paths from origin/main
      # while preserving loop-generated artifacts (signals, notes, cycle-logs,
      # local.yaml) that would otherwise keep .roll/ permanently dirty and
      # block the sync. The checkout itself is best-effort.
      git -C "$roll_meta" checkout origin/main -- . ':!signals/' ':!notes/' ':!cycle-logs/' ':!local.yaml' 2>/dev/null || true
      _loop_event meta_sync "$cid" "ok" "sparse-sync" 2>/dev/null || true
      _synced=1
    elif git -C "$roll_meta" reset --hard origin/main --quiet 2>/dev/null; then
      _loop_event meta_sync "$cid" "ok" "" 2>/dev/null || true
      _synced=1
    fi
    # A failed reset leaves _synced=0 and is reported as stale below, rather
    # than being counted as a success with no event.
  fi

  if [ "$_synced" -eq 1 ]; then
    # US-LOOP-057: reset consecutive failure counter on success
    rm -f "$fail_counter" 2>/dev/null || true
    return 0
  fi

  # Fetch or reset failed — stale .roll/ used; cycle continues
  _loop_event meta_sync "$cid" "stale" "fetch/reset failed" 2>/dev/null || true

  # US-LOOP-057: increment failure counter; write ALERT after 3 consecutive failures
  mkdir -p "$shared_dir" 2>/dev/null || true
  local count=0
  if [ -f "$fail_counter" ]; then
    count=$(cat "$fail_counter" 2>/dev/null || echo 0)
  fi
  # A corrupt or empty counter file restarts the count instead of breaking
  # the arithmetic below.
  case "$count" in
    ''|*[!0-9]*) count=0 ;;
  esac
  count=$(( 10#$count + 1 ))
  # Grouped so a failing redirection is silenced as well: an unwritable
  # counter must not abort a fail-soft hook.
  { printf '%s\n' "$count" > "$fail_counter"; } 2>/dev/null || true
  if [ "$count" -ge 3 ]; then
    { printf '[%s] roll-meta sync consecutive failures: %d times. Check SSH key / network.\n  Last error: fetch/reset failed for %s\n' \
        "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$count" "$remote_url" >> "${shared_dir}/ALERT-${slug}.md"; } 2>/dev/null || true
  fi
  return 0
}

# Refuses to build on a red base (HEAD CI failed). Lenient on unknown states
# (gh missing, repo unparseable, no runs yet) — the post-build _loop_enforce_ci
# is the strict gate.
# Env:     ROLL_LOOP_HEAL_MAX (default 2) — hot-fix attempts allowed per HEAD
#          commit; 0, negative or non-numeric disables the hot-fix path.
#          ROLL_LOOP_NO_HEAL=1 — disables the hot-fix path.
# Globals: _SHARED_ROOT, _LOOP_PROJ_SLUG (read, locate the state file);
#          _LOOP_ALERT (read, path of the ALERT file written on abort).
# Returns 0: ok to proceed (green / pending / unknown / no gh).
# Returns 1: HEAD CI is definitively red → ALERT written, do not build.
# Returns 2: HEAD CI is red but a hot-fix attempt is still within budget; the
#            per-commit heal counter in the state file has been incremented.
_loop_precheck_ci() {
  local slug; _gh_resolve slug || return 0

  local commit; commit=$(git rev-parse HEAD 2>/dev/null) || return 0

  # FIX-103: fetch both `status` and `conclusion`. Pre-run gate must distinguish
  # a still-running CI (status=in_progress/queued/waiting, conclusion=null) from
  # a genuinely red CI (conclusion=failure/cancelled/timed_out/...). Treating
  # in_progress as red kills every cycle started within the first ~30s of a
  # merge-triggered CI run.
  local runs
  runs=$(gh -R "$slug" run list --commit "$commit" --json conclusion,status 2>/dev/null) || return 0
  [[ -z "$runs" || "$runs" == "[]" ]] && return 0

  # Conclusions that block the loop. Anything else (success, skipped, neutral,
  # or null while still running) is treated as pass/pending.
  local failed_conclusions
  failed_conclusions=$(echo "$runs" \
    | jq -r '[.[] | select(.conclusion=="failure" or .conclusion=="cancelled" or .conclusion=="timed_out" or .conclusion=="action_required" or .conclusion=="startup_failure") | .conclusion] | unique | join(",")' \
    2>/dev/null || echo "")

  if [[ -n "$failed_conclusions" ]]; then
    local short; short=$(git rev-parse --short HEAD 2>/dev/null || echo unknown)
    local run_states
    run_states=$(echo "$runs" \
      | jq -r '[.[] | "\(.status // "?")/\(.conclusion // "null")"] | unique | join(", ")' \
      2>/dev/null || echo "?")

    # US-LOOP-046/048: check whether hot-fix path is allowed before aborting.
    # ROLL_LOOP_NO_HEAL=1 or ROLL_LOOP_HEAL_MAX=0 → fall through to original abort.
    local _heal_max="${ROLL_LOOP_HEAL_MAX:-2}"
    # Anything that is not a plain non-negative integer disables healing,
    # instead of tripping the numeric comparisons below.
    case "$_heal_max" in
      ''|*[!0-9]*) _heal_max=0 ;;
    esac
    if [[ "${ROLL_LOOP_NO_HEAL:-}" != "1" ]] && [[ "$_heal_max" -gt 0 ]]; then
      local _state_file="${_SHARED_ROOT:-$HOME/.shared/roll}/loop/state-${_LOOP_PROJ_SLUG:-$(basename "$PWD")}.yaml"
      local _heal_key="heal_count_head_${commit:0:8}"
      local _count=""
      if [[ -f "$_state_file" ]]; then
        # First "key: value" line wins; awk exits 0 when nothing matches, so a
        # missing key cannot fail the pipeline.
        _count=$(awk -v k="${_heal_key}:" '$1 == k { print $2; exit }' "$_state_file" 2>/dev/null) || _count=""
      fi
      case "$_count" in
        ''|*[!0-9]*) _count=0 ;;
      esac
      _count=$(( 10#$_count ))
      if [[ "$_count" -lt "$_heal_max" ]]; then
        # Increment counter and signal hot-fix path to the agent
        _count=$(( _count + 1 ))
        mkdir -p "$(dirname "$_state_file")" 2>/dev/null || true
        if [[ -f "$_state_file" ]]; then
          # Update existing key or append
          if grep -q "^${_heal_key}:" "$_state_file" 2>/dev/null; then
            local _tmp; _tmp=$(mktemp)
            grep -v "^${_heal_key}:" "$_state_file" > "$_tmp" 2>/dev/null || true
            printf '%s: %d\n' "$_heal_key" "$_count" >> "$_tmp"
            mv "$_tmp" "$_state_file"
          else
            printf '%s: %d\n' "$_heal_key" "$_count" >> "$_state_file"
          fi
        else
          printf '%s: %d\n' "$_heal_key" "$_count" > "$_state_file"
        fi
        # Exit 2 signals the agent: CI is red, hot-fix path is available
        return 2
      fi
    fi

    # Hot-fix disabled or budget used up: write the ALERT and refuse to build.
    err "$(msg loop.pre_run_ci_check_head_ci "${short}")"
    mkdir -p "$(dirname "$_LOOP_ALERT")"
    cat > "$_LOOP_ALERT" << EOF
# ALERT — Pre-run CI check failed (red base)

**Time**: $(date '+%Y-%m-%d %H:%M')
**Commit**: ${short}
**Reason**: $(msg loop.pre_run_ci_red_base)
**Failing conclusions**: ${failed_conclusions}
**Run states**: ${run_states}

**Action required**:
- Investigate and fix CI: \`gh -R ${slug} run list --commit ${commit}\`
- After fixing and pushing green commit: \`roll loop now\`
EOF
    _loop_diagnose_open_prs "$slug" >> "$_LOOP_ALERT"
    _notify "roll ⚠ CI red" "loop refused to build on broken base (${short})"
    return 1
  fi
  return 0
}

# US-LOOP-047: hot-fix context factory for HEAD CI failures.
# Writes recent commits, the --stat summary of the commit, and up to 200 lines
# of the failing CI run's log to /tmp/roll-heal-head-<sha8>.log.
# Arguments: $1 - commit to describe (default: current HEAD)
# Output:    the log file path on stdout
# Returns:   0 once the file has been written (sections that could not be
#            gathered are left empty or carry a "(no failed run found …)"
#            line); 1 only when no commit could be determined.
_loop_hotfix_head_context() {
  local commit="${1:-$(git rev-parse HEAD 2>/dev/null)}"
  [[ -z "$commit" ]] && return 1
  local short="${commit:0:8}"
  local outfile="/tmp/roll-heal-head-${short}.log"
  local slug; _gh_resolve slug || slug="unknown"

  {
    printf '=== CI Hot-fix Context: HEAD %s ===\n\n' "$short"
    printf '%s\n' '--- Recent commits ---'
    # Describe the requested commit; with the default argument this is HEAD.
    git log --oneline -5 "$commit" 2>/dev/null || true
    printf '\n--- Diff of last commit ---\n'
    git show --stat "$commit" 2>/dev/null | head -40 || true
    printf '\n--- CI failure logs (head 200 lines) ---\n'
    local run_id
    # `|| run_id=""`: under `set -e -o pipefail` a failing gh (unknown slug,
    # no network, no auth) would otherwise abort here, although this function
    # is meant to degrade to the "no failed run" note.
    run_id=$(gh -R "$slug" run list --commit "$commit" \
      --json databaseId,conclusion -L 5 2>/dev/null \
      | jq -r '.[] | select(.conclusion=="failure") | .databaseId' 2>/dev/null | head -1) || run_id=""
    if [[ -n "$run_id" ]]; then
      gh -R "$slug" run view --log-failed "$run_id" 2>/dev/null | head -200 || true
    else
      printf '(no failed run found for commit %s)\n' "$short"
    fi
  } > "$outfile" 2>/dev/null
  printf '%s\n' "$outfile"
  return 0
}

# US-LOOP-050: PR hot-fix entry point.
# Captures the failing CI log of a PR into /tmp/roll-heal-pr-<num>.log so it
# can be handed to a roll-fix invocation. This function does not check out the
# PR branch; it only gathers context.
# The failed run is looked up on the PR's own head branch, so a red run from an
# unrelated branch is never picked up.
# Usage:   _loop_hot_fix_pr <pr_number>
# Output:  the log file path on stdout
# Returns: 0 once the file has been written (it carries a "(no failed run
#          found …)" line when nothing could be fetched); 1 when the PR number
#          is empty or the repo cannot be resolved.
_loop_hot_fix_pr() {
  local pr_num="$1"
  [[ -z "$pr_num" ]] && return 1
  local slug; _gh_resolve slug || return 1
  local outfile="/tmp/roll-heal-pr-${pr_num}.log"

  # Resolve the PR's head branch first. Each gh call is guarded so a failure
  # degrades to "no failed run" instead of aborting under `set -e -o pipefail`.
  local head_ref run_id=""
  head_ref=$(gh -R "$slug" pr view "$pr_num" --json headRefName \
    --jq '.headRefName' 2>/dev/null) || head_ref=""
  if [[ -n "$head_ref" ]]; then
    # Newest failed run on that branch, or nothing when there is none.
    run_id=$(gh -R "$slug" run list --branch "$head_ref" \
      --json databaseId,conclusion -L 20 \
      --jq '[.[] | select(.conclusion=="failure") | .databaseId][0] // empty' 2>/dev/null) || run_id=""
  fi

  {
    printf '=== PR #%s CI Hot-fix Context ===\n\n' "$pr_num"
    if [[ -n "$run_id" ]]; then
      gh -R "$slug" run view --log-failed "$run_id" 2>/dev/null | head -200 || true
    else
      printf '(no failed run found for PR #%s)\n' "$pr_num"
    fi
  } > "$outfile" 2>/dev/null
  printf '%s\n' "$outfile"
  return 0
}

# _loop_diagnose_open_prs <slug>
#   Appended to ALERT when CI is red on HEAD.
#   For each open PR targeting main: lists up to 10 changed files and, from the
#   latest completed CI run on the PR branch, up to 8 failing tests (TAP
#   "not ok <n> <name>" lines). Flags whether the failures look unrelated to
#   the PR's own changes — but only when both lists could be fetched.
#   Prints nothing and returns 0 when gh fails or there are no open PRs.
_loop_diagnose_open_prs() {
  local slug="$1"
  local prs
  prs=$(gh -R "$slug" pr list --base main --state open --json number,title,headRefName \
    --jq '.[] | [.number|tostring, .headRefName, .title] | @tsv' 2>/dev/null) || return 0
  [[ -z "$prs" ]] && return 0

  printf '\n## Open PRs (potential fixes)\n'
  local pr_num branch pr_title
  while IFS=$'\t' read -r pr_num branch pr_title; do
    printf '\nPR #%s: %s\n' "$pr_num" "$pr_title"

    # Files changed in this PR
    local changed_files files_known=1
    changed_files=$(gh -R "$slug" pr diff "$pr_num" --name-only 2>/dev/null | head -10) || changed_files=""
    if [[ -z "$changed_files" ]]; then
      changed_files="(unable to fetch)"
      files_known=0
    fi
    printf '  Changed: %s\n' "$(echo "$changed_files" | tr '\n' ' ')"

    # Latest completed CI run on the PR branch: id and conclusion from a single
    # gh call. `first // empty` yields no output when no run has completed, so
    # a literal "null" is never mistaken for a run id.
    local run_info run_id="" conclusion=""
    run_info=$(gh -R "$slug" run list --branch "$branch" --json databaseId,conclusion \
      --jq '[.[] | select(.conclusion != null)] | first // empty | [(.databaseId | tostring), .conclusion] | @tsv' 2>/dev/null) || run_info=""
    IFS=$'\t' read -r run_id conclusion <<< "$run_info" || true

    if [[ "$conclusion" == "success" ]]; then
      printf '  CI: green — blocked only by branch protection (safe to merge)\n'
      printf '  Suggest: gh pr merge %s --admin\n' "$pr_num"
    elif [[ -n "$run_id" ]]; then
      # Extract the text after "not ok <1-4 digits> ". awk is used instead of
      # `grep -oP` with a look-behind: -P is missing from BSD/macOS grep and
      # variable-length look-behind is rejected by older PCRE builds.
      local failing_tests tests_known=1
      failing_tests=$(gh -R "$slug" run view "$run_id" --log-failed 2>/dev/null \
        | awk 'match($0, /not ok [0-9][0-9]?[0-9]?[0-9]? /) {
                 rest = substr($0, RSTART + RLENGTH)
                 if (rest != "") print rest
               }' \
        | head -8) || failing_tests=""
      if [[ -z "$failing_tests" ]]; then
        failing_tests="(unable to fetch)"
        tests_known=0
      fi

      printf '  CI: %s\n' "$conclusion"
      printf '  Failing tests:\n'
      local t
      while IFS= read -r t; do
        [[ -n "$t" ]] && printf '    - %s\n' "$t"
      done <<< "$failing_tests"

      if [[ "$files_known" -eq 0 || "$tests_known" -eq 0 ]]; then
        # Without both lists there is nothing to compare; never suggest an
        # admin merge on missing data.
        printf '  Note: could not determine failing tests or changed files — inspect the run before merging\n'
      else
        # Heuristic: if no failing test mentions a changed file, flag as likely unrelated
        local related=0 f base
        while IFS= read -r f; do
          [[ -n "$f" ]] || continue
          base=$(basename "$f")
          # -F: file names are literal text, not regular expressions.
          if printf '%s\n' "$failing_tests" | grep -qiF -- "$base"; then
            related=1
            break
          fi
        done <<< "$changed_files"
        if [[ "$related" -eq 0 ]]; then
          printf '  Note: failing tests appear UNRELATED to changed files — consider manual merge\n'
          printf '  Suggest: gh pr merge %s --admin  (verify tests manually first)\n' "$pr_num"
        else
          printf '  Note: failing tests may relate to PR changes — investigate before merging\n'
        fi
      fi
    fi
  done <<< "$prs"
}

# CI gate run before a story is marked Done.
# Waits up to 300 seconds for CI via _ci_wait and records the result as a `ci`
# event ("ok" or "red").
# Arguments: $1 - story id
# Returns 0 when _ci_wait succeeds (which includes its graceful skip when gh is
# unavailable).
# Returns 1 otherwise, after writing the ALERT file and sending a notification;
# the caller keeps the story 🔨 In Progress.
_loop_enforce_ci() {
  local story_id="$1"
  local _ci_result   # declared but never read in this function

  if ! _ci_wait 300; then
    _loop_event ci "$story_id" "" "red" 2>/dev/null || true

    mkdir -p "$(dirname "$_LOOP_ALERT")"
    cat > "$_LOOP_ALERT" << EOF
# ALERT — CI gate failed

**Time**: $(date '+%Y-%m-%d %H:%M')
**Story**: ${story_id}
**Commit**: $(git rev-parse --short HEAD 2>/dev/null || echo unknown)
**Reason**: $(msg loop.ci_did_not_pass_story_kept_in_progress)

**Action required** (choose one):
- Fix CI and re-run: \`roll loop now\`
- Take over manually: \`\$roll-build ${story_id}\`
- Reset and retry: \`roll loop reset\` then \`roll loop now\`
EOF
    _notify "roll ⚠ CI Failed" "${story_id}: CI did not pass"
    return 1
  fi

  _loop_event ci "$story_id" "" "ok" 2>/dev/null || true
  return 0
}

# Print the directory used for heal bookkeeping: "<loop dir>/heal", where the
# loop dir is $ROLL_LOOP_DIR when set and non-empty, else ~/.shared/roll/loop.
_loop_heal_dir() {
  local loop_dir="${ROLL_LOOP_DIR:-${HOME}/.shared/roll/loop}"
  printf '%s/heal\n' "$loop_dir"
}

# US-LOOP-062a: append a [TYPE:loop-pr-ci-red] line for a red loop/* PR to the
# ALERT file, at most once per PR for as long as that file keeps the line.
# Arguments: $1 - PR number
#            $2 - PR head ref
#            $3 - message (default "own loop PR CI red — needs heal")
# Globals:   _LOOP_ALERT (read) — alert file path; empty → no-op, returns 0
_loop_pr_ci_red_alert() {
  local pr="$1" ref="$2" detail="${3:-own loop PR CI red — needs heal}"
  local alert_file="${_LOOP_ALERT}"
  if [ -z "$alert_file" ]; then
    return 0
  fi
  mkdir -p "$(dirname "$alert_file")" 2>/dev/null || true

  # The trailing space keeps "PR #1 " from matching an entry for "PR #12 ".
  local marker="[TYPE:loop-pr-ci-red] PR #${pr} "
  if grep -qF "$marker" "$alert_file" 2>/dev/null; then
    return 0
  fi
  printf '[%s] [error] [TYPE:loop-pr-ci-red] PR #%s %s: %s\n' \
    "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$pr" "$ref" "$detail" >> "$alert_file"
}

# US-LOOP-062a: upsert "key: value" in the per-slug loop state file (same flat
# format the HEAD-CI heal counter uses).
# Arguments: $1 - key, matched literally (keys such as "heal_count.pr:12"
#                 contain "." and ":", so no regular expression is used)
#            $2 - value
# Globals:   _LOOP_STATE (read) — state file path; empty → no-op, returns 0
# An existing entry is removed and the new one appended at the end of the
# file; a new key is simply appended.
_loop_state_set() {
  local key="$1" val="$2" state="${_LOOP_STATE}"
  [ -n "$state" ] || return 0
  mkdir -p "$(dirname "$state")" 2>/dev/null || true

  # The prefix reaches awk through the environment so it is compared byte for
  # byte: no regex metacharacters, no escape processing.
  if [ -f "$state" ] && _LOOP_STATE_PREFIX="${key}:" awk '
       index($0, ENVIRON["_LOOP_STATE_PREFIX"]) == 1 { found = 1; exit }
       END { exit !found }' "$state" 2>/dev/null; then
    # Build the replacement next to the state file so the final mv is a
    # same-filesystem rename.
    local _tmp; _tmp=$(mktemp "${state}.XXXXXX")
    _LOOP_STATE_PREFIX="${key}:" awk '
      index($0, ENVIRON["_LOOP_STATE_PREFIX"]) != 1' "$state" > "$_tmp" 2>/dev/null || true
    printf '%s: %s\n' "$key" "$val" >> "$_tmp"
    mv "$_tmp" "$state"
  else
    printf '%s: %s\n' "$key" "$val" >> "$state"
  fi
}

# US-LOOP-062a: background-heal a red loop/* PR (loop_self_ci_red verdict).
# Arguments: $1 - PR number (empty → no-op)
#            $2 - PR head ref
#            $3 - optional repo slug, passed through to _loop_pr_do_heal
# Env:       ROLL_LOOP_HEAL_MAX (default 2) — heal attempts allowed per PR;
#            0, negative or non-numeric disables healing.
#            ROLL_LOOP_NO_HEAL=1 — disables healing.
# Globals:   _LOOP_STATE (read) — state file holding "heal_count.pr:<num>"
# A per-PR lock file holding the healer's pid prevents duplicate concurrent
# heals; a lock whose pid is no longer alive is reclaimed. When healing is
# disabled or the budget is exhausted a deduped ALERT is written instead
# (never silent). The heal agent comes from _project_agent (fallback "claude").
# Non-blocking: the heal itself runs in a background subshell, so the PR tick
# returns immediately.
# Returns:   always 0.
_loop_pr_heal_self() {
  local num="$1" head_ref="$2" slug="${3:-}"
  [ -n "$num" ] || return 0

  local heal_max="${ROLL_LOOP_HEAL_MAX:-2}"
  # A non-numeric budget would make the numeric tests below error out and
  # evaluate false, which removes the limit. Treat it as "disabled".
  case "$heal_max" in
    ''|*[!0-9]*) heal_max=0 ;;
  esac
  if [ "${ROLL_LOOP_NO_HEAL:-}" = "1" ] || [ "$heal_max" -le 0 ]; then
    _loop_pr_ci_red_alert "$num" "$head_ref" "auto-heal off (ROLL_LOOP_NO_HEAL) — fix manually"
    return 0
  fi

  local heal_dir; heal_dir="$(_loop_heal_dir)"
  mkdir -p "$heal_dir" 2>/dev/null || true
  local lock="${heal_dir}/pr-${num}.lock"
  if [ -f "$lock" ]; then
    local lpid; lpid=$(cat "$lock" 2>/dev/null || echo "")
    if [ -n "$lpid" ] && kill -0 "$lpid" 2>/dev/null; then
      return 0                       # heal already in flight for this PR
    fi
    rm -f "$lock"                     # stale lock (dead pid) — reclaim
  fi

  local key="heal_count.pr:${num}"
  local count=""
  if [ -f "${_LOOP_STATE:-}" ]; then
    # awk exits 0 when the key is absent, so a PR healed for the first time
    # cannot fail this assignment under `set -e -o pipefail`. The key is
    # compared literally against the first field ("<key>:").
    count=$(awk -v k="${key}:" '$1 == k { print $2; exit }' "${_LOOP_STATE}" 2>/dev/null) || count=""
  fi
  case "$count" in
    ''|*[!0-9]*) count=0 ;;
  esac
  count=$(( 10#$count ))
  if [ "$count" -ge "$heal_max" ]; then
    _loop_pr_ci_red_alert "$num" "$head_ref" "auto-heal budget exhausted (${count}/${heal_max}) — fix manually"
    return 0
  fi
  # The budget is charged before dispatch, so a crashing heal still counts.
  _loop_state_set "$key" "$(( count + 1 ))"

  # A failing _project_agent falls back to the default rather than aborting.
  local agent; agent="$(_project_agent 2>/dev/null)" || agent=""
  agent="${agent:-claude}"

  ( echo "${BASHPID:-$$}" > "$lock"
    _loop_pr_do_heal "$num" "$head_ref" "$slug" "$agent" >/dev/null 2>&1
    rm -f "$lock"
  ) &
  disown 2>/dev/null || true
  info "PR #${num}: background heal $(( count + 1 ))/${heal_max} dispatched (agent=${agent})"
  return 0
}

# US-LOOP-062a: the actual heal work (runs in the background subshell). Gathers
# the failing-CI context, checks out the PR branch in a throwaway worktree, hands
# the fix to the dynamically-selected agent via _agent_argv (no bare claude -p),
# and pushes back to the SAME PR branch. Best-effort: any failure leaves the PR
# untouched for the next tick (the heal budget caps retries). Overridable in
# tests.
_loop_pr_do_heal() {
  # Args: $1 PR number, $2 head ref, $3 optional repo slug, $4 agent name.
  # Returns 1 when prerequisites are missing or the branch cannot be fetched /
  # checked out; otherwise 0 (agent and push failures are best-effort).
  local num="$1" head_ref="$2" slug="${3:-}" agent="${4:-claude}"
  [ -n "$num" ] && [ -n "$head_ref" ] || return 1
  command -v gh >/dev/null 2>&1 || return 1
  [ -n "$slug" ] || _gh_resolve slug || return 1

  # One private scratch directory holds both the CI context file and the
  # worktree. Bail out if it cannot be created: an empty path here would turn
  # the cleanup below into `rm -rf /`.
  local scratch
  scratch=$(mktemp -d) || return 1
  [ -n "$scratch" ] && [ -d "$scratch" ] || return 1
  local ctx="${scratch}/ci-context.log"
  local wt="${scratch}/pr-${num}"

  # Capture failing-run context for the fix prompt.
  {
    printf '=== CI heal context: PR #%s (%s) ===\n\n' "$num" "$head_ref"
    gh -R "$slug" pr checks "$num" 2>/dev/null || true
    local _run
    # `state` must be requested alongside `link`: --json emits only the listed
    # fields, so filtering on .state with `--json link` alone never matches.
    _run=$(gh -R "$slug" pr checks "$num" --json link,state \
             --jq '.[]|select(.state=="FAILURE")|.link' 2>/dev/null \
             | grep -oE 'runs/[0-9]+' | head -1 | cut -d/ -f2) || true
    if [ -n "$_run" ]; then
      printf '\n--- failing run log (tail) ---\n'
      gh -R "$slug" run view "$_run" --log-failed 2>/dev/null | tail -200 || true
    fi
  } > "$ctx" 2>&1

  # Isolated worktree on the PR branch.
  if ! git fetch origin "$head_ref" >/dev/null 2>&1; then
    rm -rf "$scratch"
    return 1
  fi
  if ! git worktree add "$wt" "origin/${head_ref}" >/dev/null 2>&1; then
    rm -rf "$scratch"
    return 1
  fi

  local prompt="[roll PR 自愈] PR #${num} (${head_ref}) 的 CI 红了。失败上下文见 ${ctx}。请只修使 CI 转绿所需的最小改动,保持 TCR 微提交节奏,改完直接 commit。不要改无关代码,不要反问。"
  _agent_argv "$agent" text "$prompt"
  ( cd "$wt" && "${_AGENT_ARGV[@]}" ) >/dev/null 2>&1 || true

  # Push back to the same PR branch if the agent produced commits.
  if [ -n "$(cd "$wt" && git rev-list "origin/${head_ref}..HEAD" 2>/dev/null)" ]; then
    ( cd "$wt" && git push origin "HEAD:${head_ref}" ) >/dev/null 2>&1 || true
  fi
  git worktree remove --force "$wt" >/dev/null 2>&1 || true
  rm -rf "$scratch" 2>/dev/null || true
}

# US-LOOP-062b: merge a human-approved PR directly when CI is green and the PR
# is conflict-free, instead of waiting for repo-level auto-merge (which may be
# disabled). Mirrors the bot-approved eager-merge. Merge failure is NON-fatal:
# the PR is left open and the next PR-loop tick retries.
_loop_pr_merge_approved() {
  # Args: $1 PR number, $2 CI state, $3 mergeable state, $4 repo slug.
  local num="$1" ci_state="$2" mergeable="$3" slug="$4"

  # Nothing to do without both a PR number and a slug.
  if [ -z "$num" ] || [ -z "$slug" ]; then
    return 0
  fi
  # Only a green CI qualifies.
  if [ "$ci_state" != "success" ]; then
    return 0
  fi
  # Accept MERGEABLE (GraphQL mergeable) and CLEAN (mergeStateStatus); any
  # other value leaves the PR alone.
  if [ "$mergeable" != "MERGEABLE" ] && [ "$mergeable" != "CLEAN" ]; then
    return 0
  fi

  # A failed merge is only reported; the PR stays open for the next tick.
  if ! gh -R "$slug" pr merge "$num" --squash --delete-branch >/dev/null 2>&1; then
    warn "PR #${num}: merge failed (human-approved + CI green) — left open, will retry"
  else
    info "PR #${num}: human-approved + CI green — merged"
  fi
}

# REFACTOR-030: removed `_loop_self_heal_ci` and `_loop_clear_heal_state`.
# REFACTOR-023 merged the CI self-heal counter into the main state.yaml flow,
# but the two helpers themselves were left behind as dead code. Their job
# now lives in the state.yaml read/write paths called from the loop runner.

# Verify TCR rhythm after a story completes. Returns 0 if ok, 1 if no TCR commits.
# On failure: reverts story in .roll/backlog.md to 📋 Todo and writes ALERT.
_loop_enforce_tcr() {
  local story_id="$1"
  local started_at="${2:-}"

  [[ -z "$started_at" ]] && return 0

  local count; count=$(_loop_tcr_count "$started_at")

  if [[ "$count" -eq 0 ]]; then
    # Revert story status
    if [[ -f ".roll/backlog.md" ]]; then
      local tmp; tmp=$(mktemp)
      sed "/\[${story_id}\]/s/ | ✅ Done |/ | 📋 Todo |/" .roll/backlog.md > "$tmp" \
        && mv "$tmp" .roll/backlog.md
    fi

    # Write ALERT
    mkdir -p "$(dirname "$_LOOP_ALERT")"
    cat > "$_LOOP_ALERT" << EOF
# ALERT — TCR check failed

**Time**: $(date '+%Y-%m-%d %H:%M')
**Story**: ${story_id}
**Reason**: zero tcr: commits since story start (${started_at})

**Action required** (choose one):
- Add TCR commits and re-run: \`roll loop now\`
- Take over manually: \`\$roll-build ${story_id}\`
- Reset and retry: \`roll loop reset\` then \`roll loop now\`
EOF
    _notify "roll ⚠ TCR Failed" "${story_id}: no tcr: commits found"
    return 1
  fi

  return 0
}

# FIX-032: dependency gate — parses BACKLOG inline tags so the loop SKILL
# can enforce them at Step 2 (story pickup). Pure functions, no side effects.
#
# BACKLOG row format (relevant fragments):
#   | [US-AUTO-033](...) | desc `depends-on:US-AUTO-037` | 📋 Todo |
#   | FIX-100 | desc `depends-on:US-A,US-B` | 📋 Todo |
#
# Row matching is anchored on `^| [?<id>\b` so a story-id appearing in some
# other row's depends-on list is not mistaken for the row that defines it.

# _loop_check_depends_on <story-id> [backlog-path]
#   Exit 0: all listed depends-on are ✅ Done, or no depends-on tag present.
#   Exit 1: any dep not ✅ Done, story-id not found, or backlog missing.
#   Stdout (on exit 1 due to unsatisfied deps): space-separated unsatisfied IDs.
_loop_check_depends_on() {
  # Args: $1 story id, $2 optional backlog path (default .roll/backlog.md).
  local id="$1"
  local backlog="${2:-.roll/backlog.md}"
  if [ -z "$id" ] || [ ! -f "$backlog" ]; then
    return 1
  fi

  # The row that defines the story (id in the first column, optionally as a
  # markdown link).
  local story_row
  story_row=$(grep -E "^\| \[?${id}[]| ]" "$backlog" | head -1)
  [ -n "$story_row" ] || return 1

  # First depends-on tag in the row. The character class includes lowercase
  # letters so lettered sub-story ids (e.g. US-LOOP-062c) are kept whole
  # (FIX-167).
  local dep_csv
  dep_csv=$(printf '%s\n' "$story_row" \
    | grep -oE 'depends-on:[A-Za-z][A-Za-z0-9,-]+' | head -1 | sed 's/depends-on://' || true)
  [ -n "$dep_csv" ] || return 0

  # Walk the comma-separated list; a dep is satisfied only when its own row
  # exists and carries the Done marker.
  local missing="" dep dep_row
  local saved_ifs="$IFS"
  IFS=','
  for dep in $dep_csv; do
    dep_row=$(grep -E "^\| \[?${dep}[]| ]" "$backlog" | head -1)
    case "$dep_row" in
      *'✅ Done'*) ;;
      *) missing="${missing:+$missing }${dep}" ;;
    esac
  done
  IFS="$saved_ifs"

  [ -z "$missing" ] && return 0
  echo "$missing"
  return 1
}

# FIX-172: detect whether a story routes its output to roll-meta — path-based,
# never tag-based. A story is a roll-meta target iff its declared deliverable
# **Files:** include a path under .roll/ that is NOT a status-management file
# (.roll/backlog.md, .roll/features/). Every cycle flips its own backlog/feature
# status under .roll/, so "touches .roll/" must NOT flag a product story; only a
# genuine roll-meta deliverable (e.g. .roll/ops/watch.sh) counts.
# Returns 0 when the story delivers to roll-meta.
_loop_is_roll_meta_story() {
  # Args: $1 story id, $2 optional backlog path (default .roll/backlog.md).
  # Returns 0 only when the story's **Files:** block lists a path under .roll/
  # other than .roll/backlog.md or anything below .roll/features/.
  local id="$1"
  local backlog="${2:-.roll/backlog.md}"
  if [ -z "$id" ] || [ ! -f "$backlog" ]; then
    return 1
  fi

  local story_row
  story_row=$(grep -E "^\| \[?${id}[]| ]" "$backlog" | head -1)
  [ -n "$story_row" ] || return 1

  # First "(….md" target in the row, minus the opening parenthesis.
  local md_link
  md_link=$(printf '%s\n' "$story_row" | grep -oE '\([^)]+\.md' | head -1 | sed -E 's/^\(//')
  [ -n "$md_link" ] || return 1

  # Keep the link from "features/" onward and resolve it against the
  # directory that holds the backlog.
  local feature_rel feature_file
  feature_rel=$(printf '%s\n' "$md_link" | sed -E 's#^.*(features/.*)#\1#')
  feature_file="$(dirname "$backlog")/${feature_rel}"
  [ -f "$feature_file" ] || return 1

  # Print the lines between this story's **Files:** marker and the next bold
  # marker / next "## " heading, then look for a qualifying .roll/ path.
  awk -v id="$id" '
    $0 ~ ("^## " id "( |$)") {insec=1; next}
    insec && /^## / {exit}
    insec && /^\*\*Files:\*\*/ {infiles=1; next}
    insec && infiles && /^\*\*/ {exit}
    insec && infiles {print}
  ' "$feature_file" \
    | grep -oE '\.roll/[A-Za-z0-9_./-]+' \
    | grep -vE '^\.roll/backlog\.md$|^\.roll/features/' \
    | grep -q .
}

# US-AUTO-034: PR-first inbox — loop processes open PRs before scanning BACKLOG.
#
# Three building blocks, kept as pure / mockable functions:
#   _loop_pr_classify        pure routing decision (no side effects)
#   _loop_pr_rebase_circuit  24h sliding-window circuit breaker on rebase retries
#   _loop_pr_inbox           orchestrator that walks `gh pr list` and routes
#                            each open PR to skip / review / rebase
#
# Design notes:
#   - gh missing or any gh failure → return 0 (lenient, like FIX-026's pre-check)
#   - self-authored loop/* PRs are skipped to avoid same-source AI review
#   - latest human review of CHANGES_REQUESTED or APPROVED blocks AI review
#     (Human-review-activity guard from US-AUTO-034 AC)
#   - rebase attempts ≥3 within 24h trip the circuit breaker (writes ALERT)

# _loop_pr_classify <head_ref> <human_review_state> <ci_state> <mergeable_state>
#   Prints one of:
#     ci_red    — CI failed → heal
#     stale     — needs rebase / conflicting / behind
#     ready     — CI green + clean → merge
#   Human review intentionally irrelevant — CI is the only gate.
_loop_pr_classify() {
  # $1 (head ref) and $2 (human review state) are accepted but do not affect
  # the verdict; only $3 (CI state) and $4 (mergeable state) are consulted.
  local head_ref="${1:-}" human_review="${2:-}"
  local ci_state="${3:-}" mergeable="${4:-}"

  # Staleness wins over a red CI; everything else is "ready".
  local verdict="ready"
  if [ "$mergeable" = "BEHIND" ] || [ "$mergeable" = "DIRTY" ] || [ "$mergeable" = "CONFLICTING" ]; then
    verdict="stale"
  elif [ "$ci_state" = "failure" ]; then
    verdict="ci_red"
  fi
  echo "$verdict"
}

# _loop_pr_rebase_circuit <pr_number>
#   Side effect: appends current timestamp to $_LOOP_STATE under
#   pr_state.<PR>.attempts_at, pruning entries older than 24h.
#   Exit 0: attempt allowed and recorded.
#   Exit 1: ≥3 attempts within 24h → blocked; ALERT written.
_loop_pr_rebase_circuit() {
  # Args: $1 PR number. Returns 0 after recording an allowed attempt, 1 when
  # the PR number is missing or three attempts already fall inside the last
  # 24 hours (in which case the ALERT file is rewritten).
  local pr="$1"
  if [ -z "$pr" ]; then
    return 1
  fi

  local state="$_LOOP_STATE"
  local now
  now=$(date -u +%s)
  local cutoff=$((now - 86400))

  # Pull the quoted timestamp list stored under pr_state → "<pr>": →
  # attempts_at; stays empty when the file or entry is absent.
  local recorded=""
  if [ -f "$state" ]; then
    recorded=$(awk -v pr="\"$pr\":" '
      $0 ~ "pr_state:" {in_pr=1; next}
      in_pr && $0 ~ pr {in_target=1; next}
      in_target && $0 ~ /attempts_at:/ {
        sub(/^[^"]*"/, ""); sub(/".*$/, ""); print; exit
      }
      in_target && /^[^[:space:]]/ {in_target=0}
    ' "$state" 2>/dev/null)
  fi

  # Keep only purely numeric timestamps at or after the cutoff, counting
  # them as we go.
  local fresh="" count=0 ts
  for ts in $recorded; do
    case "$ts" in
      ''|*[!0-9]*) continue ;;
    esac
    [ "$ts" -ge "$cutoff" ] || continue
    fresh="${fresh:+$fresh }$ts"
    count=$((count + 1))
  done

  # Below the limit: record this attempt and allow it.
  if [ "$count" -lt 3 ]; then
    _loop_pr_state_write "$pr" "${fresh:+$fresh }$now" "$state"
    return 0
  fi

  # Limit reached: report and block.
  mkdir -p "$(dirname "${_LOOP_ALERT:-/dev/null}")" 2>/dev/null || true
  cat > "${_LOOP_ALERT}" <<EOF
# ALERT — PR rebase circuit breaker tripped

**Time**: $(date '+%Y-%m-%d %H:%M')
**PR**: #${pr}
**Reason**: $(msg loop.pr_rebased_within_24h_no_ci "$pr" "$count")

**Action required**:
- Check PR CI logs and workflow files for breakage
- Resolve manually, then: \`roll loop now\`
EOF
  return 1
}

# Internal: rewrite $state with pr_state.<pr>.attempts_at = "<fresh-ts-list>".
# Minimal YAML writer — we own the schema and only need this one field family.
_loop_pr_state_write() {
  # Args: $1 PR number, $2 space-separated timestamp list, $3 state file.
  # Rewrites the state file so that the entry  "<pr>":  following the
  # pr_state: header carries  attempts_at: "<list>". The header and/or the
  # entry are appended at the end of the file when they do not exist yet.
  local pr="$1"
  local attempts="$2"
  local state="$3"

  mkdir -p "$(dirname "$state")" 2>/dev/null || true
  [ -f "$state" ] || : > "$state"

  local tmp; tmp=$(mktemp)
  awk -v pr="\"$pr\":" -v attempts="$attempts" '
    BEGIN { in_pr=0; in_target=0; written=0 }
    /^pr_state:/ { in_pr=1; print; next }
    in_pr && $0 ~ pr {
      in_target=1
      print "  " pr
      print "    attempts_at: \"" attempts "\""
      written=1
      next
    }
    in_target && /attempts_at:/ { next }   # skip old value, already written
    # The target entry ends at the next top-level line OR at the next sibling
    # PR key. Without the sibling check in_target stayed set and the skip rule
    # above deleted the attempts_at line of every later PR entry.
    in_target && (/^[^[:space:]]/ || /^[[:space:]]*"[^"]*":[[:space:]]*$/) { in_target=0 }
    { print }
    END {
      if (!in_pr) {
        print "pr_state:"
        print "  " pr
        print "    attempts_at: \"" attempts "\""
      } else if (!written) {
        print "  " pr
        print "    attempts_at: \"" attempts "\""
      }
    }
  ' "$state" > "$tmp" && mv "$tmp" "$state"
}

# _loop_pr_review_external <pr_number>
#   Calls cmd_review_pr (US-PR-001) to run AI review on an eligible external PR.
#   Lenient: errors are logged but do not fail the loop.
_loop_pr_review_external() {
  # Args: $1 PR number. Runs cmd_review_pr with stderr folded into stdout;
  # a failure is downgraded to a warning. Always returns 0.
  local pr="$1"
  if [ -z "$pr" ]; then
    return 0
  fi
  if ! cmd_review_pr "$pr" 2>&1; then
    warn "review-pr failed for PR #${pr} (non-fatal)"
    return 0
  fi
}

# _loop_pr_rebase_stale <pr_number> <head_ref>
#   Attempts to rebase a stale PR onto origin/main and push.
#   Fork PRs are skipped (no write access). Conflicts write ALERT.
_loop_pr_rebase_stale() {
  # Args: $1 PR number, $2 head ref.
  # Rebases the PR branch onto origin/main and force-pushes (with lease).
  # Fork PRs, rebase conflicts and push failures each append a line to
  # $_LOOP_ALERT. Always returns 0.
  local pr="$1" head_ref="$2"
  [ -n "$pr" ] && [ -n "$head_ref" ] || return 0

  local slug; _gh_resolve slug || return 0

  local pr_json
  pr_json=$(gh -R "$slug" pr view "$pr" --json headRepository,headRepositoryOwner,isCrossRepository 2>/dev/null) || return 0
  local is_fork
  is_fork=$(echo "$pr_json" | jq -r '.isCrossRepository // false' 2>/dev/null) || is_fork=""
  local alert="$_LOOP_ALERT"
  if [ "$is_fork" = "true" ]; then
    mkdir -p "$(dirname "$alert")" 2>/dev/null || true
    printf '[%s] PR #%s: fork PR — cannot rebase (no write access)\n' \
      "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$pr" >> "$alert"
    return 0
  fi

  # FIX-159: remember the branch we started on BEFORE any checkout. Capturing
  # it after `checkout -B` always yields the PR branch itself, which turns the
  # restore below into a no-op.
  local _orig
  _orig=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) || _orig=""

  git fetch origin "$head_ref" 2>/dev/null || return 0
  # Reset local tracking branch to the freshly-fetched remote state
  # before rebasing, otherwise force-push destroys commits pushed by others.
  # This also leaves us on the PR branch, so no further checkout is needed.
  git checkout -B "$head_ref" "origin/$head_ref" 2>/dev/null || return 0

  local _rebase_ok=0 _push_ok=0
  if git rebase origin/main 2>/dev/null; then
    _rebase_ok=1
    if git push --force-with-lease origin "$head_ref" 2>/dev/null; then
      _push_ok=1
      info "PR #${pr}: rebased ${head_ref} onto origin/main"
    fi
  else
    # Abort first: switching branches while a rebase is in progress fails.
    git rebase --abort 2>/dev/null || true
  fi

  # Restore original branch regardless of outcome ("HEAD" = was detached).
  if [ -n "$_orig" ] && [ "$_orig" != "HEAD" ]; then
    git checkout "$_orig" 2>/dev/null || true
  fi

  if [ "$_push_ok" -eq 1 ]; then
    return 0
  fi

  mkdir -p "$(dirname "$alert")" 2>/dev/null || true
  if [ "$_rebase_ok" -eq 0 ]; then
    printf '[%s] PR #%s: rebase conflict on %s — please rebase manually\n' \
      "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$pr" "$head_ref" >> "$alert"
  else
    printf '[%s] PR #%s: rebase succeeded but push failed on %s — please check manually\n' \
      "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$pr" "$head_ref" >> "$alert"
  fi
  return 0
}

# _loop_pr_merge_self_eager <num> <ci_state> <mergeable> <slug>
#   Merge a loop_self PR directly when CI is green and PR is conflict-free.
#   Does not rely on repo-level auto-merge (unreliable if not configured).
#   Same pattern as the bot_review=APPROVED gate.
#   NOTE: US-AUTO-044 introduces a spec-named _loop_pr_merge_self <PR#> that
#   (re)sets `gh pr merge --auto` instead of merging immediately. This eager
#   variant is the original US-AUTO-034 inbox behaviour; renamed to free the
#   spec name. Both coexist until Phase 2 collapses the inbox path.
_loop_pr_merge_self_eager() {
  # Args: $1 PR number, $2 CI state, $3 mergeable state, $4 repo slug.
  # Squash-merges the PR when CI is "success" and the state is conflict-free.
  # A merge failure is only reported. Always returns 0.
  local num="$1" ci_state="$2" mergeable="$3" slug="$4"
  [ "$ci_state" = "success" ] || return 0
  # Accept both the GraphQL `mergeable` enum (MERGEABLE) and the
  # `mergeStateStatus` enum (CLEAN). _loop_pr_inbox feeds mergeStateStatus, so
  # a ready-to-merge PR arrives as CLEAN in production — without it the eager
  # merge never fired and self-PRs silently waited on repo-level auto-merge.
  case "$mergeable" in MERGEABLE|CLEAN) ;; *) return 0 ;; esac
  # Explicit if/else: with `merge && info || warn` a failing `info` after a
  # successful merge would also emit the "merge failed" warning.
  if gh -R "$slug" pr merge "$num" --squash --delete-branch >/dev/null 2>&1; then
    info "PR #${num}: loop_self CI green — merged" || true
  else
    warn "PR #${num}: loop_self merge failed — left open" || true
  fi
  return 0
}

# _loop_pr_inbox
#   Walks open PRs and routes each by classification.
#   Lenient on gh unavailability — returns 0 so the loop continues to BACKLOG.
_loop_pr_inbox() {
  # Walks every open PR of the resolved repo and routes it:
  #   bot review APPROVED          → merge directly when CI green + conflict-free
  #   bot review CHANGES_REQUESTED → append a line to $_LOOP_ALERT
  #   otherwise                    → _loop_pr_classify verdict (ci_red / stale / ready)
  # Each early exit and the normal end write a tick via _loop_write_tick.
  # Always returns 0.
  local slug; _gh_resolve slug || { _loop_write_tick "pr" "idle" "gh_unavailable"; return 0; }
  local prs_json
  prs_json=$(gh -R "$slug" pr list --state open \
    --json number,headRefName,author,title \
    2>/dev/null) || { _loop_write_tick "pr" "idle" "gh_error"; return 0; }
  [ -n "$prs_json" ] || { _loop_write_tick "pr" "idle" "empty_response"; return 0; }
  if [ "$prs_json" = "[]" ]; then
    _loop_write_tick "pr" "idle" "no_open_prs"
    return 0
  fi

  local count; count=$(echo "$prs_json" | jq 'length' 2>/dev/null || echo 0)
  [ "${count:-0}" -gt 0 ] 2>/dev/null || { _loop_write_tick "pr" "idle" "zero_prs"; return 0; }

  # Collapses statusCheckRollup into "", "failure", "success" or "pending".
  # Shared by the first look at a PR and the re-check after a rebase.
  local ci_filter='
      if (.statusCheckRollup | length) == 0 then ""
      elif any(.statusCheckRollup[]?; .conclusion == "FAILURE") then "failure"
      elif all(.statusCheckRollup[]?; .conclusion == "SUCCESS" or .conclusion == "SKIPPED") then "success"
      else "pending" end'

  local i
  for (( i = 0; i < count; i++ )); do
    local num head_ref
    num=$(echo "$prs_json" | jq -r ".[$i].number")
    head_ref=$(echo "$prs_json" | jq -r ".[$i].headRefName")

    # Fetch CI + review state for this PR; skip the PR if gh fails.
    local view_json
    view_json=$(gh -R "$slug" pr view "$num" \
      --json reviews,mergeStateStatus,statusCheckRollup \
      2>/dev/null) || continue

    # Latest review state from non-bot and from bot/app authors respectively.
    local human_review ci_state mergeable bot_review
    human_review=$(echo "$view_json" | jq -r '
      [.reviews[]? | select(.authorAssociation != "BOT" and .authorAssociation != "APP")]
      | last // {} | .state // ""' 2>/dev/null)
    bot_review=$(echo "$view_json" | jq -r '
      [.reviews[]? | select(.authorAssociation == "BOT" or .authorAssociation == "APP")]
      | last // {} | .state // ""' 2>/dev/null)
    mergeable=$(echo "$view_json" | jq -r '.mergeStateStatus // ""' 2>/dev/null)
    ci_state=$(echo "$view_json" | jq -r "$ci_filter" 2>/dev/null)

    # Bot review gate: if a GHA workflow already handled this PR, defer to it.
    if [ "$bot_review" = "APPROVED" ]; then
      # All gates cleared (bot-approved + CI green + no conflicts) → merge directly.
      # Relying on repo-level auto-merge being configured is not reliable; loop
      # owns the decision here since it already ran the review.
      if [ "$ci_state" = "success" ] && { [ "$mergeable" = "MERGEABLE" ] || [ "$mergeable" = "CLEAN" ]; }; then
        if gh -R "$slug" pr merge "$num" --squash --delete-branch >/dev/null 2>&1; then
          info "PR #${num}: bot-approved + CI green — merged" || true
        else
          warn "PR #${num}: merge failed (bot-approved + CI green) — left open" || true
        fi
      fi
      continue
    elif [ "$bot_review" = "CHANGES_REQUESTED" ]; then
      local alert="$_LOOP_ALERT"
      mkdir -p "$(dirname "$alert")" 2>/dev/null || true
      printf '[%s] PR #%s: bot review CHANGES_REQUESTED — loop PR rejected by GHA reviewer\n' \
        "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$num" >> "$alert"
      continue
    fi

    local verdict
    verdict=$(_loop_pr_classify "$head_ref" "$human_review" "$ci_state" "$mergeable")

    case "$verdict" in
      ci_red)
        _loop_pr_heal_self "$num" "$head_ref" "$slug" || true
        ;;
      stale)
        # Honour the circuit breaker: a non-zero status means the attempt is
        # blocked (and the ALERT already written), so do not rebase again.
        if _loop_pr_rebase_circuit "$num"; then
          _loop_pr_rebase_stale "$num" "$head_ref" || true
          # Re-fetch PR state after rebase — if now clean, merge immediately.
          local _re_view
          _re_view=$(gh -R "$slug" pr view "$num" --json mergeStateStatus,statusCheckRollup 2>/dev/null) || true
          if [ -n "$_re_view" ]; then
            local _re_ci _re_mb
            _re_ci=$(echo "$_re_view" | jq -r "$ci_filter" 2>/dev/null)
            _re_mb=$(echo "$_re_view" | jq -r '.mergeStateStatus // ""' 2>/dev/null)
            _loop_pr_merge_self_eager "$num" "$_re_ci" "$_re_mb" "$slug" || true
          fi
        fi
        ;;
      ready)
        _loop_pr_merge_self_eager "$num" "$ci_state" "$mergeable" "$slug" || true
        ;;
    esac
  done
  _loop_write_tick "pr" "acted" "inbox_done"
  return 0
}

# ── US-AUTO-044 Phase 1: dedicated PR Loop helpers ───────────────────────────
#
# Loop-safe building blocks for the future PR Loop (com.roll.pr.<slug>.plist,
# 5-min cadence). Phase 1 ships the helpers + tests only; Phase 2 (runner /
# plist / main-loop wiring) is wired by hand and out of scope here.
#
# All helpers are lenient on a missing `gh` binary or any gh failure: they
# return 0 so a loop iteration never aborts on a single bad PR. State writes
# reuse the US-AUTO-034 24h sliding-window breaker (_loop_pr_rebase_circuit)
# and pr_state writer.

# _loop_pr_close_with_comment <PR#> <reason>
#   Close a PR with an explanatory comment and append a warn-level _LOOP_ALERT.
#   Lenient: gh failure does not propagate.
_loop_pr_close_with_comment() {
  # Args: $1 PR number, $2 reason (used as the closing comment and in the
  # alert line). Always returns 0.
  local pr="$1" reason="$2"
  if [ -z "$pr" ]; then
    return 0
  fi

  local slug
  if ! _gh_resolve slug; then
    return 0
  fi

  # Closing is best-effort; the alert line is written either way.
  gh -R "$slug" pr close "$pr" --comment "$reason" >/dev/null 2>&1 || true

  local alert_path="$_LOOP_ALERT"
  mkdir -p "$(dirname "$alert_path")" 2>/dev/null || true
  printf '[%s] [warn] PR #%s closed: %s\n' \
    "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$pr" "$reason" >> "$alert_path"
  return 0
}

# _loop_pr_empty_diff <PR#>
#   Exit 0 when the PR's diff against base is empty (changes already on main);
#   exit 1 otherwise. Lenient: gh failure is treated as "not empty" (exit 1)
#   so we never close a PR we couldn't inspect.
_loop_pr_empty_diff() {
  # Args: $1 PR number. Status 0 only when `gh pr diff` succeeds and prints
  # nothing; a missing PR number, unresolved slug or gh failure all yield 1.
  local pr="$1"
  if [ -z "$pr" ]; then
    return 1
  fi

  local slug
  if ! _gh_resolve slug; then
    return 1
  fi

  local patch
  if ! patch=$(gh -R "$slug" pr diff "$pr" 2>/dev/null); then
    return 1
  fi
  [ -z "$patch" ]
}

# _loop_pr_rebase <PR#> [head_ref]
#   fetch origin + rebase head onto origin/main + push. On conflict / failure,
#   record the attempt via _loop_pr_rebase_circuit (24h window). When the
#   breaker trips (>=3 attempts in 24h), close the PR with a comment.
#   Lenient: returns 0 on a clean rebase, 1 on failure (after recording).
_loop_pr_rebase() {
  # Args: $1 PR number, $2 optional head ref (looked up via gh when omitted).
  # Returns 0 on a clean rebase + push (and on the lenient early exits),
  # 1 after a failed attempt has been recorded with the circuit breaker.
  local pr="$1" head_ref="${2:-}"
  [ -n "$pr" ] || return 0
  local slug; _gh_resolve slug || return 0

  if [ -z "$head_ref" ]; then
    head_ref=$(gh -R "$slug" pr view "$pr" --json headRefName -q .headRefName 2>/dev/null) || return 0
  fi
  [ -n "$head_ref" ] || return 0

  # Remember where we started so the checkout is handed back on the same
  # branch whatever happens (same approach as _loop_pr_rebase_stale).
  # `git checkout -` is not a substitute: if the checkout of the PR branch
  # itself failed, "-" refers to some unrelated earlier branch.
  local orig
  orig=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) || orig=""

  git fetch origin "$head_ref" 2>/dev/null || true
  git fetch origin main 2>/dev/null || true

  # -B resets the local branch to the freshly fetched remote state first, so
  # the force-push cannot discard commits that only exist on the remote.
  local rebased=0
  if git checkout -B "$head_ref" "origin/$head_ref" 2>/dev/null \
     && git rebase origin/main 2>/dev/null \
     && git push --force-with-lease origin "$head_ref" 2>/dev/null; then
    rebased=1
  else
    # Abort before switching away; harmless when no rebase is in progress.
    git rebase --abort 2>/dev/null || true
  fi

  # "HEAD" means we started detached — there is no branch name to return to.
  if [ -n "$orig" ] && [ "$orig" != "HEAD" ]; then
    git checkout "$orig" 2>/dev/null || true
  fi

  if [ "$rebased" -eq 1 ]; then
    info "PR #${pr}: rebased ${head_ref} onto origin/main"
    return 0
  fi

  # Record the failed attempt; breaker tripping (exit 1) → close the PR.
  if ! _loop_pr_rebase_circuit "$pr"; then
    _loop_pr_close_with_comment "$pr" \
      "rebase failed 3+ times in 24h — please resolve conflicts manually"
  fi
  return 1
}

# _loop_pr_merge_self <PR#> [auto_merge_present]
#   For a loop/* self-authored PR: ensure auto-merge is armed. When
#   autoMergeRequest is already present (arg2 = "true") this is a no-op (the PR
#   is already waiting on CI). Otherwise (re)set `gh pr merge --auto`.
#   When arg2 is omitted, queries the PR for its autoMergeRequest state.
#   Lenient: gh failure does not propagate.
_loop_pr_merge_self() {
  # Args: $1 PR number, $2 optional "true"/"false" telling whether auto-merge
  # is already armed (queried from the PR when omitted). Always returns 0.
  local pr="$1" auto_present="${2:-}"
  [ -n "$pr" ] || return 0
  local slug; _gh_resolve slug || return 0

  if [ -z "$auto_present" ]; then
    local am
    # `|| am=""` keeps a failing gh from aborting the script under `set -e`;
    # an unknown state is treated as "not armed" and we try to arm below.
    am=$(gh -R "$slug" pr view "$pr" --json autoMergeRequest -q '.autoMergeRequest' 2>/dev/null) || am=""
    if [ -n "$am" ] && [ "$am" != "null" ]; then
      auto_present="true"
    else
      auto_present="false"
    fi
  fi

  if [ "$auto_present" = "true" ]; then
    return 0   # auto-merge already armed — wait for CI
  fi

  # if/else rather than `&& … || …` so a failing `info` cannot trigger the
  # failure warning after a successful arm.
  if gh -R "$slug" pr merge "$pr" --auto --squash --delete-branch >/dev/null 2>&1; then
    info "PR #${pr}: auto-merge (re)armed for loop self-PR" || true
  else
    warn "PR #${pr}: failed to arm auto-merge — left open" || true
  fi
  return 0
}

# _loop_pr_prune_local <branch>
#   US-AUTO-044 Phase 2: once a self-PR merges (remote head already removed by
#   `--delete-branch` in _loop_pr_merge_self), prune the now-stale LOCAL branch
#   so the loop checkout doesn't accumulate merged refs (we hand-cleaned 134
#   such branches + 38 worktrees once — see FIX/owner cleanup 2026-05-31).
#   Uses -D, not -d: a squash-merge leaves the local branch looking unmerged to
#   git, so -d would refuse. Skips a branch still checked out in any worktree —
#   `git branch -D` errors on those (kimi peer-review Q3). Always exits 0.
_loop_pr_prune_local() {
  # Args: $1 local branch name. Force-deletes the branch unless some worktree
  # currently has it checked out. Always returns 0.
  local branch="$1"
  [ -n "$branch" ] || return 0

  # Capture the listing first and match it as a fixed, whole-line string:
  #  - branch names may contain regex metacharacters ("." in "fix-1.2"), which
  #    as a pattern would match other branches;
  #  - `grep -q` on a live pipe can end the producer with SIGPIPE, which
  #    pipefail would then report as "no match".
  local worktrees
  worktrees=$(git worktree list --porcelain 2>/dev/null) || worktrees=""
  if grep -qxF -- "branch refs/heads/${branch}" <<< "$worktrees"; then
    return 0   # checked out in a worktree — leave it for that worktree's teardown
  fi
  git branch -D "$branch" >/dev/null 2>&1 || true
  return 0
}

# _loop_pr_route <PR_json>
#   Single-PR dispatcher for the PR Loop. <PR_json> is one object from
#   `gh pr list --json number,headRefName,isDraft,mergeable,mergeStateStatus,
#    autoMergeRequest`. Routes to the matching handler:
#     claude/*                          → skip (GHA bot review owns it)
#     loop/* with autoMergeRequest      → skip (already armed)
#     loop/* without autoMergeRequest   → _loop_pr_merge_self (arm auto-merge)
#     isDraft                           → skip
#     BEHIND                            → _loop_pr_rebase
#     CLEAN + MERGEABLE + no auto-merge → _loop_pr_merge_self (arm auto-merge)
#     empty diff                        → _loop_pr_close_with_comment
#     otherwise (CI pending / blocked)  → skip
#   Prints the chosen route token (for tests / logs). Always exits 0.
_loop_pr_route() {
  # Args: $1 one PR object as JSON. Runs the matching handler (if any) and
  # prints the route token on stdout. Always returns 0.
  local json="$1"
  if [ -z "$json" ]; then
    echo "skip"
    return 0
  fi

  local num head_ref is_draft mergeable merge_state auto_merge
  num=$(jq -r '.number // ""' 2>/dev/null <<< "$json")
  head_ref=$(jq -r '.headRefName // ""' 2>/dev/null <<< "$json")
  is_draft=$(jq -r '.isDraft // false' 2>/dev/null <<< "$json")
  mergeable=$(jq -r '.mergeable // ""' 2>/dev/null <<< "$json")
  merge_state=$(jq -r '.mergeStateStatus // ""' 2>/dev/null <<< "$json")
  auto_merge=$(jq -r 'if (.autoMergeRequest // null) == null then "false" else "true" end' 2>/dev/null <<< "$json")

  if [ -z "$num" ]; then
    echo "skip"
    return 0
  fi

  # Branch-prefix rules are decided first; every other PR walks the chain
  # draft → BEHIND → clean-and-mergeable → empty diff, defaulting to "skip".
  local route="skip"
  case "$head_ref" in
    claude/*)
      route="skip_claude"
      ;;
    loop/*)
      if [ "$auto_merge" = "true" ]; then
        route="skip_loop_auto_armed"
      else
        _loop_pr_merge_self "$num" "$auto_merge" >/dev/null 2>&1 || true
        route="merge_self"
      fi
      ;;
    *)
      if [ "$is_draft" = "true" ]; then
        route="skip_draft"
      elif [ "$merge_state" = "BEHIND" ]; then
        _loop_pr_rebase "$num" "$head_ref" >/dev/null 2>&1 || true
        route="rebase"
      elif [ "$merge_state" = "CLEAN" ] && [ "$mergeable" = "MERGEABLE" ] && [ "$auto_merge" != "true" ]; then
        _loop_pr_merge_self "$num" "$auto_merge" >/dev/null 2>&1 || true
        route="set_auto_merge"
      elif _loop_pr_empty_diff "$num"; then
        _loop_pr_close_with_comment "$num" \
          "changes already merged to main — empty diff, closing automatically" \
          >/dev/null 2>&1 || true
        route="close_empty"
      fi
      ;;
  esac

  echo "$route"
  return 0
}

# _alert_log_file — echo path to alert-log.jsonl (used by `roll alert log` CLI).
_alert_log_file() {
  # Ensures .roll/state exists (relative to the current directory, best
  # effort) and prints the path of the alert log inside it.
  local state_dir=".roll/state"
  mkdir -p "$state_dir" 2>/dev/null || true
  printf '%s\n' "${state_dir}/alert-log.jsonl"
}

# FIX-070: flip a story row in the main repo's .roll/backlog.md between
# 📋 Todo and 🔨 In Progress. The cycle worktree is gitignored at .roll/,
# so editing the worktree copy + committing leaves no trace in git — and
# main's backlog (which roll-brief reads) stays stale. These helpers write
# directly to ${ROLL_MAIN_PROJECT}/.roll/backlog.md instead.
#
# _loop_mark_in_progress <story-id> [backlog-path]
#   Replace "📋 Todo" with "🔨 In Progress" on the row containing <story-id>.
#   No-op when backlog or row is missing (idempotent retries don't error).
_loop_mark_in_progress() {
  # Args: $1 story id, $2 optional backlog path (defaults to the backlog under
  # ROLL_MAIN_PROJECT, or the current directory). Returns 1 without a story
  # id, 0 when the backlog file does not exist.
  local story_id="$1"
  local backlog="${2:-${ROLL_MAIN_PROJECT:-$PWD}/.roll/backlog.md}"
  [ -n "$story_id" ] || return 1
  [ -f "$backlog" ] || return 0
  local tmp; tmp=$(mktemp "${backlog}.XXXXXX") || return 1
  # FIX-106: compare the id column (second "|"-separated field) for equality
  # instead of searching the whole row, so a row that merely mentions the id
  # (e.g. in "depends-on:<id>") is left untouched.
  awk -v sid="$story_id" '
    index($0, "📋 Todo") > 0 {
      n = split($0, cols, "|")
      if (n >= 2) {
        cell = cols[2]
        gsub(/[[:space:]]/, "", cell)
        # Markdown link form "[ID](path)" → keep just "ID"
        sub(/^\[/, "", cell)
        sub(/\].*$/, "", cell)
        if (cell == sid) {
          sub(/📋 Todo/, "🔨 In Progress")
        }
      }
    }
    { print }
  ' "$backlog" > "$tmp" && mv "$tmp" "$backlog"
}

# US-AGENT-008: _loop_mark_hold <story-id> <reason> [backlog-path]
#   Flip "🔨 In Progress" or "📋 Todo" row to "🚫 Hold" with a parenthetical
#   reason suffix appended to the description column. Idempotent — if the
#   row is already 🚫 Hold the call is a no-op (status compare is exact).
_loop_mark_hold() {
  # Args: $1 story id, $2 reason (default "self-downgrade"), $3 optional
  # backlog path. Flips the story's row from In Progress / Todo to Hold and
  # appends " → <reason>" in front of the status cell unless the row already
  # mentions the reason. Returns 1 without a story id, 0 when the backlog
  # file does not exist.
  local story_id="$1"
  local reason="${2:-self-downgrade}"
  local backlog="${3:-${ROLL_MAIN_PROJECT:-$PWD}/.roll/backlog.md}"
  [ -n "$story_id" ] || return 1
  [ -f "$backlog" ] || return 0
  local tmp; tmp=$(mktemp "${backlog}.XXXXXX") || return 1
  awk -v sid="$story_id" -v reason="$reason" '
    BEGIN { marker = " | 🚫 Hold |" }
    {
      line = $0
      if (index(line, "🔨 In Progress") > 0 || index(line, "📋 Todo") > 0) {
        n = split(line, cols, "|")
        if (n >= 2) {
          # Same id-column equality match as _loop_mark_in_progress.
          id_cell = cols[2]
          gsub(/[[:space:]]/, "", id_cell)
          sub(/^\[/, "", id_cell)
          sub(/\].*$/, "", id_cell)
          if (id_cell == sid) {
            sub(/🔨 In Progress/, "🚫 Hold", line)
            sub(/📋 Todo/, "🚫 Hold", line)
            # Append reason to the description column only if not already
            # present.
            if (index(line, "→ " reason) == 0 && index(line, "(" reason ")") == 0) {
              # Splice the reason in before the first " | 🚫 Hold |" with
              # index/substr rather than sub(): in a sub() replacement an "&"
              # inside the reason expands to the matched text.
              at = index(line, marker)
              if (at > 0) {
                line = substr(line, 1, at - 1) " → " reason substr(line, at)
              }
            }
          }
        }
      }
      print line
    }
  ' "$backlog" > "$tmp" && mv "$tmp" "$backlog"
}

# US-AGENT-008: self-downgrade primitive.
#   _loop_self_downgrade <story-id> [reason] [sub-ids-csv]
#
# Records that a story was handed back instead of being built:
#   1. asks _loop_mark_hold to flip the story to 🚫 Hold in the main
#      project's backlog, annotated with "split to <subs>" when a sub list
#      is given, otherwise with the reason (failure there is tolerated);
#   2. appends one timestamped line to <shared-root>/loop/ALERT-<slug>.md;
#   3. emits an "agent_self_downgrade" event when _loop_event is defined;
#   4. prints a one-line "[loop] self-downgrade ..." summary on stdout.
# The sub-story rows and feature md themselves are produced by the SKILL
# invocation of roll-design --from-story; this helper only records the
# contract.
#
# Arguments: $1 story id (required; empty → return 1)
#            $2 reason (default: too_big)
#            $3 comma-separated sub-story ids (optional)
_loop_self_downgrade() {
  local story_id="$1" reason="${2:-too_big}" subs="${3:-}"
  if [ -z "$story_id" ]; then
    return 1
  fi

  local project_root="${ROLL_MAIN_PROJECT:-$PWD}"

  # Prefer the split annotation when sub-stories were named.
  local hold_note="$reason"
  [ -z "$subs" ] || hold_note="split to ${subs}"
  _loop_mark_hold "$story_id" "$hold_note" "${project_root}/.roll/backlog.md" || true

  # ALERT line for human visibility; the slug falls back to the directory
  # name when _project_slug is unavailable or fails.
  local slug
  slug=$(_project_slug "$project_root" 2>/dev/null || basename "$project_root")
  local alert_file="${_SHARED_ROOT:-$HOME/.shared/roll}/loop/ALERT-${slug}.md"
  mkdir -p "$(dirname "$alert_file")"
  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  printf '[%s] self-downgrade: %s — reason: %s; subs: %s\n' \
    "$stamp" "$story_id" "$reason" "${subs:-<none>}" >> "$alert_file"

  # Best-effort event emission: skipped silently when the helper is absent.
  if declare -F _loop_event >/dev/null 2>&1; then
    _loop_event "agent_self_downgrade" "${LOOP_CYCLE_ID:-$story_id}" "$story_id" "${reason}|${subs}" || true
  fi

  echo "[loop] self-downgrade ${story_id}: ${reason}; subs=${subs:-<none>}"
}

# US-AGENT-009: _loop_chain_depth_cap_check <story-id> [backlog]
#   Predicate: may this story still be auto re-split?
#
#   The story's feature md is located through its "[ID](path#anchor)" link
#   in the backlog; inside that file the section opened by
#   <a id="<lowercased id>"> is searched for a "- chain_depth: N" line.
#
#   Returns 0 (split allowed) when chain_depth < 2, and also whenever the
#   depth cannot be determined: empty id, missing backlog, no link, missing
#   feature md, or an absent / non-numeric value (all treated as depth 0).
#   Returns non-zero once chain_depth is 2 or more.
_loop_chain_depth_cap_check() {
  local story_id="$1"
  local backlog="${2:-${ROLL_MAIN_PROJECT:-$PWD}/.roll/backlog.md}"
  if [ -z "$story_id" ] || [ ! -f "$backlog" ]; then
    return 0
  fi

  # First backlog row linking this story → path part of the link target
  # (anything from "#" on is the anchor and is dropped).
  local feature_md
  feature_md=$(grep -E "\[${story_id}\]\(" "$backlog" 2>/dev/null \
    | head -1 \
    | sed -E "s/.*\[${story_id}\]\(([^)#]+)#?[^)]*\).*/\1/")
  if [ -z "$feature_md" ] || [ ! -f "$feature_md" ]; then
    return 0
  fi

  # Anchors in the feature md are the lowercased story id.
  local wanted
  wanted=$(echo "$story_id" | tr '[:upper:]' '[:lower:]')

  local found_depth
  found_depth=$(awk -v anchor="$wanted" '
    /<a id="/ {
      # Every anchor line switches the current section on or off.
      if (match($0, /<a id="[^"]+"/)) {
        here = substr($0, RSTART + 7, RLENGTH - 8)
        inside = (here == anchor)
      }
      next
    }
    inside && /^- chain_depth:/ {
      sub(/^- chain_depth:[ \t]*/, "")
      sub(/[ \t].*$/, "")
      print
      exit
    }
  ' "$feature_md")

  # Nothing found, or not a plain number: same as depth 0, split allowed.
  case "$found_depth" in
    ''|*[!0-9]*) return 0 ;;
  esac

  [ "$found_depth" -lt 2 ]
}

# US-AGENT-009: cap-hit path.
#   _loop_split_cap_hit <story-id> [reason]
#
# Called once a story has reached chain_depth >= 2: further auto re-split is
# refused. The story is flipped to 🚫 Hold via _loop_mark_hold (annotated
# "StorySplitCapHit: <reason>", failure tolerated), a timestamped ALERT line
# asking for human triage is appended to <shared-root>/loop/ALERT-<slug>.md,
# a "story_split_cap_hit" event is emitted when _loop_event is defined, and
# a one-line summary is printed on stdout.
#
# Arguments: $1 story id (required; empty → return 1)
#            $2 reason (default: cap-hit)
_loop_split_cap_hit() {
  local story_id="$1" reason="${2:-cap-hit}"
  if [ -z "$story_id" ]; then
    return 1
  fi

  local project_root="${ROLL_MAIN_PROJECT:-$PWD}"
  _loop_mark_hold "$story_id" "StorySplitCapHit: $reason" "${project_root}/.roll/backlog.md" || true

  # Slug falls back to the directory name when _project_slug is unavailable.
  local slug
  slug=$(_project_slug "$project_root" 2>/dev/null || basename "$project_root")
  local alert_file="${_SHARED_ROOT:-$HOME/.shared/roll}/loop/ALERT-${slug}.md"
  mkdir -p "$(dirname "$alert_file")"
  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  printf '[%s] StorySplitCapHit: %s — chain_depth >= 2 (third auto-split refused). %s. Human triage required.\n' \
    "$stamp" "$story_id" "$reason" >> "$alert_file"

  # Best-effort event emission: skipped silently when the helper is absent.
  if declare -F _loop_event >/dev/null 2>&1; then
    _loop_event "story_split_cap_hit" "${LOOP_CYCLE_ID:-$story_id}" "$story_id" "$reason" || true
  fi

  echo "[loop] StorySplitCapHit ${story_id}: chain_depth >= 2 — held for human triage"
}

# US-SKILL-010: unified self-score note writer for roll-build /
# roll-fix / roll-design. Lands under .roll/notes/ with YAML frontmatter
# so subsequent stories (US-SKILL-014 trend, US-SKILL-015 docs) can
# read/aggregate without parsing free text.
#
# Args: skill story_id score:int verdict [rationale...]
#   skill     roll-build | roll-fix | roll-design
#   story_id  non-empty story id (becomes part of the file name)
#   score     integer 1..10 (leading zeros tolerated)
#   verdict   good | ok | regression
#   rationale remaining words, joined with spaces into the note body
#
# Writes <notes-dir>/<date>-<skill>-<story>-<epoch>.md and returns 0.
# Returns 1, after reporting through err, when any argument is invalid;
# nothing is written in that case.
_skill_write_self_score() {
  local skill="${1:-}"
  local story="${2:-}"
  local score="${3:-}"
  local verdict="${4:-}"
  # With fewer than four args the shift fails and is ignored; validation
  # below then rejects the empty fields.
  shift 4 2>/dev/null || true
  local rationale="${*:-}"

  case "$skill" in
    roll-build|roll-fix|roll-design) ;;
    *) err "_skill_write_self_score: skill must be roll-build / roll-fix / roll-design (got '$skill')"; return 1 ;;
  esac
  [ -n "$story" ] || { err "_skill_write_self_score: story id required"; return 1; }
  case "$score" in
    ''|*[!0-9]*) err "_skill_write_self_score: score must be integer 1..10"; return 1 ;;
  esac
  # Range-check on the digit string rather than with `[ -lt ]` / `[ -gt ]`:
  # those error out (status 2) on digit strings too large for the shell's
  # integers, which made both comparisons "false" and let such a score
  # through. Leading zeros are stripped first so "08" stays accepted.
  local score_digits="${score#"${score%%[!0]*}"}"
  case "$score_digits" in
    [1-9]|10) ;;
    *)
      err "_skill_write_self_score: score out of range (1..10): $score"
      return 1
      ;;
  esac
  case "$verdict" in
    good|ok|regression) ;;
    *) err "_skill_write_self_score: verdict must be good / ok / regression (got '$verdict')"; return 1 ;;
  esac

  # FIX-176: anchor notes to <project>/.roll/notes regardless of cwd. When
  # invoked with cwd already inside the .roll dir (e.g. a self-score run from
  # within .roll), the bare ".roll/notes" doubled to .roll/.roll/notes.
  local notes_dir=".roll/notes"
  if [ "$(basename "$PWD")" = ".roll" ]; then
    notes_dir="notes"
  fi
  mkdir -p "$notes_dir"
  local ts; ts=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  local date_part="${ts%%T*}"
  local epoch; epoch=$(date -u +%s)
  local file="${notes_dir}/${date_part}-${skill}-${story}-${epoch}.md"

  # YAML frontmatter, a blank line, then the free-text rationale.
  {
    printf -- '---\n'
    printf 'skill: %s\n' "$skill"
    printf 'story: %s\n' "$story"
    printf 'score: %s\n' "$score"
    printf 'verdict: %s\n' "$verdict"
    printf 'ts: %s\n' "$ts"
    printf -- '---\n'
    printf '\n'
    printf '%s\n' "$rationale"
  } > "$file"
}

# _loop_mark_todo <story-id> [backlog-path]
#   Revert a row from "🔨 In Progress" back to "📋 Todo". Called when a
#   cycle's executor fails so the next cycle can pick the story up again.
#
#   Arguments:
#     $1  story id (required)
#     $2  backlog path (default: ${ROLL_MAIN_PROJECT:-$PWD}/.roll/backlog.md)
#   Returns:
#     0  backlog rewritten, or backlog missing
#     1  empty story id, mktemp failure, or the rewrite itself failed
_loop_mark_todo() {
  local story_id="$1"
  local backlog="${2:-${ROLL_MAIN_PROJECT:-$PWD}/.roll/backlog.md}"
  [ -n "$story_id" ] || return 1
  [ -f "$backlog" ] || return 0
  local tmp; tmp=$(mktemp "${backlog}.XXXXXX") || return 1
  # FIX-106: same column-2 equality match as _loop_mark_in_progress.
  if awk -v sid="$story_id" '
    {
      if (index($0, "🔨 In Progress") > 0) {
        n = split($0, cols, "|")
        if (n >= 2) {
          id_cell = cols[2]
          gsub(/[[:space:]]/, "", id_cell)
          # Markdown link form "[ID](path)" → keep just "ID"
          sub(/^\[/, "", id_cell)
          sub(/\].*$/, "", id_cell)
          if (id_cell == sid) {
            sub(/🔨 In Progress/, "📋 Todo")
          }
        }
      }
      print
    }
  ' "$backlog" > "$tmp" && mv "$tmp" "$backlog"; then
    return 0
  fi
  # Rewrite failed; the backlog is untouched, so remove the scratch file
  # instead of leaving backlog.md.XXXXXX litter behind.
  rm -f "$tmp"
  return 1
}

# FIX-048: report story IDs already claimed by open loop/* PRs so a new cycle
# can skip them before scanning BACKLOG. Without this gate, a cycle launched
# before the previous cycle's PR merges would re-pick the same Todo story
# (its worktree branches from main, where the 🔨 mark is not yet visible).
#
# _loop_pr_claimed_stories
#   For every open PR whose head branch starts with "loop/", fetch that
#   branch's .roll/backlog.md through the GitHub contents API and collect
#   the story-id column of each "🔨 In Progress" row.
#   Stdout: one story ID per line, deduped. Empty when nothing claimed.
#   Exit:   0 always (lenient: gh missing / API failure → empty output).
_loop_pr_claimed_stories() {
  # _gh_resolve is expected to store the "owner/repo" slug in the named
  # variable; when it fails nothing can be queried.
  local slug; _gh_resolve slug || return 0
  local branches
  branches=$(gh -R "$slug" pr list --state open \
    --json headRefName \
    --jq '.[] | select(.headRefName | startswith("loop/")) | .headRefName' \
    2>/dev/null) || return 0
  [ -n "$branches" ] || return 0

  local branch content ids claimed=""
  while IFS= read -r branch; do
    [ -n "$branch" ] || continue
    # `gh api` has no -R/--repo option (only repo-scoped commands such as
    # `gh pr` do), and the endpoint path already names the repository, so
    # the flag is not passed here. With it, gh rejects the call and every
    # branch is skipped by the `|| continue` below.
    content=$(gh api \
      "repos/${slug}/contents/.roll/backlog.md?ref=${branch}" \
      -H "Accept: application/vnd.github.raw" 2>/dev/null) || continue
    [ -n "$content" ] || continue
    # Column 2 is the story id; unwrap the "[ID](path)" link form.
    ids=$(printf '%s\n' "$content" \
      | awk -F'|' '/🔨 In Progress/ {
          gsub(/^[[:space:]]+|[[:space:]]+$/, "", $2)
          sub(/^\[/, "", $2)
          sub(/\].*$/, "", $2)
          if ($2 != "") print $2
        }')
    if [ -n "$ids" ]; then
      claimed="${claimed}${ids}"$'\n'
    fi
  done <<< "$branches"

  printf '%s' "$claimed" | awk 'NF' | sort -u
}

# US-CL-004: changelog style gate, Phase 1 — mechanical linter.
#
# _changelog_lint_bullet <bullet-text>
#   Stdout: one violation tag per line; empty = bullet passes.
#   Exit:   0 always (callers read the output stream, not the exit code).
#
# Violation tags (emitted in this order):
#   backtick-identifier  `…` contains `_` or `()`  (e.g. `_foo`, `bar()`)
#   file-suffix          `.md`/`.sh`/`.yml`/`.ts`/`.bats` outside backticks
#   internal-word        Phase N / Step N / Helper / Schema / Fixture / Refactor
#   over-length          > 50 visible chars (UTF-8 code points; CJK text is
#                        counted per character, not per byte)
#   path-fragment        .roll/ / docs/ / bin/ / tests/ / scripts/ outside
#                        backticks
#
# Backticks are treated as the "user-quoted" zone — content there is assumed
# to be a real user command (e.g. `roll edit notes.md`) and is excluded from
# the file-suffix / path-fragment checks.
_changelog_lint_bullet() {
  local bullet="$1"
  # Copy of the bullet with every `…` span removed, for the checks that
  # only apply outside backticks.
  local stripped
  stripped=$(printf '%s' "$bullet" | sed -E 's/`[^`]*`//g')

  if printf '%s' "$bullet" | grep -qE '`[^`]*(_|\(\))[^`]*`'; then
    echo "backtick-identifier"
  fi
  if printf '%s' "$stripped" | grep -qE '\.(md|sh|yml|ts|bats)([^A-Za-z0-9]|$)'; then
    echo "file-suffix"
  fi
  if printf '%s' "$bullet" | grep -qE '(Phase|Step)[[:space:]]+[0-9]+|Helper|Schema|Fixture|Refactor'; then
    echo "internal-word"
  fi
  # Count code points without relying on a UTF-8 locale being installed:
  # drop the UTF-8 continuation bytes (0x80-0xBF) and count what is left.
  # `LC_ALL=C.UTF-8 wc -m` degrades to a byte count where that locale is
  # missing, which flagged short CJK bullets as over-length.
  local len
  len=$(printf '%s' "$bullet" | LC_ALL=C tr -d '\200-\277' | wc -c | tr -d ' ')
  if [ "${len:-0}" -gt 50 ]; then
    echo "over-length"
  fi
  if printf '%s' "$stripped" | grep -qE '(^|[^A-Za-z0-9_])(\.roll|docs|bin|tests|scripts)/'; then
    echo "path-fragment"
  fi
  return 0
}

# US-CL-004: changelog few-shot style anchors — extract bullets from the
# most recent 3 published `## v...` sections of CHANGELOG.md (skipping
# `## Unreleased` and any other non-version `## ` section). Capped at 1500
# bytes so the agent's context stays lean.
#
# _changelog_style_anchors [changelog-path]
#   Stdout: bullet lines ("- ...") from the first 3 `## v` sections in file
#           order, whole lines only.
#   Exit:   0 (empty output when no CHANGELOG.md or no released sections).
_changelog_style_anchors() {
  local changelog="${1:-CHANGELOG.md}"
  [ -f "$changelog" ] || return 0
  # The cap is applied inside awk on whole lines: `head -c 1500` could stop
  # in the middle of a bullet, or of a multi-byte character, and hand the
  # agent a truncated example. LC_ALL=C makes length() count bytes so the
  # budget means the same thing under every awk and locale.
  LC_ALL=C awk -v cap=1500 '
    /^## v/ { ver++; if (ver > 3) exit; printing = 1; next }
    /^## /  { printing = 0 }
    printing && /^- / {
      used += length($0) + 1
      if (used > cap) exit
      print
    }
  ' "$changelog" || true
}

# US-CL-005: changelog style gate, Phase 2 — self-audit gate.
#
# _changelog_audit_bullet <bullet>
#   Stricter than _changelog_lint_bullet: 5 boolean rules, 30-char cap.
#   Stdout: one failed-rule tag per line; empty = bullet passes.
#   Exit:   0 always.
#
# Rules (checked and reported in this order):
#   over-length-30   visible chars > 30 AND no backtick (user-cmd escape hatch)
#   internal-id      backtick content contains `_` or `()`
#   path-or-suffix   .md/.sh/.yml/.ts/.bats or .roll/docs/bin/tests/scripts/
#                    outside backticks
#   phase-step       `Phase N` / `Step N` workflow vocabulary
#   bad-shape        no `—` (em dash) AND no `不再` ("no longer") AND no
#                    `现在` ("now") keyword
_changelog_audit_bullet() {
  local bullet="$1"
  # Bullet with every `…` span removed, for the outside-backticks rule.
  local stripped
  stripped=$(printf '%s' "$bullet" | sed -E 's/`[^`]*`//g')

  # Rule 1: length cap 30 (user-command in backticks bypasses this rule).
  if ! printf '%s' "$bullet" | grep -q '`'; then
    # Count code points by dropping UTF-8 continuation bytes (0x80-0xBF).
    # `LC_ALL=C.UTF-8 wc -m` silently counts bytes where that locale is not
    # installed, which tripled the length of CJK bullets.
    local len
    len=$(printf '%s' "$bullet" | LC_ALL=C tr -d '\200-\277' | wc -c | tr -d ' ')
    if [ "${len:-0}" -gt 30 ]; then
      echo "over-length-30"
    fi
  fi

  # Rule 2: internal identifier inside backticks.
  if printf '%s' "$bullet" | grep -qE '`[^`]*(_|\(\))[^`]*`'; then
    echo "internal-id"
  fi

  # Rule 3: file suffix / path fragment outside backticks.
  if printf '%s' "$stripped" | grep -qE '\.(md|sh|yml|ts|bats)([^A-Za-z0-9]|$)' \
    || printf '%s' "$stripped" | grep -qE '(^|[^A-Za-z0-9_])(\.roll|docs|bin|tests|scripts)/'; then
    echo "path-or-suffix"
  fi

  # Rule 4: workflow vocabulary.
  if printf '%s' "$bullet" | grep -qE '(Phase|Step)[[:space:]]+[0-9]+'; then
    echo "phase-step"
  fi

  # Rule 5: required shape — em dash, 不再, or 现在. Matched as fixed byte
  # strings in the C locale so the result does not depend on which locales
  # are installed (UTF-8 is self-synchronizing, so a byte match is a
  # character match).
  if ! printf '%s' "$bullet" | LC_ALL=C grep -qF -e '—' -e '不再' -e '现在'; then
    echo "bad-shape"
  fi

  return 0
}

# _changelog_audit_log <verdict> <round> <bullet> [<reason>...]
#   Append one JSONL record
#     {ts, verdict, round, bullet, reasons}
#   to the changelog audit log. <round> is passed to jq as a JSON value, so
#   it must be a number; every extra argument becomes one string in the
#   "reasons" array (empty array when none are given).
#   Log path: $ROLL_CHANGELOG_AUDIT_LOG when set (tests use this to stay out
#   of $HOME), otherwise ${_SHARED_ROOT}/loop/changelog-audit.jsonl. The
#   parent directory is created on demand. Requires jq.
_changelog_audit_log() {
  local verdict="$1" round="$2" bullet="$3"
  shift 3

  local log="${ROLL_CHANGELOG_AUDIT_LOG:-${_SHARED_ROOT}/loop/changelog-audit.jsonl}"
  mkdir -p "$(dirname "$log")"

  local ts
  ts=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  # Remaining positional args are the failed-rule tags: one JSON string per
  # line, then slurped into a compact array.
  local reasons_json='[]'
  [ "$#" -eq 0 ] || reasons_json=$(printf '%s\n' "$@" | jq -R . | jq -sc .)

  jq -nc \
    --arg ts "$ts" \
    --arg verdict "$verdict" \
    --argjson round "$round" \
    --arg bullet "$bullet" \
    --argjson reasons "$reasons_json" \
    '{ts:$ts, verdict:$verdict, round:$round, bullet:$bullet, reasons:$reasons}' \
    >> "$log"
}

# _changelog_audit_gate <round1> [<round2> <round3>]
#   Run up to 3 candidate bullets through _changelog_audit_bullet, in order.
#   First clean candidate wins: print bullet to stdout, return 0.
#   All examined candidates failed (3, or fewer when fewer were passed):
#   print the ⚠️-prefixed last candidate, append an ALERT block to
#   $_LOOP_ALERT, return 1. Candidates beyond the third are ignored.
#   Each examined round writes a _changelog_audit_log record (pass/fail,
#   round number, bullet, failed-rule tags).
_changelog_audit_gate() {
  # candidate/tag are declared local so the loop does not clobber
  # same-named variables in the caller (shell locals are otherwise global).
  local i=0 last="" candidate tag
  local -a viols
  for candidate in "$@"; do
    i=$((i + 1))
    last="$candidate"
    # Collect the failed-rule tags, one per output line.
    viols=()
    while IFS= read -r tag; do
      if [ -n "$tag" ]; then
        viols+=("$tag")
      fi
    done < <(_changelog_audit_bullet "$candidate")
    if [ "${#viols[@]}" -eq 0 ]; then
      _changelog_audit_log pass "$i" "$candidate"
      printf '%s\n' "$candidate"
      return 0
    fi
    _changelog_audit_log fail "$i" "$candidate" "${viols[@]}"
    [ "$i" -ge 3 ] && break
  done
  # All 3 rounds failed (or fewer if caller passed < 3).
  mkdir -p "$(dirname "$_LOOP_ALERT")" 2>/dev/null
  {
    echo ""
    echo "# ALERT — changelog audit failed after $i rounds"
    echo "**Time**: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
    echo "**Bullet**: $last"
    echo "**Action**: kept under \`## Unreleased\` with ⚠️ prefix; human review recommended."
  } >> "$_LOOP_ALERT"
  printf '⚠️ %s\n' "$last"
  return 1
}

# US-AUTO-036: worktree helpers (loop-safe pure additions).
#
# Phase 1 of worktree isolation — these helpers are NOT yet called by
# runner.sh. US-AUTO-037 (wired by hand) wires them into
# _write_loop_runner_script. Do not delete or inline; they are unit-tested
# in tests/unit/roll_worktree.bats.

# _worktree_path <slug> <us-id>
#   Print the canonical worktree directory for a (project, story) pair:
#   ${_SHARED_ROOT}/worktrees/<slug>-<us-id>. Pure string formatting —
#   nothing is created or checked on disk.
_worktree_path() {
  printf '%s\n' "${_SHARED_ROOT}/worktrees/${1}-${2}"
}

# _worktree_alert <msg>
#   Append "[<UTC timestamp>] worktree: <msg>" to $_LOOP_ALERT, creating the
#   file's parent directory first when needed. Used by the failure paths in
#   _worktree_merge_back to surface stuck worktrees.
_worktree_alert() {
  local alert_dir
  alert_dir=$(dirname "$_LOOP_ALERT")
  mkdir -p "$alert_dir" 2>/dev/null
  local now
  now=$(date -u +%FT%TZ)
  printf '[%s] worktree: %s\n' "$now" "$1" >> "$_LOOP_ALERT"
}

# _worktree_create <path> <branch> <base>
#   Create a worktree at <path> on a new branch <branch> rooted at <base>.
#   Runs git in the caller's cwd, which must be inside the repository.
#
#   Idempotent across failed runs:
#     - an existing <path> is removed (as a worktree, then as a directory);
#     - stale worktree registrations are pruned, so a <path> that was
#       deleted out-of-band no longer blocks re-creation;
#     - an existing local <branch> is deleted so `git worktree add -b`
#       does not error.
#   Returns the exit status of `git worktree add`.
_worktree_create() {
  local path="$1" branch="$2" base="$3"
  mkdir -p "$(dirname "$path")"
  if [ -e "$path" ]; then
    git worktree remove --force "$path" 2>/dev/null || true
    rm -rf "$path" 2>/dev/null || true
  fi
  # A worktree whose directory vanished (rm -rf above after a failed
  # `worktree remove`, or manual deletion) stays registered: git then
  # refuses both to delete its branch ("checked out at ...") and to add a
  # worktree at the same path. Pruning drops those dead registrations.
  git worktree prune 2>/dev/null || true
  if git show-ref --verify --quiet "refs/heads/${branch}"; then
    git branch -D "$branch" >/dev/null 2>&1 || true
  fi
  git worktree add "$path" -b "$branch" "$base"
}

# _worktree_cleanup <path> <branch>
#   Tear down a cycle worktree: unregister it from git, delete whatever is
#   left of the directory, then delete <branch>. Every step tolerates the
#   target already being gone, so retries and partial-failure rollback are
#   safe. Always returns 0.
_worktree_cleanup() {
  local path="$1" branch="$2"
  git worktree remove --force "$path" 2>/dev/null || :
  rm -rf "$path" 2>/dev/null || :
  git branch -D "$branch" 2>/dev/null || :
  return 0
}

# _worktree_fetch_origin <branch>
#   Quietly run `git fetch origin <branch>`. Deliberately lenient: a missing
#   remote or a network blip must not derail the loop, so a failed fetch
#   only logs one line to stderr and the function still returns 0 (the
#   loop's later ff-only check is the strict gate).
_worktree_fetch_origin() {
  local branch="$1"
  git fetch origin "$branch" --quiet 2>/dev/null \
    || echo "[worktree] fetch origin ${branch} failed (lenient, continuing)" >&2
  return 0
}

# _worktree_submodule_init <path>
#   Run `git submodule update --init --recursive --quiet` inside the
#   worktree at <path> so its working tree is materially complete. The cd
#   happens in a subshell, leaving the caller's cwd untouched.
#   Returns the exit status of the submodule update, or cd's failure
#   status when <path> cannot be entered.
_worktree_submodule_init() {
  local path="$1"
  (
    cd "$path" || exit
    git submodule update --init --recursive --quiet
  )
}

# _worktree_sync_meta <path>
#   FIX-069: copy the .roll/ directory of the current working directory
#   (the main repo) into <path>/.roll/ as a read-only reference for the
#   cycle worktree. Without this, the loop runs in a clean git checkout
#   with no .roll/ (it's gitignored), so the agent finds no backlog and no
#   skill entry points — the whole cycle no-ops.
#
#   Not copied: .git/, state/, scratch/, *.lock, last-test-pass, and the
#   loop event/run logs (events.ndjson*, runs.jsonl*), so the worktree
#   never inherits main's live cycle state.
#   Single-shot: nothing is written back; the copy is thrown away with the
#   worktree. No-op when cwd has no .roll/. rsync failures are ignored.
_worktree_sync_meta() {
  local path="$1"
  if [ ! -d ".roll" ]; then
    return 0
  fi

  local -a skip=(
    --exclude='.git/'
    --exclude='state/'
    --exclude='scratch/'
    --exclude='*.lock'
    --exclude='last-test-pass'
    --exclude='events.ndjson*'
    --exclude='runs.jsonl*'
  )
  rsync -a "${skip[@]}" .roll/ "$path/.roll/" 2>/dev/null || true

  # FIX-085: hard-constrain the "skip 🔨 In Progress" rule from the runner
  # side. SKILL.md tells the agent to skip 🔨 rows but agents don't always
  # comply, so those rows are stripped from the worktree's backlog copy —
  # the agent can't pick a row it can't see. Main backlog untouched.
  local copied_backlog="$path/.roll/backlog.md"
  if [ -f "$copied_backlog" ]; then
    # -i.bak works with both GNU and BSD sed; the backup is removed right away.
    sed -i.bak '/| 🔨 In Progress |$/d' "$copied_backlog" 2>/dev/null || true
    rm -f "${copied_backlog}.bak"
  fi
}

# US-LOOP-068: set up .roll/ inside the product worktree as a roll-meta git
# worktree. Must be called after the product worktree is created.
#
#   _loop_roll_meta_worktree_setup <worktree> <branch> <project-path>
#
# <project-path>/.roll must itself be a git repository (have a .git
# directory). Any existing <worktree>/.roll is deleted, then a worktree of
# that meta repo is added there on a new <branch> based on origin/main.
# Returns 0 on success, 1 when the meta repo is missing, otherwise the
# failure status of `git worktree add`.
_loop_roll_meta_worktree_setup() {
  local wt="$1" branch="$2" project_path="$3"
  local meta_repo="${project_path}/.roll"
  if [ ! -d "$meta_repo/.git" ]; then
    echo "[loop] roll-meta setup: ${meta_repo} is not a git repo"
    return 1
  fi
  local meta_wt="${wt}/.roll"
  rm -rf "$meta_wt"
  git -C "$meta_repo" worktree add "$meta_wt" -b "$branch" "origin/main"
}

# US-LOOP-068: clean up the roll-meta worktree inside the product worktree.
#
#   _loop_roll_meta_worktree_cleanup <worktree> <branch> <project-path>
#
# No-op (return 0) when <project-path>/.roll is not a git repository.
# Otherwise unregisters <worktree>/.roll from the meta repo, deletes the
# directory, and deletes <branch> there; each step tolerates failure.
_loop_roll_meta_worktree_cleanup() {
  local wt="$1" branch="$2" project_path="$3"
  local meta_repo="${project_path}/.roll"
  if [ ! -d "$meta_repo/.git" ]; then
    return 0
  fi
  local meta_wt="${wt}/.roll"
  git -C "$meta_repo" worktree remove --force "$meta_wt" 2>/dev/null || :
  rm -rf "$meta_wt" 2>/dev/null || :
  git -C "$meta_repo" branch -D "$branch" 2>/dev/null || :
}

# US-LOOP-068: publish a PR from the roll-meta worktree.
#
#   _loop_roll_meta_publish <worktree> <branch> <title>
#
# Runs `_loop_publish_pr <branch> <title>` with <worktree>/.roll as the
# working directory, inside a subshell so the caller's cwd is unchanged.
# Returns that call's status, or cd's failure status when the directory
# cannot be entered.
_loop_roll_meta_publish() {
  local wt="$1" branch="$2" title="$3"
  (
    cd "${wt}/.roll" || exit
    _loop_publish_pr "$branch" "$title"
  )
}

# US-LOOP-068: run the roll-meta test gate when ops/ files were modified.
#
#   _loop_roll_meta_test_gate <worktree>
#
# Looks at `git diff --name-only origin/main..HEAD` inside <worktree>/.roll.
# When any path starts with "ops/", runs `bats <worktree>/.roll/ops/tests`.
# Returns 0 when nothing under ops/ changed, when bats is not installed,
# when the tests directory is missing (both logged and skipped), or when
# the tests pass; returns 1 when the tests fail. Progress goes to stdout.
_loop_roll_meta_test_gate() {
  local wt="$1"

  # Empty when the directory cannot be entered, the diff fails, or no
  # changed path is under ops/.
  local _ops_changed
  _ops_changed=$(cd "${wt}/.roll" && git diff --name-only origin/main..HEAD 2>/dev/null | grep '^ops/' || true)
  if [ -z "$_ops_changed" ]; then
    return 0
  fi

  if ! command -v bats >/dev/null 2>&1; then
    echo "[loop] roll-meta ops/ changed but bats not installed — skipping test gate"
    return 0
  fi

  local _test_dir="${wt}/.roll/ops/tests"
  if [ ! -d "$_test_dir" ]; then
    echo "[loop] roll-meta ops/ changed but no $_test_dir found — skipping test gate"
    return 0
  fi

  echo "[loop] roll-meta test gate: bats ${_test_dir}"
  if bats "${_test_dir}" 2>/dev/null; then
    echo "[loop] roll-meta test gate passed"
    return 0
  fi
  echo "[loop] roll-meta test gate failed"
  return 1
}

# _worktree_merge_back <branch>
#   Caller must be in the main worktree (cwd = main). Steps:
#     1. git pull --ff-only origin main   (sync local main with remote)
#     2. refuse unless the branch is doc-only (see FIX-E below)
#     3. git merge --ff-only <branch>     (linear merge of loop branch)
#     4. git push origin main             (publish)
#   Any failure → write to $_LOOP_ALERT via _worktree_alert and return 1
#   (worktree is left in place by the caller for human inspection, per
#   US-AUTO-036 non-goal). Returns 0 once main has been pushed.
_worktree_merge_back() {
  local branch="$1"
  if ! git pull --ff-only origin main --quiet 2>/dev/null; then
    _worktree_alert "pull --ff-only origin main failed (remote diverged?)"
    return 1
  fi
  # FIX-E (2026-05-25): only doc-only branches may take the ff-merge fast path.
  # Code changes must go through PR + CI; bypassing that gate has caused
  # cycle commits to land on main with red CI, undetected. The check is the
  # same one publish_push uses for the doc-vs-code split; here we apply it
  # to the loop branch's diff against the (already-pulled) main HEAD so the
  # bypass is also closed in the gh-unavailable fallback path. The ALERT
  # body is targeted at the next cycle's agent (human-on-the-loop), telling
  # it how to retry through the normal flow rather than waiting for a human.
  local _changed
  _changed=$(git diff --name-only HEAD.."$branch" 2>/dev/null)
  # The path list is fed to grep through a here-string, not `echo | grep -q`:
  # under `set -o pipefail` grep -q exiting on its first hit can SIGPIPE the
  # writer on a large list, making the pipeline "fail" and the gate read as
  # "no code changes" — exactly the bypass this check exists to close.
  if [ -n "$_changed" ] \
     && grep -qvE '^(\.roll/|CHANGELOG\.md|guide/|site/|\.claude/|BACKLOG\.md|PROPOSALS\.md|docs/)' <<< "$_changed"; then
    _worktree_alert "$(printf '%s\n' \
      "## PRIOR CYCLE FAILED PUBLISH — REQUIRES NORMAL PR FLOW (FIX-E)" \
      "branch: ${branch}" \
      "reason: _loop_publish_pr failed (gh unavailable / API error); ff-merge" \
      "        fallback was refused because the branch contains code changes." \
      "        Code must not bypass PR+CI." \
      "" \
      "next-cycle action (agent reads this and retries automatically):" \
      "  1. SKIP normal Todo scan this cycle." \
      "  2. git push origin ${branch}" \
      "  3. gh pr create --base main --head ${branch}" \
      "  4. gh pr merge ${branch} --auto --squash --delete-branch" \
      "  5. exit cleanly so CI runs and auto-merge takes over.")"
    return 1
  fi
  if ! git merge --ff-only "$branch" --quiet 2>/dev/null; then
    _worktree_alert "merge --ff-only ${branch} failed (not fast-forwardable from main)"
    return 1
  fi
  if ! git push origin main --quiet 2>/dev/null; then
    _worktree_alert "push origin main failed after merging ${branch}"
    return 1
  fi
  return 0
}

# _claude_remote_snapshot [repo]
#   Echo the current set of remote `claude/*` branch names on origin (sans
#   refs/heads/), one per line, sorted. Silent on remote unreachable / no
#   remote / no matches — empty stdout, exit 0.
#
#   Arguments: $1 repository directory (default: ".")
_claude_remote_snapshot() {
  local repo="${1:-.}"
  # ls-remote is captured on its own instead of heading the pipeline: with
  # `set -o pipefail` a failed ls-remote made the whole function return
  # git's error status, contradicting the "exit 0" contract and aborting
  # callers that run under `set -e`.
  local refs
  refs=$(git -C "$repo" ls-remote --heads origin 'refs/heads/claude/*' 2>/dev/null) || return 0
  [ -n "$refs" ] || return 0
  # ls-remote lines are "<sha>\t<ref>"; keep the ref minus its prefix.
  printf '%s\n' "$refs" \
    | awk '{ sub(/^refs\/heads\//, "", $2); print $2 }' \
    | sort
}

# _claude_cleanup_new_branches <prior> [repo]
#   Delete remote `claude/*` branches present now but absent from <prior>
#   (newline-separated list, as emitted by _claude_remote_snapshot). Skips
#   silently when there is no origin or origin is not a GitHub remote. Each
#   successful delete logs one INFO line; failures are silently ignored so
#   the loop's main flow is never derailed. Always returns 0.
#
#   Arguments: $1 prior snapshot (may be empty)
#              $2 repository directory (default: ".")
_claude_cleanup_new_branches() {
  local prior="$1"
  local repo="${2:-.}"
  # `|| return 0` matches the sibling cleanup helpers: without it a repo
  # that has no origin aborted the caller under `set -e`.
  local url; url=$(git -C "$repo" remote get-url origin 2>/dev/null) || return 0
  [[ "$url" == *github.com* ]] || return 0
  local current; current=$(_claude_remote_snapshot "$repo") || current=""
  [ -z "$current" ] && return 0
  # comm needs both lists sorted; the snapshot already is.
  local prior_sorted; prior_sorted=$(printf '%s\n' "$prior" | sort -u)
  local new_branches
  new_branches=$(comm -13 <(printf '%s\n' "$prior_sorted") <(printf '%s\n' "$current"))
  [ -z "$new_branches" ] && return 0
  # Declared local so the read loop does not clobber a caller's $branch.
  local branch
  while IFS= read -r branch; do
    [ -z "$branch" ] && continue
    if git -C "$repo" push origin --delete "$branch" 2>/dev/null; then
      echo "[loop] deleted stale claude branch: $branch"
    fi
  done <<< "$new_branches"
  return 0
}

# _claude_cleanup_stale_worktrees [project_path]
#   Remove local worktrees under <project_path>/.claude/worktrees/ whose
#   branch has been fully merged into main (merge-base --is-ancestor). Active
#   worktrees (branch ahead of main) are preserved. Runs `git worktree prune`
#   afterwards to clear stale metadata. Silent on missing directory or any
#   individual failure so the loop's main flow is never derailed; always
#   returns 0. Logs one "[loop] removed stale worktree" line per removal.
#
#   Arguments: $1 project directory (default: ".")
_claude_cleanup_stale_worktrees() {
  local project_path="${1:-.}"
  local wt_dir="${project_path}/.claude/worktrees"
  [ -d "$wt_dir" ] || return 0
  # Listed once up front; a failure simply leaves the list empty. Every
  # lookup below carries an explicit fallback so that a failing git call
  # cannot abort a caller running under `set -e` / `pipefail`.
  local porcelain
  porcelain=$(git -C "$project_path" worktree list --porcelain 2>/dev/null) || porcelain=""
  local entry branch
  for entry in "$wt_dir"/*/; do
    [ -d "$entry" ] || continue
    # Match on the whole "worktree <path>" line (not field 2) so paths
    # containing spaces are compared intact.
    branch=$(printf '%s\n' "$porcelain" \
      | awk -v p="${entry%/}" '
          /^worktree / { flag = (substr($0, 10) == p) }
          /^branch /   && flag { sub(/^branch refs\/heads\//, ""); print; flag = 0 }
        ') || branch=""
    # Fallback for entries the listing did not match (e.g. the listed path
    # is spelled differently from <project_path>/.claude/worktrees/<name>).
    if [ -z "$branch" ]; then
      branch=$(git -C "$entry" symbolic-ref --short HEAD 2>/dev/null) || branch=""
    fi
    [ -z "$branch" ] && continue
    if git -C "$project_path" merge-base --is-ancestor "$branch" main 2>/dev/null; then
      git -C "$project_path" worktree remove --force "$entry" 2>/dev/null || true
      rm -rf "$entry" 2>/dev/null || true
      git -C "$project_path" branch -D "$branch" 2>/dev/null || true
      echo "[loop] removed stale worktree: $branch"
    fi
  done
  git -C "$project_path" worktree prune 2>/dev/null || true
  return 0
}

# FIX-104: scan multiple ephemeral prefixes (loop/cycle-, worktree-agent-,
# claude/) on origin and delete any branch already merged to origin/main.
# Unmerged branches are preserved — they may be active WIP.
#
# _loop_cleanup_stale_cycle_branches [project_path] [patterns]
#   $1  project directory (default: ".")
#   $2  newline-separated `refs/heads/<prefix>*` patterns; the default
#       whitelist covers every temp prefix the loop / Claude session /
#       worktree-agent paths create.
#   Silent no-op when there is no origin, origin is not a GitHub remote, or
#   the remote cannot be listed. Logs one line per deleted branch. Always
#   returns 0.
#
#   "Merged" is decided on the commit the REMOTE branch points at (the sha
#   reported by ls-remote), not on a local branch that merely shares the
#   name: a stale local copy must never get newer remote work deleted. A
#   sha that is not present locally cannot be verified and is preserved.
_loop_cleanup_stale_cycle_branches() {
  local project_path="${1:-.}"
  local url; url=$(git -C "$project_path" remote get-url origin 2>/dev/null) || return 0
  [[ "$url" == *github.com* ]] || return 0

  local prefixes="${2:-refs/heads/loop/cycle-*
refs/heads/worktree-agent-*
refs/heads/claude/*}"

  # Collect raw "<sha>\t<ref>" lines. A failed ls-remote yields nothing for
  # that pattern instead of aborting a `set -e` / `pipefail` caller.
  local refs="" pat found
  while IFS= read -r pat; do
    [ -z "$pat" ] && continue
    found=$(git -C "$project_path" ls-remote --heads origin "$pat" 2>/dev/null) || found=""
    if [ -n "$found" ]; then
      refs+="${found}"$'\n'
    fi
  done <<< "$prefixes"
  [ -z "$refs" ] && return 0

  local sha ref branch
  while read -r sha ref; do
    [ -z "$ref" ] && continue
    branch="${ref#refs/heads/}"
    if ! git -C "$project_path" merge-base --is-ancestor "$sha" origin/main 2>/dev/null; then
      continue
    fi
    if git -C "$project_path" push origin --delete "$branch" 2>/dev/null; then
      echo "[loop] deleted stale cycle branch: $branch"
    fi
  done <<< "$refs"
  return 0
}

# FIX-104: residual-visibility command. List origin's ephemeral temp branches
# (loop/cycle-*, worktree-agent-*, claude/*) with their merge status so the
# user can see what GC will clean up next cycle and what's still active WIP.
#
# _loop_branches [project_path]
#   Output: TAB-separated `<branch>\t<merged|open>` lines, one per branch.
#   Silent on no origin / non-GitHub remote / empty / unreachable. Always
#   returns 0.
#
#   "merged" means the commit the REMOTE branch points at (the sha reported
#   by ls-remote) is an ancestor of origin/main. The status is not derived
#   from a local branch of the same name, which may be missing or stale; a
#   sha that is not present locally is reported as "open".
_loop_branches() {
  local project_path="${1:-.}"
  local url; url=$(git -C "$project_path" remote get-url origin 2>/dev/null) || return 0
  [[ "$url" == *github.com* ]] || return 0

  local prefixes="refs/heads/loop/cycle-*
refs/heads/worktree-agent-*
refs/heads/claude/*"

  # Collect raw "<sha>\t<ref>" lines. A failed ls-remote yields nothing for
  # that pattern instead of aborting a `set -e` / `pipefail` caller.
  local refs="" pat found
  while IFS= read -r pat; do
    [ -z "$pat" ] && continue
    found=$(git -C "$project_path" ls-remote --heads origin "$pat" 2>/dev/null) || found=""
    if [ -n "$found" ]; then
      refs+="${found}"$'\n'
    fi
  done <<< "$prefixes"
  [ -z "$refs" ] && return 0

  local sha ref status
  while read -r sha ref; do
    [ -z "$ref" ] && continue
    status="open"
    if git -C "$project_path" merge-base --is-ancestor "$sha" origin/main 2>/dev/null; then
      status="merged"
    fi
    printf "%s\t%s\n" "${ref#refs/heads/}" "$status"
  done <<< "$refs"
  return 0
}

# US-AGENT-028: the v1 agent-routes helpers (_loop_agent_routes_path/show/lint)
# are retired. `roll loop agent-routes` now forwards to `roll agent` via the
# deprecated alias in _loop_agent_routes (schema v3 / agents.yaml replaces the
# v1 type/est/risk routing). lib/agent_routes_lint.py is no longer invoked.

# FIX-146: per-story eligibility gate, reusable for pick and re-validation.
#
# _loop_story_is_eligible <story-id> [backlog-path] [open-pr-titles]
#   $1  story id
#   $2  backlog path (default: .roll/backlog.md)
#   $3  newline-separated titles of open PRs (optional)
#
# Returns 0 when the story may be worked, 1 otherwise. A story is eligible
# when all of these hold:
#   - the backlog has a row starting "| <id>" or "| [<id>]";
#   - that row's last non-blank column is exactly "📋 Todo";
#   - _loop_check_depends_on accepts it;
#   - no open PR title mentions the id (FIX-141).
_loop_story_is_eligible() {
  local id="$1"
  local backlog="${2:-.roll/backlog.md}"
  local open_pr_titles="${3:-}"
  if [ -z "$id" ] || [ ! -f "$backlog" ]; then
    return 1
  fi

  # First row whose id column is this story (plain or markdown-link form).
  local row
  row=$(grep -E "^\| \[?${id}[]| ]" "$backlog" | head -n 1)
  if [ -z "$row" ]; then
    return 1
  fi

  # Gate 0: status == 📋 Todo. The status is read from the last non-blank
  # column, not searched anywhere in the row, so a description that merely
  # mentions "📋 Todo" does not count.
  local _status
  _status=$(printf '%s\n' "$row" | awk -F'|' '
    {
      for (i = NF; i >= 1; i--) {
        if ($i ~ /[^ \t]/) {
          cell = $i
          gsub(/^[ \t]+|[ \t]+$/, "", cell)
          print cell
          exit
        }
      }
    }')
  if [ "$_status" != "📋 Todo" ]; then
    return 1
  fi

  # Gate 1: depends-on
  if ! _loop_check_depends_on "$id" "$backlog" >/dev/null 2>&1; then
    return 1
  fi

  # Gate 2 (FIX-141): skip if an open PR already references this story id.
  # The id must not be followed by another id character, so US-A-1 does
  # not match a title about US-A-10.
  [ -z "$open_pr_titles" ] && return 0
  if printf '%s\n' "$open_pr_titles" | grep -qE "${id}([^0-9A-Za-z]|$)"; then
    return 1
  fi
  return 0
}

# US-AGENT-006: pick the next eligible 📋 Todo story from .roll/backlog.md
# applying the same gates as the roll-loop SKILL Step 2:
#   1. Status = 📋 Todo
#   2. depends-on:* (if any) all ✅ Done
#   3. Priority order: FIX > US > REFACTOR
# (gates 1-2, plus the open-PR check, are delegated to
# _loop_story_is_eligible).
#
# _loop_pick_next_story [backlog-path]
#   $1  backlog path (default: .roll/backlog.md)
# stdout: chosen story id (single line)
# exit 0 when picked, 1 when nothing eligible or the backlog is missing.
_loop_pick_next_story() {
  local backlog="${1:-.roll/backlog.md}"
  [ -f "$backlog" ] || return 1

  # FIX-141: fetch open PR titles ONCE so we can skip any story that already
  # has an open PR (it's in review / awaiting merge — re-picking it produces a
  # duplicate PR, as happened with FIX-137 #257 -> #258). gh runs in the cycle
  # worktree (has the remote); empty/unavailable gh => no skipping (safe).
  local _open_pr_titles=""
  if command -v gh >/dev/null 2>&1; then
    _open_pr_titles=$(gh pr list --state open --json title --jq '.[].title' 2>/dev/null || echo "")
  fi

  # One pass over the file per type prefix, in priority order; the first
  # eligible row of the highest-priority type wins.
  local prefix id line
  for prefix in FIX US REFACTOR; do
    # `|| [ -n "$line" ]` keeps a final row that has no trailing newline;
    # a bare `read` returns non-zero on it and the row was silently skipped.
    while IFS= read -r line || [ -n "$line" ]; do
      [ -z "$line" ] && continue
      # Skip non-Todo rows fast
      case "$line" in
        *'📋 Todo'*) ;;
        *) continue ;;
      esac
      # Extract id like FIX-XXX-NNN / US-XXX-NNN / REFACTOR-XXX-NNN.
      # FIX-161: only look in the first column (between the first two '|'),
      # never in the description, to avoid routing a Done story whose id
      # happens to appear in a Todo row's description.
      id=$(printf '%s\n' "$line" | awk -F'|' '{print $2}' | grep -oE "${prefix}-[A-Za-z0-9_-]+" | head -1)
      [ -n "$id" ] || continue
      if _loop_story_is_eligible "$id" "$backlog" "$_open_pr_titles" 2>/dev/null; then
        printf '%s\n' "$id"
        return 0
      fi
    done < "$backlog"
  done
  return 1
}

# US-AGENT-030: transparent, auditable in-tier soft nudge.
#
# `_loop_tier_nudge SLOT_AGENT TIER STORY_TYPE` — on top of the est_min tier
# slot result (a HARD constraint), prefer the in-tier candidate agent with the
# best per-(agent × story_type) historical hit-rate. The tier is NOT changed;
# only the agent within it may be re-ordered.
#
# Candidate pool = the slot agent ∪ agents observed running this TIER in
# runs.jsonl, intersected with agents installed on this machine. Hit-rates come
# from lib/loop_result_eval.py --hit-rates (deterministic flat counts), and the
# re-order is the pure nudge_within_tier in lib/loop_pick_agent.py.
#
# Switch: ROLL_AGENT_NUDGE defaults ON. Set ROLL_AGENT_NUDGE=0 to disable — the
# router then behaves EXACTLY like US-AGENT-023 (the nudge runs its identity
# path). When there is no runs.jsonl, no python3, or no jq, the slot agent is
# returned unchanged (graceful degradation, never a hard failure).
#
# stdout: "<chosen_agent>\t<rationale>"  (tab-separated; rationale carries
#         spaces). Always exits 0 — the slot agent is the safe default.
_loop_tier_nudge() {
  local slot_agent="${1:-}" tier="${2:-}" story_type="${3:-}"
  # Empty slot agent → nothing to nudge; echo it back verbatim.
  [ -n "$slot_agent" ] || { printf '%s\t%s\n' "$slot_agent" "no slot agent"; return 0; }

  # Disabled → exact US-AGENT-023 behaviour (identity path in the python).
  local nudge_off=""
  case "${ROLL_AGENT_NUDGE:-1}" in
    0|off|false|no|OFF|FALSE|NO) nudge_off=1 ;;
  esac

  local install_dir
  install_dir="${ROLL_INSTALL_DIR:-$(dirname "$(dirname "$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "$0")")")}"

  # Need python3 to run the pure nudge at all; degrade to slot agent otherwise.
  command -v python3 >/dev/null 2>&1 || {
    printf '%s\t%s\n' "$slot_agent" "python3 unavailable; keeping slot $slot_agent"
    return 0
  }

  local runs="" hit_rates="{}" candidates=""
  if [ -z "$nudge_off" ]; then
    # Source runs.jsonl from the current project (read-side resolver).
    runs="$(_loop_runs_file 2>/dev/null || true)"
    if [ -n "$runs" ] && [ -f "$runs" ] && command -v jq >/dev/null 2>&1; then
      # Candidate pool: agents seen on rows whose tier == this tier, intersected
      # with installed agents. The slot agent is folded in by the python.
      local seen_agents installed_list
      seen_agents="$(jq -r --arg t "$tier" 'select(.tier == $t) | .agent // empty' "$runs" 2>/dev/null | sort -u)"
      installed_list="$(_agents_installed 2>/dev/null || true)"
      local a
      for a in $seen_agents; do
        case "
$installed_list
" in
          *"
$a
"*) candidates="${candidates}${candidates:+,}$a" ;;
        esac
      done
      # Hit-rate read model over all rows (flat, deterministic counts).
      hit_rates="$(jq -s '.' "$runs" 2>/dev/null | python3 "$install_dir/lib/loop_result_eval.py" --hit-rates 2>/dev/null || echo '{}')"
      [ -n "$hit_rates" ] || hit_rates="{}"
    fi
  fi

  # Run the pure nudge. --disabled forces the identity path; otherwise the
  # candidates + hit-rates drive the re-order, with the sample floor inside.
  local out
  if [ -n "$nudge_off" ]; then
    out="$(printf '%s' "$hit_rates" | python3 "$install_dir/lib/loop_pick_agent.py" \
      --nudge --slot-agent "$slot_agent" --story-type "$story_type" \
      --candidates "$candidates" --disabled 2>/dev/null)" || out=""
  else
    out="$(printf '%s' "$hit_rates" | python3 "$install_dir/lib/loop_pick_agent.py" \
      --nudge --slot-agent "$slot_agent" --story-type "$story_type" \
      --candidates "$candidates" 2>/dev/null)" || out=""
  fi
  if [ -z "$out" ]; then
    printf '%s\t%s\n' "$slot_agent" "nudge unavailable; keeping slot $slot_agent"
    return 0
  fi
  printf '%s\n' "$out"
}

# US-AGENT-023: pick the agent for a backlog story by complexity tier.
#
# Chain: story est_min → _classify_complexity (lib/loop_pick_agent.py emits the
# tier) → read the matching slot from agents.yaml → agent. When the tier slot is
# empty, fall back to the `default` slot; when `default` is also empty, WARN and
# use _first_installed_agent so the cycle still has a runnable agent.
#
# US-AGENT-030: after the slot agent is resolved, an in-tier soft nudge may
# re-order to a better-performing same-tier agent (see _loop_tier_nudge). The
# nudge never changes the tier; ROLL_AGENT_NUDGE=0 disables it entirely.
#
# Supersedes the US-AGENT-004/005 three-dimensional matcher + history soft
# preference (retired in US-AGENT-022). agents.yaml replaces agent-routes.yaml.
#
# stdout: "<agent> <tier> <rationale...>"   (field 1 = agent, field 2 = tier —
#         the loop inner script reads field 1 as the routed agent, field 2 as
#         the routing "rule", field 3+ as the rationale).
# exit 0 on success, 1 if the story id can't be classified.
_loop_pick_agent_for_story() {
  # Route <story-id> to an agent: classify the story into a complexity tier,
  # map the tier to an agent slot (tier → default → first installed agent),
  # then let the in-tier nudge optionally swap the agent.
  # stdout: "<agent> <tier> <rationale...>"; returns 1 when the id is missing,
  # the backlog is absent, classification fails, or no agent can be resolved.
  local story_id="${1:-}"
  if [ -z "$story_id" ]; then
    echo "_loop_pick_agent_for_story: story id required" >&2
    return 1
  fi
  local backlog=".roll/backlog.md"
  [ -f "$backlog" ] || {
    echo "_loop_pick_agent_for_story: $backlog not found" >&2
    return 1
  }
  local install_dir
  install_dir="${ROLL_INSTALL_DIR:-$(dirname "$(dirname "$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "$0")")")}"

  # Classify into a complexity tier. The python prints "<tier> <rationale...>".
  local classify_line tier rationale
  classify_line=$(python3 "$install_dir/lib/loop_pick_agent.py" \
    --story-id "$story_id" \
    --backlog "$backlog" 2>/dev/null) || {
    echo "_loop_pick_agent_for_story: cannot classify $story_id" >&2
    return 1
  }
  tier=$(printf '%s\n' "$classify_line" | awk '{print $1}')
  # cut -s: a line with no space (tier only, no rationale) must yield an EMPTY
  # rationale. Without -s, cut echoes the whole line and the tier would be
  # repeated as its own rationale.
  rationale=$(printf '%s\n' "$classify_line" | cut -s -d' ' -f2-)
  [ -n "$tier" ] || {
    echo "_loop_pick_agent_for_story: empty tier for $story_id" >&2
    return 1
  }

  # Resolve tier → agent via agents.yaml slots, with mechanical fallback.
  local agent=""
  agent=$(_agents_config_slot "$tier" 2>/dev/null || true)
  if [ -z "$agent" ] && [ "$tier" != "default" ]; then
    # Tier slot empty/missing → fall back to the `default` slot.
    agent=$(_agents_config_slot "default" 2>/dev/null || true)
  fi
  if [ -z "$agent" ]; then
    # default slot also empty → WARN and use the first installed agent so the
    # cycle is still runnable rather than silently dropping to nothing.
    agent=$(_first_installed_agent 2>/dev/null || true)
    if [ -n "$agent" ]; then
      warn "agents.yaml: tier '${tier}' and 'default' slot both empty; using first installed agent '${agent}'" >&2
    fi
  fi
  [ -n "$agent" ] || {
    echo "_loop_pick_agent_for_story: no agent resolvable for tier '$tier' (agents.yaml empty and no installed agent)" >&2
    return 1
  }

  # US-AGENT-030: in-tier soft nudge. Tier stays a hard constraint — only the
  # agent within the tier may be re-ordered. story_type is the id prefix
  # (US-AGENT-030 → US). The nudge line is "<agent>\t<reason>"; the reason is
  # folded into the rationale so the route line records it.
  local story_type="${story_id%%-*}"
  local tab=$'\t'
  local nudge_line nudged_agent nudge_reason=""
  nudge_line="$(_loop_tier_nudge "$agent" "$tier" "$story_type" 2>/dev/null || true)"
  if [ -n "$nudge_line" ]; then
    nudged_agent="${nudge_line%%"$tab"*}"
    # Only treat text after a tab as the reason; a tab-less line carries an
    # agent and nothing else (otherwise the agent name would be logged as its
    # own reason).
    case "$nudge_line" in
      *"$tab"*) nudge_reason="${nudge_line#*"$tab"}" ;;
    esac
    if [ -n "$nudged_agent" ]; then
      agent="$nudged_agent"
      if [ -n "$nudge_reason" ]; then
        # No dangling " | " separator when the classifier gave no rationale.
        rationale="${rationale:+$rationale | }nudge: $nudge_reason"
      fi
    fi
  fi

  printf '%s %s %s\n' "$agent" "$tier" "$rationale"
}

# US-AGENT-024: mechanical fallback resolution.
#
# Given the PRIMARY agent the tier router picked, decide which agent the cycle
# should actually run, using US-AGENT-021's availability probe (+ cache):
#   - primary online            → run primary, no fallback.
#   - primary offline + fallback slot agent online
#                               → run the fallback agent; record fallback_from.
#   - primary offline + fallback also unavailable (missing/offline)
#                               → write an ALERT (reuse the per-project ALERT
#                                 file) and STOP rather than spin through more
#                                 agents. We never chain past one fallback.
#
# A failed primary is also stamped offline in the availability cache (the probe
# inside _agent_available already persists the verdict), so later cycles skip it
# until the TTL expires.
#
# stdout: "<chosen_agent> <fallback_from>"  — field 1 is the agent to run;
#         field 2 is the original agent when a fallback fired, empty otherwise.
#         The loop reads field 2 into runs.jsonl's `fallback_from`.
# exit:   0 chosen agent printed (with or without fallback)
#         1 bad input (empty primary)
#         2 both primary and fallback unavailable → ALERT written, do not run.
#
# bash 3.2 safe: no declare -A, no mapfile/readarray, no ${var^^}/${var,,}.
_loop_resolve_fallback_agent() {
  local primary="${1:-}"
  [ -n "$primary" ] || {
    echo "_loop_resolve_fallback_agent: primary agent required" >&2
    return 1
  }

  # Output contract: "<agent>" or "<agent> <fallback_from>". The callee's own
  # output is discarded; only its exit status decides availability.
  #
  # Case 1 — primary is available: emit it alone (empty fallback_from).
  if _agent_available "$primary" >/dev/null 2>&1; then
    printf '%s\n' "$primary"
    return 0
  fi

  # Case 2 — primary is down but the dedicated fallback slot is usable. Stay
  # silent on stderr; the caller narrates the degradation.
  local fb
  fb="$(_agents_config_slot fallback 2>/dev/null || true)"
  if [ -n "$fb" ] && _agent_available "$fb" >/dev/null 2>&1; then
    printf '%s %s\n' "$fb" "$primary"
    return 0
  fi

  # Case 3 — nothing usable. Never chain to further agents: append one line
  # to the ALERT file (best effort) and report exit 2.
  local alert_file="${_LOOP_ALERT:-}"
  if [ -z "$alert_file" ]; then
    alert_file=".roll/loop/ALERT-$(_project_slug 2>/dev/null || basename "$PWD").md"
  fi
  mkdir -p "$(dirname "$alert_file")" 2>/dev/null || true

  local fb_note
  if [ -n "$fb" ]; then
    fb_note=", fallback \"${fb}\" also offline"
  else
    fb_note=', no fallback slot configured'
  fi
  printf '[%s] agent fallback exhausted: primary "%s" offline%s — cycle stopped.\n' \
    "$(date -u +%FT%TZ)" "$primary" "$fb_note" >> "$alert_file" 2>/dev/null || true

  echo "_loop_resolve_fallback_agent: primary '${primary}' and fallback unavailable — ALERT written" >&2
  return 2
}

# US-QA-012: merge-time test-quality gate. Scan bats files for ❼ + ❽
# violations; loop auto-merge waits on a clean exit. PR description
# `[skip-test-quality]` marker → US-QA-013 passes --skip here.
#
# Usage:
#   roll loop test-quality-check [--skip] file.bats [file.bats ...]
_loop_test_quality_check() {
  # Thin wrapper: resolve the package root (ROLL_INSTALL_DIR wins, otherwise
  # two directories above this script) and forward every argument unchanged
  # to lib/test_quality_gate.py. The gate's exit status is returned as-is.
  local install_dir="${ROLL_INSTALL_DIR:-}"
  if [ -z "$install_dir" ]; then
    local self_path
    self_path="$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "$0")"
    install_dir="$(dirname "$(dirname "$self_path")")"
  fi
  python3 "${install_dir}/lib/test_quality_gate.py" "$@"
}

# US-QA-013: PR description marker check. Returns 0 if the body contains
# `[skip-test-quality]` (case-insensitive); 1 otherwise.
_loop_pr_body_has_skip_test_quality() {
  # Succeeds when <body> contains the literal marker `[skip-test-quality]`
  # (any letter case); fails for an empty body or when the marker is absent.
  local body="${1:-}"
  [ -n "$body" ] || return 1
  # Here-string instead of `printf | grep -q`: under `set -o pipefail`, grep
  # exiting on the first hit can leave printf with a broken pipe on a large
  # body, turning a real match into a non-zero pipeline status.
  # -F matches the bracketed marker literally, so no regex escaping is needed.
  grep -qiF -- '[skip-test-quality]' <<<"$body"
}

# US-QA-013: gate + ALERT wrapper. Runs the test-quality gate; on
# violations writes a structured ALERT-<slug>.md entry so the human (or
# next brief) sees what blocked auto-merge. The wrapper is the entry
# point loop calls; it accepts --skip to honor the PR bypass marker.
_loop_test_quality_check_with_alert() {
  # Run the test-quality gate over the given bats files. On violations, append
  # a timestamped entry (with the gate's report) to the project's ALERT file,
  # emit a test_quality_gate_block event when _loop_event is defined, and
  # return 1. A leading --skip (PR bypass marker) short-circuits to success
  # without running the gate.
  if [ "${1:-}" = "--skip" ]; then
    return 0
  fi

  local install_dir
  install_dir="${ROLL_INSTALL_DIR:-$(dirname "$(dirname "$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "$0")")")}"

  # Capture the status with `|| rc=$?`: a bare failing assignment would abort
  # the whole script under `set -e` before the ALERT below is written.
  local report rc=0
  report=$(python3 "$install_dir/lib/test_quality_gate.py" "$@" 2>&1) || rc=$?
  if [ "$rc" -eq 0 ]; then
    return 0
  fi

  local main_dir="${ROLL_MAIN_PROJECT:-$PWD}"
  local slug; slug=$(_project_slug "$main_dir" 2>/dev/null || basename "$main_dir")
  local shared_root="${_SHARED_ROOT:-$HOME/.shared/roll}"
  local alert_file="${shared_root}/loop/ALERT-${slug}.md"
  mkdir -p "$(dirname "$alert_file")"
  {
    printf '[%s] test-quality gate blocked auto-merge (rubric ❼ / ❽).\n' \
      "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    printf '%s\n' "$report"
    printf 'Bypass: add `[skip-test-quality]` to the PR description and re-run.\n'
    printf '\n'
  } >> "$alert_file"

  # The event hook is optional; only call it when it is defined.
  if declare -F _loop_event >/dev/null 2>&1; then
    _loop_event "test_quality_gate_block" "${LOOP_CYCLE_ID:-test-quality}" "${1:-}" "$report" || true
  fi
  echo "[loop] test-quality gate blocked: see $alert_file" >&2
  return 1
}

_loop_agent_routes() {
  # US-AGENT-028: `roll loop agent-routes …` survives only as a deprecated
  # alias of `roll agent`. The v1 agent-routes.yaml schema is gone; agents.yaml
  # (schema v3) replaces it. Every sub-command prints a one-line deprecation
  # notice on stderr and then forwards to the `roll agent` equivalent.
  local sub="${1:-show}"
  if [ "$#" -gt 0 ]; then
    shift
  fi

  if [ "$sub" = "show" ]; then
    echo "roll loop agent-routes: deprecated — use 'roll agent' (showing agents.yaml)" >&2
    cmd_agent
  elif [ "$sub" = "path" ]; then
    echo "roll loop agent-routes path: deprecated — use 'roll agent' (agents.yaml path)" >&2
    _agents_config_path
  elif [ "$sub" = "lint" ]; then
    # Nothing left to lint under schema v3; succeed as a no-op.
    echo "roll loop agent-routes lint: deprecated — schema v3 needs no lint" >&2
    return 0
  else
    # Unknown sub-command: usage on stderr, failure status.
    printf '%s\n' \
      "Usage: roll loop agent-routes <show|lint|path>   (DEPRECATED — use 'roll agent')" \
      "" \
      "  show   Alias for 'roll agent' (the schema-v3 complexity-routing view)." \
      "  path   Print the active agents.yaml path." \
      "  lint   Deprecated no-op — schema v3 needs no lint." >&2
    return 1
  fi
}

# US-AUTO-033: publish a loop cycle branch as a GitHub PR with auto-merge.
#
# _loop_publish_pr <branch> [title]
#   Caller's cwd: a tree where <branch> exists locally.
#   Steps:
#     1. git push origin <branch>
#     2. gh pr view <branch>  → reuse if a PR is already open
#     3. gh pr create --base main --head <branch> ...
#     4. gh pr merge <branch> --auto --squash --delete-branch
#   Stdout: PR URL (always, even on idempotent reuse).
#   Exit 0 on success / idempotent reuse; non-zero on push or create failure.
#   On auto-merge failure: still returns 0 (PR exists; human can take over).
#   When `gh` is not installed: returns 2 — runner script's fallback path.
_loop_publish_pr() {
  local branch="$1"
  local title="${2:-loop cycle ${branch#loop/}}"

  # _gh_resolve fills `slug` by name; failure is reported as exit 2 so the
  # runner can take its fallback path.
  local slug
  if ! _gh_resolve slug; then
    _worktree_alert "_loop_publish_pr: gh not installed or origin is not a github repo; cannot publish PR for ${branch}"
    return 2
  fi

  # Step 1: push. Keep git's combined output so the alert says why it failed.
  local _push_err
  if ! _push_err=$(git push origin "$branch" 2>&1); then
    _worktree_alert "_loop_publish_pr: push origin ${branch} failed: ${_push_err}"
    return 1
  fi

  # Step 2: reuse a PR that gh already reports for this branch.
  local pr_url
  pr_url=$(gh -R "$slug" pr view "$branch" --json url -q .url 2>/dev/null) || pr_url=""

  # Step 3: otherwise create one against main.
  if [ -z "$pr_url" ]; then
    local commit_count body
    commit_count=$(git rev-list --count origin/main.."$branch" 2>/dev/null || echo '?')
    body=$(printf 'Auto-opened by roll-loop cycle.\n\n- Branch: %s\n- TCR micro-commits: %s\n\nThis PR will auto-merge once required checks pass.' \
      "$branch" "$commit_count")
    pr_url=$(gh -R "$slug" pr create --base main --head "$branch" \
      --title "$title" --body "$body" 2>/dev/null) || pr_url=""
    if [ -z "$pr_url" ]; then
      _worktree_alert "_loop_publish_pr: gh pr create failed for ${branch}"
      return 1
    fi
  fi

  # Step 4: arm auto-merge. A failure only raises an alert — the PR exists and
  # a human can take over, so the function still succeeds.
  if ! gh -R "$slug" pr merge "$branch" --auto --squash --delete-branch >/dev/null 2>&1; then
    _worktree_alert "_loop_publish_pr: gh pr merge --auto failed for ${branch} (PR ${pr_url} left open)"
  fi

  # US-VIEW-011: emit 'open' now; the cycle_end path follows up with the
  # terminal outcome via _loop_emit_pr_final.
  _loop_event pr "$branch" "$pr_url" "open" 2>/dev/null || true
  echo "$pr_url"
  return 0
}

# _loop_emit_pr_final <branch>
#   US-VIEW-011: after wait_pr_merge resolves, query gh for the PR's terminal
#   state and emit a second `pr` event so the dashboard renders the correct
#   landing marker (#NN ✓ merged / #NN ↩ closed / #NN … open).
#
#   gh state mapping:
#     MERGED → merged   (auto-merge landed)
#     CLOSED → closed   (PR closed without merging — wasted cycle)
#     OPEN   → open     (still waiting; auto-merge or human reviewer pending)
#     UNKNOWN/error → open (conservative — don't lie about merged)
#
#   Lenient: returns 0 on any failure (gh missing, slug unparseable, network
#   error). The earlier 'open' event remains as the conservative default
#   rendering.
_loop_emit_pr_final() {
  local branch="$1"

  # Lenient by design: every early exit below reports success.
  if ! command -v gh >/dev/null 2>&1; then
    return 0
  fi
  local slug
  _gh_resolve slug || return 0

  local pr_url state
  pr_url=$(gh -R "$slug" pr view "$branch" --json url -q .url 2>/dev/null) || pr_url=""
  state=$(gh -R "$slug" pr view "$branch" --json state -q .state 2>/dev/null || echo "UNKNOWN")

  # Anything other than MERGED / CLOSED (OPEN, UNKNOWN, errors) is reported
  # as "open" — conservative, never claims a merge that did not happen.
  local outcome="open"
  if [ "$state" = "MERGED" ]; then
    outcome="merged"
  elif [ "$state" = "CLOSED" ]; then
    outcome="closed"
  fi

  # Without a PR URL there is nothing to report.
  if [ -n "$pr_url" ]; then
    _loop_event pr "$branch" "$pr_url" "$outcome" 2>/dev/null || true
  fi
  return 0
}

# _loop_wait_pr_merge <branch>
#   FIX-047: poll GitHub until PR for <branch> is merged (confirms delivery).
#   Returns 0: merged. Returns 1: CLOSED or timeout.
#   Gracefully skips (returns 0) when gh is unavailable or slug unparseable.
#   Timeout: ROLL_PR_MERGE_TIMEOUT (default 600s).
_loop_wait_pr_merge() {
  local branch="$1"
  local timeout="${ROLL_PR_MERGE_TIMEOUT:-600}"
  local interval=30 elapsed=0

  # No usable GitHub slug: nothing to wait on, treat as success.
  local slug
  if ! _gh_resolve slug; then
    return 0
  fi

  local state
  until (( elapsed >= timeout )); do
    # US-LOOP-007: from the second poll on, emit a phase_tick so the tmux
    # reader sees "still waiting" once per interval.
    if (( elapsed > 0 )); then
      _loop_event phase_tick publish_wait_merge "${elapsed}s elapsed" "" 2>/dev/null || true
    fi

    state=$(gh -R "$slug" pr view "$branch" --json state -q .state 2>/dev/null || echo "UNKNOWN")
    if [ "$state" = "MERGED" ]; then
      return 0
    elif [ "$state" = "CLOSED" ]; then
      return 1
    fi

    # Any other state (OPEN, UNKNOWN, …): wait one interval and poll again.
    sleep "$interval"
    elapsed=$(( elapsed + interval ))
  done

  # Timed out without seeing MERGED.
  return 1
}

# _loop_is_doc_only_change
#   Returns 0 if every file changed since origin/main is doc-only: anything
#   under .roll/, guide/, site/, docs/ or .claude/, plus CHANGELOG.md and the
#   legacy top-level BACKLOG.md / PROPOSALS.md.
#   Returns 1 if any other file changed, there are no changes, or the git
#   diff itself fails.
_loop_is_doc_only_change() {
  # Succeeds only when the diff between origin/main and HEAD is non-empty and
  # every changed path starts with one of the doc-only prefixes below.
  local changed
  changed=$(git diff --name-only origin/main HEAD 2>/dev/null) || return 1
  [ -n "$changed" ] || return 1

  # Post-Phase-1: process artifacts moved into .roll/; user-facing docs at guide/ + site/.
  # Legacy paths (BACKLOG.md, PROPOSALS.md, docs/) kept as fallback for pre-2.0 projects.
  local doc_paths='^(\.roll/|CHANGELOG\.md|guide/|site/|\.claude/|BACKLOG\.md|PROPOSALS\.md|docs/)'

  # grep -v succeeds when at least one path is NOT doc-only. Feed the list via
  # a here-string rather than `echo | grep -q`: under `set -o pipefail`, grep
  # exiting on the first hit can break the writer's pipe on a long list, and
  # the resulting non-zero pipeline status would misreport a code change as
  # doc-only.
  if grep -qvE "$doc_paths" <<<"$changed"; then
    return 1
  fi
  return 0
}

# _loop_guard_roll_meta_boundary <worktree> <story_id>
# US-LOOP-069: for roll-meta-target stories (deliverable under .roll/), verify
# that no product-repo tracked files were modified in the worktree. Returns 0 if
# safe (or not a roll-meta story), returns 1 if product files were touched and
# writes ALERT. FIX-172: roll-meta-ness is path-based, not tag-based.
_loop_guard_roll_meta_boundary() {
  local wt="$1" story_id="$2"

  # Nothing to guard without a story id or a worktree directory.
  if [ -z "$story_id" ] || [ ! -d "$wt" ]; then
    return 0
  fi

  # The roll-meta classification needs the main project's backlog.
  local _backlog="${ROLL_MAIN_PROJECT:-.}/.roll/backlog.md"
  if [ ! -f "$_backlog" ]; then
    return 0
  fi
  if ! _loop_is_roll_meta_story "$story_id" "$_backlog"; then
    return 0
  fi

  # Paths changed in the worktree relative to origin/main; a failing diff is
  # treated the same as "no changes".
  local _changed
  _changed=$(cd "$wt" && git diff --name-only origin/main..HEAD 2>/dev/null || true)
  if [ -z "$_changed" ]; then
    return 0
  fi

  # Every path outside .roll/ is a boundary violation.
  local _violations
  _violations=$(printf '%s\n' "$_changed" | grep -v '^\.roll/' || true)
  if [ -z "$_violations" ]; then
    return 0
  fi

  local _nl=$'\n'
  _worktree_alert "US-LOOP-069 guard blocked roll-meta story ${story_id}${_nl}Touched product files (only .roll/ is allowed):${_nl}${_violations}"
  return 1
}

# _loop_publish_doc_pr <branch> [title]
#   Like _loop_publish_pr but merges immediately with --admin (no CI wait).
#   For doc-only changes where CI is not meaningful.
_loop_publish_doc_pr() {
  # Publish <branch> as a PR and merge it immediately with --admin (no CI
  # wait). Prints the PR URL whenever one exists.
  # Returns 0 on merge, 1 on push / create / merge failure, 2 when _gh_resolve
  # cannot provide a GitHub slug.
  local branch="$1"
  local title="${2:-doc update ${branch#loop/}}"
  local slug; _gh_resolve slug || {
    _worktree_alert "_loop_publish_doc_pr: gh not installed or origin is not a github repo; cannot publish PR for ${branch}"
    return 2
  }

  # Capture git's output so the alert explains WHY the push failed, matching
  # _loop_publish_pr. With no output the message is the bare "… failed".
  local _push_err
  _push_err=$(git push origin "$branch" --quiet 2>&1) || {
    _worktree_alert "_loop_publish_doc_pr: push origin ${branch} failed${_push_err:+: ${_push_err}}"
    return 1
  }

  # Reuse a PR gh already reports for the branch; otherwise open one on main.
  local pr_url
  pr_url=$(gh -R "$slug" pr view "$branch" --json url -q .url 2>/dev/null) || pr_url=""
  if [ -z "$pr_url" ]; then
    local body
    body=$(printf 'Doc-only update by roll-loop cycle.\n\n- Branch: %s\n- Files: BACKLOG / docs only\n\nMerging immediately — no CI gate needed for doc-only changes.' "$branch")
    pr_url=$(gh -R "$slug" pr create --base main --head "$branch" \
      --title "$title" --body "$body" 2>/dev/null) || pr_url=""
    if [ -z "$pr_url" ]; then
      _worktree_alert "_loop_publish_doc_pr: gh pr create failed for ${branch}"
      return 1
    fi
  fi

  # Unlike the auto-merge path, a failed --admin merge is a hard failure; the
  # URL is still printed so the caller can point a human at the open PR.
  if ! gh -R "$slug" pr merge "$branch" --admin --squash --delete-branch >/dev/null 2>&1; then
    _worktree_alert "_loop_publish_doc_pr: gh pr merge --admin failed for ${branch} (PR ${pr_url} left open)"
    echo "$pr_url"
    return 1
  fi
  echo "$pr_url"
  return 0
}

# _loop_backfill_merged [runs_jsonl_path]   (defined below, after the FIX-123
#   helper _loop_cleanup_stale_runs_tmp that it calls)
#   FIX-060: independent PR-merge backfill. Walks runs.jsonl, finds entries
#   with status:"built" and a cycle_id field, queries GitHub for the matching
#   loop/cycle-<id> PR, and rewrites entries whose PR is MERGED to
#   status:"merged" with merged_at + merge_commit fields.
#
#   Designed to run from the outer runner BEFORE the pause check, so the
#   scan fires every scheduled tick even when the loop is paused — fixes
#   the pre-FIX-060 behaviour where merge backfill only happened at next
#   cycle startup and stalled forever during pause.
#
#   Lenient: returns 0 when gh is missing, slug is unresolvable, jq is
#   missing, or runs.jsonl does not exist. Atomic rewrite via temp file.

# FIX-123: clean stale runs.jsonl .tmp orphan files from dead pids.
# Called at every runs.jsonl write entry point so orphans never accumulate.
# Optional $1: directory to scan. Defaults to dirname of $_LOOP_RUNS.
_loop_cleanup_stale_runs_tmp() {
  # Directory to sweep: explicit argument, else the directory of $_LOOP_RUNS
  # (falling back to the shared default path).
  local _dir="${1:-}"
  if [ -z "$_dir" ]; then
    _dir=$(dirname "${_LOOP_RUNS:-${HOME}/.shared/roll/loop/runs.jsonl}")
  fi
  if [ ! -d "$_dir" ]; then
    return 0
  fi

  # Temp files are named runs.jsonl.tmp.<pid>; the suffix identifies the
  # writer. Remove the file unless `kill -0` can still signal that pid.
  local _tmpfile _owner_pid
  for _tmpfile in "$_dir"/runs.jsonl.tmp.*; do
    # An unmatched glob stays literal and is skipped by the -f test.
    if [ ! -f "$_tmpfile" ]; then
      continue
    fi
    _owner_pid="${_tmpfile##*.tmp.}"
    if [ -z "$_owner_pid" ]; then
      continue
    fi
    # shellcheck disable=SC2009
    if kill -0 "$_owner_pid" 2>/dev/null; then
      continue
    fi
    rm -f "$_tmpfile"
  done
}

_loop_backfill_merged() {
  # FIX-060: rewrite runs.jsonl entries with status "built" whose
  # loop/cycle-<cycle_id> PR is MERGED on GitHub to status "merged", adding
  # merged_at + merge_commit. Always returns 0; does nothing when the file,
  # gh, jq or the GitHub slug is unavailable.
  # The body runs in a subshell so its EXIT trap and locals stay contained.
  (
    # US-LOOP-020: default to the project-local runs.jsonl (callers may pass an
    # explicit path). Was hardcoded to ${HOME}/.shared/roll/loop/runs.jsonl.
    local runs_path="${1:-$(_loop_runs_file)}"
    [ -f "$runs_path" ] || return 0
    command -v gh >/dev/null 2>&1 || return 0
    command -v jq >/dev/null 2>&1 || return 0
    local slug; _gh_resolve slug || return 0

    _loop_cleanup_stale_runs_tmp "$(dirname "$runs_path")"
    local tmp="${runs_path}.tmp.$$"
    trap "rm -f '$tmp'" EXIT
    : > "$tmp"
    local line status cycle_id branch view_json state merged_at merge_commit
    local updated rewritten=0
    # `|| [ -n "$line" ]`: keep a final record that lacks a trailing newline.
    # A bare `read` loop would silently drop it from the rewritten file.
    while IFS= read -r line || [ -n "$line" ]; do
      [ -z "$line" ] && continue
      status=$(printf '%s' "$line" | jq -r '.status // ""' 2>/dev/null)
      cycle_id=$(printf '%s' "$line" | jq -r '.cycle_id // ""' 2>/dev/null)
      # Only "built" entries with a cycle id are candidates; copy the rest.
      if [ "$status" != "built" ] || [ -z "$cycle_id" ]; then
        printf '%s\n' "$line" >> "$tmp"
        continue
      fi
      branch="loop/cycle-${cycle_id}"
      view_json=$(gh -R "$slug" pr view "$branch" --json state,mergedAt,mergeCommit 2>/dev/null) || view_json=""
      if [ -z "$view_json" ]; then
        printf '%s\n' "$line" >> "$tmp"
        continue
      fi
      state=$(printf '%s' "$view_json" | jq -r '.state // ""' 2>/dev/null)
      if [ "$state" != "MERGED" ]; then
        printf '%s\n' "$line" >> "$tmp"
        continue
      fi
      merged_at=$(printf '%s' "$view_json" | jq -r '.mergedAt // ""' 2>/dev/null)
      merge_commit=$(printf '%s' "$view_json" | jq -r '.mergeCommit.oid // ""' 2>/dev/null)
      # Build the replacement first and append it only when jq produced one,
      # so a failing jq can never leave a partial record followed by the
      # original line.
      if updated=$(printf '%s' "$line" | jq -c \
        --arg merged_at "$merged_at" \
        --arg merge_commit "$merge_commit" \
        '.status = "merged" | .merged_at = $merged_at | .merge_commit = $merge_commit' \
        2>/dev/null) && [ -n "$updated" ]; then
        printf '%s\n' "$updated" >> "$tmp"
        rewritten=1
      else
        printf '%s\n' "$line" >> "$tmp"
      fi
    done < "$runs_path"
    # Replace runs.jsonl only when an entry actually changed: a no-op swap
    # gains nothing and could discard lines another writer appended while
    # this scan was running.
    if [ "$rewritten" -eq 1 ]; then
      mv "$tmp" "$runs_path" 2>/dev/null || rm -f "$tmp"
    else
      rm -f "$tmp"
    fi
    trap - EXIT
    return 0
  )
}

_loop_monitor() {
  # Live dashboard for the current project: redraws services, loop state,
  # alert, pending backlog queue, launchd log tail and recent cycle events
  # every <interval> seconds (default 3) until interrupted.
  local interval="${1:-3}"
  local project_path; project_path=$(pwd -P)
  local project_name; project_name=$(basename "$project_path")

  # Determine terminal clear capability. Without `clear`, fall back to a plain
  # `echo` (blank line); the value is expanded unquoted below, so it must not
  # carry quote characters of its own.
  local clear_cmd="clear"
  command -v clear &>/dev/null || clear_cmd="echo"

  # Loop variables are declared once so they do not leak into the caller.
  local i line

  while true; do
    $clear_cmd
    local agent; agent=$(_project_agent)
    local now; now=$(date '+%Y-%m-%d %H:%M:%S')

    echo -e "\n  ${BOLD}${CYAN}roll loop monitor${NC}  ${YELLOW}${project_name}${NC}  ${now}  (Ctrl-C to exit)\n"

    # Services status (three services on macOS, single on Linux).
    # The msg arguments are quoted: the colour variables are EMPTY under
    # NO_COLOR, and unquoted empties would vanish and shift the remaining
    # positional arguments.
    echo -e "$(msg loop.services "${BOLD}" "${NC}" "${CYAN}" "${agent}")"
    if [[ "$(uname)" == "Darwin" ]]; then
      local active_start active_end dream_hour dream_minute
      local _aw; _aw=$(_loop_read_active_window "$project_path")
      active_start="${_aw%% *}"; active_end="${_aw##* }"
      # US-LOOP-013: use schedule spec for display
      local loop_spec loop_period loop_offset
      loop_spec=$(_loop_schedule_spec "$project_path")
      loop_period="${loop_spec%% *}"
      loop_offset="${loop_spec##* }"
      dream_hour=$(_config_read_int "loop_dream_hour" "3")
      dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")

      local loop_sched dream_sched pr_sched
      loop_sched=$(_loop_schedule_desc "$loop_period" "$loop_offset" en)
      loop_sched="${loop_sched}  active ${active_start}:00–${active_end}:00"
      dream_sched=$(printf "%02d:%02d" "$dream_hour" "$dream_minute")
      # FIX-195: pr is a 5-min PR Loop (StartInterval=300); brief was retired.
      pr_sched="every 5m"

      local svcs=("loop" "dream" "pr")
      local scheds=("$loop_sched" "$dream_sched" "$pr_sched")
      for i in "${!svcs[@]}"; do
        local svc="${svcs[$i]}" schedule="${scheds[$i]}"
        local state; state=$(_launchd_svc_state "$svc" "$project_path")
        case "$state" in
          enabled)       printf "    ${GREEN}%-8s %s${NC}       (%s)\n" "$svc" "$(msg loop.svc_enabled)" "$schedule" ;;
          installed-off) printf "    ${YELLOW}%-8s %s${NC}  (%s)  %s\n" "$svc" "$(msg loop.svc_installed_off)" "$schedule" "$(msg loop.svc_enabled_run)" ;;
          not-installed) printf "    ${RED}%-8s %s${NC}  (%s)  %s\n" "$svc" "$(msg loop.svc_not_installed)" "$schedule" "$(msg loop.svc_not_installed_run)" ;;
        esac
      done
    else
      if crontab -l 2>/dev/null | grep -q "${_LOOP_TAG}:${project_path}"; then
        echo -e "    ${GREEN}loop     ● enabled${NC}"
      else
        echo -e "    ${YELLOW}loop     ○ disabled${NC}   run: roll loop on"
      fi
    fi

    # Current state. Each lookup ends in `|| true`: a key missing from the
    # state file makes grep exit 1, which under errexit + pipefail would
    # otherwise terminate the monitor instead of showing an empty field.
    if [[ -f "$_LOOP_STATE" ]]; then
      local status current_item started_at run_id
      status=$(grep '^status:' "$_LOOP_STATE" | awk '{print $2}' || true)
      current_item=$(grep '^current_item:' "$_LOOP_STATE" | awk '{print $2}' || true)
      started_at=$(grep '^started_at:' "$_LOOP_STATE" | cut -d' ' -f2- | tr -d '"' || true)
      run_id=$(grep '^run_id:' "$_LOOP_STATE" | awk '{print $2}' || true)
      echo ""
      case "$status" in
        running) echo -e "  State      ${GREEN}▶ running${NC}   ${CYAN}${current_item}${NC}   started: ${started_at}   run: ${run_id}" ;;
        paused)  echo -e "  State      ${RED}‖ paused${NC}    on: ${current_item}" ;;
        idle)    echo -e "  State      ${YELLOW}○ idle${NC}" ;;
        *)       echo -e "  State      ${status}" ;;
      esac
    else
      echo ""
      echo -e "  State      ${YELLOW}○ no state file${NC}"
    fi

    # Alert
    if [[ -f "$_LOOP_ALERT" ]]; then
      echo ""
      echo -e "  ${RED}⚠ ALERT${NC}  (${CYAN}roll alert${NC} to manage)"
      sed 's/^/    /' "$_LOOP_ALERT"
    fi

    # Queue: pending items
    echo ""
    echo -e "$(msg loop.queue "${BOLD}" "${NC}")"
    local backlog=".roll/backlog.md"
    if [[ -f "$backlog" ]]; then
      local queue_count=0
      local fix_pending us_pending refactor_pending
      fix_pending=$(grep -E '^\| FIX-' "$backlog" | grep -F '| 📋 Todo |' || true)
      us_pending=$(grep -E '^\| \[US-' "$backlog" | grep -F '| 📋 Todo |' || true)
      refactor_pending=$(grep -E '^\| REFACTOR-' "$backlog" | grep -F '| 📋 Todo |' || true)

      # FIX first (priority)
      while IFS= read -r line; do
        [[ -z "$line" ]] && continue
        local id desc
        id=$(echo "$line" | awk -F'|' '{print $2}' | tr -d ' ')
        desc=$(echo "$line" | awk -F'|' '{print $3}' | sed 's/^ *//;s/ *$//' | cut -c1-60)
        printf "    ${RED}%-14s${NC}  %s\n" "$id" "$desc"
        (( queue_count++ )) || true
      done <<< "$fix_pending"

      # US stories
      while IFS= read -r line; do
        [[ -z "$line" ]] && continue
        local id desc
        id=$(echo "$line" | sed 's/.*\[\(US-[^]]*\)\].*/\1/')
        desc=$(echo "$line" | awk -F'|' '{print $3}' | sed 's/^ *//;s/ *$//' | cut -c1-60)
        printf "    ${CYAN}%-14s${NC}  %s\n" "$id" "$desc"
        (( queue_count++ )) || true
      done <<< "$us_pending"

      # Refactors
      while IFS= read -r line; do
        [[ -z "$line" ]] && continue
        local id desc
        id=$(echo "$line" | awk -F'|' '{print $2}' | tr -d ' ')
        desc=$(echo "$line" | awk -F'|' '{print $3}' | sed 's/^ *//;s/ *$//' | cut -c1-60)
        printf "    ${YELLOW}%-14s${NC}  %s\n" "$id" "$desc"
        (( queue_count++ )) || true
      done <<< "$refactor_pending"

      [[ $queue_count -eq 0 ]] && echo -e "    ${GREEN}✓ empty${NC}"
    else
      echo "    .roll/backlog.md not found"
    fi

    # Log tail (launchd.log)
    local log_file="${_SHARED_ROOT}/loop/launchd.log"
    echo ""
    echo -e "  ─────────────────────────────────────────────────────"
    echo -e "$(msg loop.log_tail "${BOLD}" "${NC}")"
    if [[ -f "$log_file" && -s "$log_file" ]]; then
      tail -10 "$log_file" | sed 's/^/    /'
    else
      echo -e "    ${YELLOW}(no log yet)${NC}"
    fi

    # Event stream (US-LOOP-001): last 10 events from NDJSON event file.
    # When the python formatter fails, the raw lines are shown instead.
    local slug; slug=$(_project_slug "$project_path")
    local evfile="${_SHARED_ROOT}/loop/events-${slug}.ndjson"
    echo ""
    echo -e "  ─────────────────────────────────────────────────────"
    echo -e "$(msg loop.cycle_events "${BOLD}" "${NC}")"
    if [[ -f "$evfile" && -s "$evfile" ]]; then
      tail -n 10 "$evfile" | python3 -c "
import sys, json
for line in sys.stdin:
    try:
        e = json.loads(line)
        stage = e.get('stage','')
        label = e.get('label','')
        detail = e.get('detail','')
        outcome = e.get('outcome','')
        ts = e.get('ts','')
        print(f'    {ts}  {stage:<14}  {label:<22}  {detail}  {outcome}')
    except: pass
" 2>/dev/null || tail -n 10 "$evfile" | sed 's/^/    /'
    else
      echo -e "    ${YELLOW}(no events yet — events are emitted after the first cycle)${NC}"
    fi

    echo ""
    sleep "$interval"
  done
}

# _loop_event_log [N]
#   Print the last N events (default 20) from the current project's NDJSON
#   event file, one formatted line per event:
#     <ts>  <stage>  <label>  <detail>  <outcome>
#   Used by: roll loop events [N]
#   Globals:  _SHARED_ROOT (read)
#   Returns:  1 when the project has no event file, otherwise the pipeline status.
_loop_event_log() {
  local n="${1:-20}"
  # A non-numeric count would make `tail -n` fail and print nothing at all;
  # fall back to the default instead.
  case "$n" in ''|*[!0-9]*) n=20 ;; esac
  local project_path; project_path=$(pwd -P)
  local slug; slug=$(_project_slug "$project_path")
  local evfile="${_SHARED_ROOT}/loop/events-${slug}.ndjson"
  if [ ! -f "$evfile" ]; then
    echo "[monitor] No event log found for project: $slug"
    return 1
  fi
  # Show last N events, formatted. Fields are coerced to text so an event with
  # a numeric or null field is still shown; only unparsable lines are skipped.
  tail -n "$n" "$evfile" | python3 -c '
import json
import sys


def field(event, key):
    value = event.get(key)
    return "" if value is None else str(value)


for raw in sys.stdin:
    try:
        event = json.loads(raw)
    except ValueError:
        continue
    if not isinstance(event, dict):
        continue
    print("  {}  {:12s}  {:20s}  {}  {}".format(
        field(event, "ts"), field(event, "stage"), field(event, "label"),
        field(event, "detail"), field(event, "outcome")))
'
}

# ═══════════════════════════════════════════════════════════════════════════════
# BRIEF — owner-facing project digest
# ═══════════════════════════════════════════════════════════════════════════════

# _brief_latest_file <dir>
#   Print the path of the last regular *.md file in <dir> in glob (sorted)
#   order, or an empty line when there is none. Uses a glob instead of parsing
#   `ls`, so directories and unusual filenames cannot corrupt the result.
_brief_latest_file() {
  local dir="$1" candidate newest=""
  for candidate in "$dir"/*.md; do
    # An unmatched glob stays literal; the -f test filters that out too.
    if [[ -f "$candidate" ]]; then
      newest="$candidate"
    fi
  done
  printf '%s\n' "$newest"
}

# cmd_brief [args...]
#   Show the newest brief under .roll/briefs. When no brief exists, or the
#   newest one is more than 24h old, the roll-brief skill is run first.
#   With ROLL_UI=v2 (the default) rendering is delegated to lib/roll-brief.py
#   and all arguments are forwarded; otherwise the brief file is printed as-is.
#   Returns: 1 when no brief file exists even after generation.
cmd_brief() {
  local briefs_dir=".roll/briefs"
  local latest; latest=$(_brief_latest_file "$briefs_dir")

  if [[ -z "$latest" ]]; then
    info "$(msg brief.no_brief_yet_generating)"
    _agent_run_skill "roll-brief"
    latest=$(_brief_latest_file "$briefs_dir")
  else
    local mod_time now age
    mod_time=$(_file_mtime "$latest")
    now=$(date +%s); age=$(( now - mod_time ))
    if (( age > 86400 )); then
      info "$(msg brief.brief_is_age_3600_h_old "$(( age / 3600 ))")"
      _agent_run_skill "roll-brief"
      latest=$(_brief_latest_file "$briefs_dir")
    fi
  fi

  if [[ ! -f "$latest" ]]; then
    return 1
  fi

  # ── Display mode ──────────────────────────────────────────────────────────
  if [[ "${ROLL_UI:-v2}" == "v2" ]]; then
    python3 "${ROLL_PKG_DIR}/lib/roll-brief.py" "$@"
    return
  fi

  cat "$latest"
}

# REFACTOR-030: removed `_promote_unreleased` and `_ensure_unreleased`.
# REFACTOR-021 collapsed the changelog double-pipeline so the release script
# generates the version header directly from BACKLOG, leaving these two
# helpers orphaned. Their behaviour is now part of the changelog renderer
# called from the maintainer-private release script at roll-meta/ops/release.sh.

# ═══════════════════════════════════════════════════════════════════════════════
# BACKLOG — show pending tasks / manage status
# ═══════════════════════════════════════════════════════════════════════════════

# _backlog_set_status <pattern> <new_status>
#   Rewrite .roll/backlog.md in place: every table row whose ID cell contains
#   <pattern> (case-insensitive substring; markdown links are reduced to their
#   text first) gets its last cell replaced by <new_status>.
#   Prints the number of rows changed. Python is used for reliable
#   emoji/Unicode handling.
_backlog_set_status() {
  local id_pattern="$1"
  local status_text="$2"
  local backlog_file=".roll/backlog.md"
  python3 - "$id_pattern" "$status_text" "$backlog_file" <<'PY'
import re
import sys

needle, status, path = sys.argv[1], sys.argv[2], sys.argv[3]
needle = needle.upper()
link = re.compile(r'\[([^\]]+)\]\([^)]+\)')

with open(path, encoding='utf-8') as src:
    rows = src.readlines()

changed = 0
for idx, row in enumerate(rows):
    # Table rows only: leading pipe and at least four pipes in total.
    if not row.startswith('|') or row.count('|') < 4:
        continue
    cells = row.split('|')
    if len(cells) < 5:
        continue
    ident = link.sub(r'\1', cells[1]).strip()
    if needle in ident.upper():
        # cells[-1] is whatever follows the final pipe (the newline),
        # so cells[-2] is the status cell.
        cells[-2] = ' ' + status + ' '
        rows[idx] = '|'.join(cells)
        changed += 1

with open(path, 'w', encoding='utf-8') as dst:
    dst.writelines(rows)
print(changed)
PY
}

# _backlog_extract_id <row>
#   Print the story id of one backlog table row.
#   The id is taken from the ID cell (second `|`-separated field) only:
#     - `[US-XXX](link)` cells yield the bracketed text `US-XXX`
#     - any other cell yields its content with spaces removed (e.g. FIX-12)
#   Looking at the ID cell alone keeps a `[US-…]` reference inside another
#   row's description from being reported as that row's id.
#   Input without any `|` keeps the old behaviour: a `[US-…]` anywhere in the
#   text is returned, otherwise an empty line.
_backlog_extract_id() {
  local line="$1" cell="" scope
  scope="$line"
  if [[ "$line" == *'|'* ]]; then
    cell="${line#*|}"     # drop everything up to and including the first pipe
    cell="${cell%%|*}"    # keep what precedes the next pipe
    scope="$cell"
  fi
  local us_re='\[(US-[^]]*)\]'
  if [[ "$scope" =~ $us_re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    printf '%s\n' "${cell// /}"
  fi
}

# _backlog_render_group <title> <color> <count> <id-width> <rows>
#   Print one pending-group section. FIX / US / REFACTOR / IDEA rows all have
#   the same shape, so this is the single place their layout is defined:
#     a colored heading "<title>  (<count>)", then one line per non-empty row
#     with the id left-padded to <id-width> followed by the description cell,
#     then a blank line.
#   Globals: NC (read)
_backlog_render_group() {
  local heading="$1" tint="$2" n_items="$3" id_width="$4" rows="$5"
  local row_fmt="    %-${id_width}s  %s\n"
  local row item_id item_desc

  echo -e "  ${tint}${heading}  (${n_items})${NC}"
  while IFS= read -r row; do
    if [[ -n "$row" ]]; then
      item_id=$(_backlog_extract_id "$row")
      # Third table cell, with surrounding spaces trimmed.
      item_desc=$(awk -F'|' '{ d = $3; sub(/^ */, "", d); sub(/ *$/, "", d); print d }' <<< "$row")
      # shellcheck disable=SC2059 — the format only embeds the numeric width
      printf "$row_fmt" "$item_id" "$item_desc"
    fi
  done <<< "$rows"
  echo ""
}

# ═══════════════════════════════════════════════════════════════════════════════
# CI — check or wait for current commit's CI status
# ═══════════════════════════════════════════════════════════════════════════════
# ALERT — view / ack / resolve loop alert lifecycle
# ═══════════════════════════════════════════════════════════════════════════════

# cmd_alert [list|ack|resolve|clear|log] [args...]
#   View and manage the loop alert file ($_LOOP_ALERT).
#     list (default)  print the active alert, or a "no alerts" notice
#     ack             append an "**Acknowledged**: <timestamp>" line
#     resolve|clear   delete the alert file
#     log [N]         delegate to _alert_log
#   Returns: 1 for an unknown subcommand, 0 otherwise.
#   Every argument handed to `msg` is quoted so a value containing spaces
#   (the timestamp) or glob characters (user input) stays one argument.
cmd_alert() {
  local subcmd="${1:-list}"
  shift || true   # nothing to shift when called without arguments

  case "$subcmd" in
    list|"")
      if [[ ! -f "$_LOOP_ALERT" ]]; then
        ok "$(msg alert.no_active_alerts)"
        return 0
      fi
      echo -e "$(msg alert.active_alert "${BOLD}" "${NC}")"
      echo ""
      cat "$_LOOP_ALERT"
      echo ""
      echo -e "  Run '${CYAN}roll alert ack${NC}' to acknowledge, '${CYAN}roll alert resolve${NC}' to clear."
      echo -e "$(msg alert.run_roll_alert_ack_to_acknowledge)"
      ;;
    ack)
      if [[ ! -f "$_LOOP_ALERT" ]]; then
        warn "$(msg alert.no_active_alerts_to_acknowledge)"
        return 0
      fi
      local ts; ts=$(date '+%Y-%m-%d %H:%M:%S')
      {
        echo ""
        echo "**Acknowledged**: ${ts}"
      } >> "$_LOOP_ALERT"
      ok "$(msg alert.alert_acknowledged_at "${ts}")"
      ;;
    resolve|clear)
      if [[ ! -f "$_LOOP_ALERT" ]]; then
        ok "$(msg alert.no_active_alerts_2)"
        return 0
      fi
      rm -f "$_LOOP_ALERT"
      ok "$(msg alert.alert_resolved_and_cleared)"
      ;;
    log)
      # US-AUTO-046: show the Alert Loop's consumption history — the most recent
      # N records the dispatcher wrote to alert-log.jsonl (newest first).
      _alert_log "$@"
      ;;
    *)
      err "$(msg alert.unknown_subcommand "$subcmd")"
      echo "  Usage: roll alert [list|ack|resolve|log]"
      echo "  用法：roll alert [list|ack|resolve|log]"
      return 1
      ;;
  esac
}

# _alert_log [N]
#   US-AUTO-046: print the last N (default 10) records from alert-log.jsonl,
#   newest first — a pull-based view of what the Alert Loop has consumed
#   (analogous to `roll loop runs`). Each line: HH:MM  <glyph> [level] category — message
#   The glyph is ● when the alert was notified, ○ when it was throttled/deduped.
#   A missing, non-numeric or zero N falls back to 10. Lines that are not JSON
#   objects are skipped.
_alert_log() {
  local n="${1:-10}"
  case "$n" in ''|*[!0-9]*) n=10 ;; esac
  n=$(( 10#$n ))   # force base 10 so "08" is not read as invalid octal
  # Zero must not reach Python: rows[-0:] is the whole list, not an empty one.
  if (( n == 0 )); then
    n=10
  fi
  local file; file=$(_alert_log_file)
  if [[ ! -s "$file" ]]; then
    ok "No alert history yet."
    echo "  暂无告警历史。"
    return 0
  fi
  echo -e "  ${BOLD}Alert log${NC}  (${CYAN}${file}${NC})"
  echo -e "  告警日志  最近 ${n} 条"
  echo ""
  # Colors travel through the environment so the quoted heredoc stays literal.
  ROLL_GREEN="$GREEN" ROLL_YELLOW="$YELLOW" ROLL_RED="$RED" ROLL_NC="$NC" \
  python3 - "$file" "$n" <<'PY'
import json
import os
import sys

path, n = sys.argv[1], int(sys.argv[2])
G = os.environ.get("ROLL_GREEN", ""); Y = os.environ.get("ROLL_YELLOW", "")
R = os.environ.get("ROLL_RED", ""); NC = os.environ.get("ROLL_NC", "")

rows = []
with open(path, encoding="utf-8", errors="replace") as fh:
    for ln in fh:
        ln = ln.strip()
        if not ln:
            continue
        try:
            rec = json.loads(ln)
        except ValueError:
            continue
        if isinstance(rec, dict):   # ignore valid JSON that is not a record
            rows.append(rec)

for r in reversed(rows[-n:]):
    ts = str(r.get("recorded_at") or r.get("ts") or "")
    hhmm = ts[11:16] if len(ts) >= 16 else ts
    notified = str(r.get("notified")) in ("1", "True", "true")
    glyph = (G + "●" + NC) if notified else (Y + "○" + NC)
    level = str(r.get("level") or "")
    lc = {"error": R, "warn": Y, "info": ""}.get(level, "")
    cat = r.get("category", "")
    msg = r.get("message", "")
    print(f"  {hhmm}  {glyph} {lc}[{level}]{NC} {cat} — {msg}")
PY
}

# ═══════════════════════════════════════════════════════════════════════════════
# FEEDBACK — one-shot GitHub issue from the CLI (US-FB-001)
# ═══════════════════════════════════════════════════════════════════════════════

# _feedback_origin_repo
#   Derive "owner/repo" from the `origin` remote of the current git repository.
#   Recognised GitHub URL forms:
#     git@github.com:owner/repo(.git)
#     ssh://git@github.com/owner/repo(.git)
#     https://github.com/… , http://github.com/… , git://github.com/…
#   A trailing "/" and ".git" suffix are removed.
#   Prints an empty line for any other remote and nothing when there is no
#   origin remote; always returns 0.
_feedback_origin_repo() {
  local url slug
  url=$(git remote get-url origin 2>/dev/null) || return 0
  case "$url" in
    git@github.com:*)       slug="${url#git@github.com:}" ;;
    ssh://git@github.com/*) slug="${url#ssh://git@github.com/}" ;;
    https://github.com/*)   slug="${url#https://github.com/}" ;;
    http://github.com/*)    slug="${url#http://github.com/}" ;;
    git://github.com/*)     slug="${url#git://github.com/}" ;;
    *)
      printf '\n'
      return 0
      ;;
  esac
  slug="${slug%/}"
  slug="${slug%.git}"
  printf '%s\n' "$slug"
}

# US-FB-003: feedback target repo precedence:
#   1. --repo flag (caller already resolved this; not part of this helper)
#   2. ROLL_FEEDBACK_REPO env var
#   3. .roll/local.yaml `feedback_repo:`
#   4. ~/.roll/config.yaml `feedback_repo:`
#   5. origin-derived github owner/repo
# _feedback_yaml_field <file> <key>
#   Print the value of the first top-level `key: value` line in <file>.
#   This is a single-line scalar reader, not a YAML parser:
#     - the key must start in column 0 and is matched literally
#     - a value wrapped in single or double quotes yields the text inside them
#     - an unquoted value loses trailing whitespace (including CR) and any
#       " # comment" tail
#   Prints nothing when the file or key is missing; always returns 0 for a
#   missing file.
_feedback_yaml_field() {
  local file="$1" field="$2"
  [ -f "$file" ] || return 0
  awk -v key="$field" '
    index($0, key ":") == 1 {
      v = substr($0, length(key) + 2)
      sub(/^[[:space:]]+/, "", v)
      q = substr(v, 1, 1)
      if (q == "\"" || q == "\047") {
        # Quoted scalar: keep what sits between the opening quote and its
        # first matching close (or the rest of the line if never closed).
        rest = substr(v, 2)
        end = index(rest, q)
        v = (end > 0) ? substr(rest, 1, end - 1) : rest
      } else {
        sub(/[[:space:]]+#.*$/, "", v)
        sub(/[[:space:]]+$/, "", v)
      }
      print v
      exit
    }' "$file"
}

# _feedback_default_repo
#   Print the feedback target repo when no --repo flag was given. First hit wins:
#     1. ROLL_FEEDBACK_REPO env var
#     2. `feedback_repo:` in .roll/local.yaml (project)
#     3. `feedback_repo:` in the global config — $ROLL_CONFIG, so a ROLL_HOME
#        override is honoured; ~/.roll/config.yaml when ROLL_CONFIG is unset
#     4. owner/repo derived from the git origin remote
_feedback_default_repo() {
  if [ -n "${ROLL_FEEDBACK_REPO:-}" ]; then
    printf '%s\n' "$ROLL_FEEDBACK_REPO"
    return 0
  fi
  local config_file candidate
  for config_file in ".roll/local.yaml" "${ROLL_CONFIG:-${HOME}/.roll/config.yaml}"; do
    candidate=$(_feedback_yaml_field "$config_file" "feedback_repo")
    if [ -n "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  _feedback_origin_repo
}

# _feedback_label_for_type <type>
#   Print the comma-separated GitHub label list (no spaces) for a feedback
#   type. Unknown or missing types map to the generic "feedback" label.
_feedback_label_for_type() {
  local kind="${1:-}" labels
  if [[ "$kind" == "bug" ]]; then
    labels='bug,FIX'
  elif [[ "$kind" == "idea" ]]; then
    labels='idea,enhancement,US'
  elif [[ "$kind" == "ux" ]]; then
    labels='ux,enhancement'
  else
    labels='feedback'
  fi
  printf '%s\n' "$labels"
}

# _feedback_urlencode <text>
#   Percent-encode <text> for use in a GitHub issue URL query string. No
#   character is treated as safe, so "/" is encoded as well.
_feedback_urlencode() {
  python3 - "$1" <<'PY'
import sys
from urllib.parse import quote

print(quote(sys.argv[1], safe=""))
PY
}

# US-FB-002: compose env info appendix attached to feedback body unless
# --no-env is set. Single source of truth so future feedback paths
# (web embedded, slack, etc.) can reuse.
#   Output: a markdown "### Environment" section preceded by a blank line and
#   a horizontal rule, listing roll version, OS, shell, current agent,
#   language and project directory name.
#   The language follows locale precedence: LC_ALL, then LC_MESSAGES, then
#   LANG — LC_ALL overrides the others when set, so it is consulted first.
_feedback_env_block() {
  local roll_v os_name shell_name agent lang project
  roll_v="${VERSION:-unknown}"
  os_name="$(uname -srm 2>/dev/null || echo unknown)"
  shell_name="$(basename "${SHELL:-/bin/sh}")"
  agent=$(_project_agent 2>/dev/null || echo "unknown")
  lang="${LC_ALL:-${LC_MESSAGES:-${LANG:-unknown}}}"
  project="$(basename "$(pwd -P)")"
  cat <<EOF

---

### Environment
- roll version: $roll_v
- OS: $os_name
- shell: $shell_name
- current agent: $agent
- language: $lang
- project: $project
EOF
}

# cmd_feedback [--type bug|idea|ux] --title <text> [--body <text>]
#              [--repo owner/repo] [--no-env] [--print-url]
#   Open a GitHub issue from the CLI (US-FB-001).
#   The target repo comes from --repo or _feedback_default_repo; labels come
#   from _feedback_label_for_type. Unless --no-env is given, the environment
#   appendix is attached to the body. With --print-url, or when `gh` is not
#   installed, a prefilled github.com URL is printed instead of calling `gh`.
#   Returns: 1 on usage errors (unknown flag, flag missing its value, missing
#   title, unknown type, no resolvable repo); otherwise the status of `gh`.
cmd_feedback() {
  local type="" title="" body="" repo="" print_url=0 attach_env=1
  while [ $# -gt 0 ]; do
    case "$1" in
      --type|--title|--body|--repo)
        # Without this check a trailing value flag would touch an unbound $2
        # (fatal under `set -u`) instead of reporting a usage error.
        if [ $# -lt 2 ]; then
          err "feedback: $1 requires a value"
          return 1
        fi
        case "$1" in
          --type)  type="$2" ;;
          --title) title="$2" ;;
          --body)  body="$2" ;;
          --repo)  repo="$2" ;;
        esac
        shift 2 ;;
      --no-env)    attach_env=0; shift ;;
      --print-url) print_url=1; shift ;;
      --help|-h)
        cat <<'HELP'
Usage: roll feedback [options]
        roll feedback (一句话提反馈)

Open a GitHub issue from the CLI. Type auto-labels (bug → FIX label;
idea → US label; ux → ux label).

Options:
  --type <bug|idea|ux>      Classify the feedback (default: bug)
  --title <text>            Issue title (required)
  --body <text>             Issue body
  --repo <owner/repo>       Target repo (default: derived from origin)
  --no-env                  Skip the auto-attached Environment section
                            (roll version, OS, agent, language, project)
  --print-url               Print the prefilled github.com URL instead of
                            invoking `gh`. Falls back to this automatically
                            when `gh` is not installed.
HELP
        return 0 ;;
      *)
        err "feedback: unknown flag $1"
        return 1 ;;
    esac
  done

  if [ -z "$title" ]; then
    err "feedback: --title is required"
    return 1
  fi
  if [ -z "$type" ]; then
    type="bug"
  fi
  case "$type" in
    bug|idea|ux) ;;
    *)
      err "feedback: unknown --type '$type' (expected one of: bug, idea, ux)"
      return 1 ;;
  esac

  if [ -z "$repo" ]; then
    repo=$(_feedback_default_repo)
  fi
  if [ -z "$repo" ]; then
    err "feedback: cannot derive owner/repo from origin; pass --repo owner/repo"
    return 1
  fi

  # US-FB-002: compose final body with optional env appendix.
  if [ "$attach_env" -eq 1 ]; then
    body="${body}$(_feedback_env_block)"
  fi

  local labels; labels=$(_feedback_label_for_type "$type")

  # Decide path: --print-url or gh missing → print URL; else use gh.
  if [ "$print_url" -eq 1 ] || ! command -v gh >/dev/null 2>&1; then
    local t_enc b_enc l_enc
    t_enc=$(_feedback_urlencode "$title")
    b_enc=$(_feedback_urlencode "$body")
    l_enc=$(_feedback_urlencode "$labels")
    printf 'https://github.com/%s/issues/new?title=%s&body=%s&labels=%s\n' \
      "$repo" "$t_enc" "$b_enc" "$l_enc"
    return 0
  fi

  # Real path: gh issue create
  gh issue create --repo "$repo" --title "$title" --body "$body" --label "$labels"
}

# ═══════════════════════════════════════════════════════════════════════════════
# LANG — switch / inspect Roll's UI language (US-I18N-001)
# ═══════════════════════════════════════════════════════════════════════════════

# _lang_write_config [lang]
#   Rewrite $ROLL_CONFIG without any existing `lang:` line and, when [lang] is
#   given, with a fresh `lang: <lang>` line appended. The new content is built
#   in a temp file next to the config and renamed over it, so a failure never
#   leaves a half-written config. Returns 1 (config untouched) when the
#   current config cannot be read.
_lang_write_config() {
  local lang_value="${1:-}" tmp rc=0
  tmp=$(mktemp "${ROLL_CONFIG}.XXXXXX") || return 1
  # grep exits 1 when nothing survives the filter (fine); >1 is a real error.
  grep -vE '^lang:' "$ROLL_CONFIG" > "$tmp" || rc=$?
  if (( rc > 1 )); then
    rm -f "$tmp"
    err "lang: cannot read ${ROLL_CONFIG}"
    return 1
  fi
  if [[ -n "$lang_value" ]]; then
    printf 'lang: %s\n' "$lang_value" >> "$tmp"
  fi
  mv "$tmp" "$ROLL_CONFIG"
}

# cmd_lang [zh|en|--reset]
#   Inspect or switch Roll's UI language (US-I18N-001).
#     (no argument)  print the resolved language and where it came from
#     zh | en        persist `lang: <value>` in $ROLL_CONFIG
#     --reset        remove the persisted preference
#   Returns: 1 for any other argument or when the config cannot be rewritten.
cmd_lang() {
  local arg="${1:-}"

  case "$arg" in
    "")
      unset ROLL_LANG_RESOLVED
      local current src
      current=$(_i18n_resolve_lang)
      if [[ -n "${ROLL_LANG:-}" ]]; then
        src="ROLL_LANG env"
      elif [[ -f "$ROLL_CONFIG" ]] && grep -qE '^lang:' "$ROLL_CONFIG"; then
        src="config (${ROLL_CONFIG})"
      elif [[ -n "${LC_ALL:-}" || -n "${LANG:-}" ]]; then
        src="LC_ALL/LANG"
      else
        src="default"
      fi
      echo "current: ${current}, source: ${src}"
      ;;
    zh|en)
      mkdir -p "$(dirname "$ROLL_CONFIG")"
      [[ -f "$ROLL_CONFIG" ]] || : > "$ROLL_CONFIG"
      _lang_write_config "$arg" || return 1
      unset ROLL_LANG_RESOLVED
      ok "$(msg lang.language_set_to "${arg}")"
      ;;
    --reset)
      if [[ -f "$ROLL_CONFIG" ]]; then
        _lang_write_config || return 1
      fi
      unset ROLL_LANG_RESOLVED
      ok "$(msg lang.language_preference_cleared_will_follow_locale)"
      ;;
    *)
      # Quoted: the value is raw user input and may contain spaces or globs.
      err "$(msg lang.unknown_language "${arg}")"
      echo "  Valid values: zh, en, --reset"
      echo "$(msg lang.options_zh_en_reset)"
      return 1
      ;;
  esac
}

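# ═══════════════════════════════════════════════════════════════════════════════
# CI — check or wait for current commit's CI status
# ═══════════════════════════════════════════════════════════════════════════════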

# cmd_ci [--wait] [--timeout=N]
#   Report CI status for the current commit.
#     --wait        hand over to _ci_wait with the timeout (default 300)
#     --timeout=N   timeout passed to _ci_wait; must be a non-negative integer
#   Without --wait, lists each `gh` run for HEAD as "name: status/conclusion".
#   Exits 1 on a usage error (unknown argument or non-numeric timeout).
#   Returns 0 when gh is unavailable, the listing fails, or there are no runs;
#   1 when not inside a git repository.
cmd_ci() {
  local wait_mode=false
  local timeout=300

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --wait) wait_mode=true; shift ;;
      --timeout=*)
        timeout="${1#*=}"
        # Reject garbage here rather than letting it reach _ci_wait.
        case "$timeout" in
          ''|*[!0-9]*) err "$(msg ci.usage_roll_ci_wait_timeout_n)"; exit 1 ;;
        esac
        shift ;;
      *) err "$(msg ci.usage_roll_ci_wait_timeout_n)"; exit 1 ;;
    esac
  done

  if $wait_mode; then
    _ci_wait "$timeout"
    return
  fi

  _gh_available || { warn "$(msg ci.gh_not_installed_gh)"; return 0; }
  local commit; commit=$(git rev-parse HEAD 2>/dev/null) || { err "$(msg ci.not_a_git_repo)"; return 1; }
  local runs
  runs=$(gh run list --commit "$commit" --json status,conclusion,name 2>/dev/null) || { warn "gh run list failed"; return 0; }
  if [[ -z "$runs" || "$runs" == "[]" ]]; then
    echo "$(msg ci.no_ci_runs_for_git_rev "${commit:0:7}")"
    return 0
  fi
  echo "$runs" | jq -r '.[] | "\(.name): \(.status)/\(.conclusion)"'
}

# REFACTOR-041: backlog description linter. The global convention bans file
# paths, function names, filenames, and "architecture jargon" in description
# columns — see conventions/global/AGENTS.md §4. This helper scans each row's
# description column for those patterns and prints any findings. Output format
# mirrors a linter ("file:line: message") so editors can navigate from it.
#
# _backlog_lint [--gate] [backlog-file]
#   --gate        FIX-102: exit 1 when any violation is found (used by hooks
#                 to block a dirty backlog). Without it the linter is
#                 warn-only and returns 0.
#   backlog-file  defaults to .roll/backlog.md; the last path argument wins.
#   Returns: 1 when the file is missing, or on violations with --gate.
#
# Story ids may have several segments (US-LOOP-001, US-I18N-001) and, in
# reports, a lowercase suffix (FIX-150a); the id patterns below accept both so
# the report names the story rather than echoing the raw markdown link.
_backlog_lint() {
  local gate=0
  local backlog=".roll/backlog.md"
  while [ $# -gt 0 ]; do
    case "$1" in
      --gate) gate=1 ;;
      *) backlog="$1" ;;
    esac
    shift
  done
  [ -f "$backlog" ] || { err "backlog not found: $backlog"; return 1; }

  # PREFIX, optional middle segments, numeric tail. Contains no "|" so it can
  # be embedded in the "|"-delimited sed expressions below.
  local id_re='[A-Z]+(-[A-Z0-9]+)*-[0-9]+'
  local violations=0
  local lineno=0
  local line desc body issues sid
  while IFS= read -r line; do
    lineno=$((lineno+1))
    # Only data rows (start with "|"), skip header/separator/non-table
    case "$line" in
      \|*) ;;
      *) continue ;;
    esac
    case "$line" in
      *Story*Description*Status*|*'---'*) continue ;;
    esac
    desc=$(awk -F'|' '{print $3}' <<< "$line" | sed 's/^ *//;s/ *$//')
    [ -n "$desc" ] || continue
    # Strip the leading `[US-XXX](path)` link / bare `US-XXX` id — those are
    # structural, not description prose.
    body=$(sed -E \
      -e "s|^\[${id_re}\]\([^)]*\)[[:space:]]*||" \
      -e "s|^${id_re}[[:space:]]*||" <<< "$desc")
    issues=""
    # FIX-102: length check — backlog rows are an index page; descriptions
    # must be one human sentence (≤120 chars). Longer = technical detail
    # that belongs in the linked .roll/features/<epic>/<slug>.md.
    if [ "${#body}" -gt 120 ]; then
      issues="${issues:+${issues}, }length>${#body}"
    fi
    # FIX-102: code-fence check — backticks (`code`) signal technical jargon
    # (commands, identifiers, paths). Keep description prose plain text;
    # any code goes in the feature file.
    if grep -qF '`' <<< "$body"; then
      issues="${issues:+${issues}, }code-fence"
    fi
    # Filenames: bare `something.ext` for common code/config extensions
    if grep -qE '\b[A-Za-z_][A-Za-z0-9_.-]*\.(sh|bash|yaml|yml|json|js|ts|tsx|py|rb|go|rs|c|cpp|h)\b' <<< "$body"; then
      issues="${issues:+${issues}, }filename"
    fi
    # Paths: directory/anything pattern not preceded by `(` (links already
    # stripped above). Hyphens / dots / underscores allowed in path segments.
    if grep -qE '[A-Za-z_][A-Za-z0-9_.-]*/[A-Za-z0-9_./-]+' <<< "$body"; then
      issues="${issues:+${issues}, }path"
    fi
    # Function names: underscore-prefixed identifier or trailing parens
    if grep -qE '\b_[a-zA-Z][a-zA-Z0-9_]+\b|\b[A-Za-z_][A-Za-z0-9_]+\(\)' <<< "$body"; then
      issues="${issues:+${issues}, }function"
    fi
    if [ -n "$issues" ]; then
      violations=$((violations+1))
      # Extract the story id from column 2 so reports name the offending row.
      sid=$(awk -F'|' '{print $2}' <<< "$line" \
        | sed -E "s/^[[:space:]]*\[?(${id_re}[a-z]?).*/\1/" \
        | tr -d '[:space:]')
      printf '%s:%d: %s — %s\n  %s\n' "$backlog" "$lineno" "$sid" "$issues" "$desc"
    fi
  done < "$backlog"

  echo ""
  if [ "$violations" -gt 0 ]; then
    echo "  ${violations} violation(s) — see conventions/global/AGENTS.md §4"
    if [ "$gate" = 1 ]; then
      echo "$(msg ci.gate_enabled_exiting_1 "${violations}")"
      return 1
    fi
    echo "$(msg ci.phase_1_warn_only_not_blocking "${violations}")"
  else
    echo "$(msg ci.no_violations)"
  fi
  return 0
}

# cmd_backlog [subcommand] [args...]
#   Show or manage .roll/backlog.md.
#     lint [--gate] [file]            run _backlog_lint
#     unstick [args]                  delegate to lib/loop_unstick.py
#     sync [args]                     delegate to lib/github_sync.py
#     block|defer <pattern> [reason]  set matching rows to Blocked / Deferred
#     unblock|promote <pattern>       set matching rows back to Todo
#     (anything else)                 display pending / blocked / deferred /
#                                     unknown-status items
#   With ROLL_UI=v2 (the default) the display is delegated to
#   lib/roll-backlog.py.
#   Returns: 1 when the backlog file is missing or a pattern is not given.
#
#   Every `msg` argument is quoted: color variables are empty under NO_COLOR
#   and would otherwise vanish and shift the remaining arguments, and status
#   text such as "Blocked [reason]" would be word-split and glob-expanded.
cmd_backlog() {
  local backlog=".roll/backlog.md"
  if [[ ! -f "$backlog" ]]; then
    err "$(msg backlog.roll_backlog_md_not_found_run)"
    return 1
  fi

  local subcmd="${1:-}"

  # ── Status management subcommands ─────────────────────────────────────────
  case "$subcmd" in
    lint)
      shift
      _backlog_lint "$@" "$backlog"
      return
      ;;
    unstick)
      # FIX-112: revert 🔨 In Progress stories whose latest cycle ended
      # failed / aborted / blocked > N hours ago (default 4). Conservative
      # gate so it never undoes legitimately-in-progress work.
      shift
      python3 "${ROLL_PKG_DIR}/lib/loop_unstick.py" "$@"
      return
      ;;
    sync)
      # US-SYNC-002: pull GitHub issues into the local backlog. label→type
      # mapping (bug→FIX, enhancement/feature/US→US, refactor→REFACTOR,
      # else US), title→Description, open→📋 Todo / closed→✅ Done. Single
      # direction (issues → backlog.md).
      shift
      python3 "${ROLL_PKG_DIR}/lib/github_sync.py" sync "$@" --backlog "$backlog"
      return
      ;;
    block|defer|unblock|promote)
      local pattern="${2:-}"
      local reason="${3:-}"
      if [[ -z "$pattern" ]]; then
        err "$(msg backlog.usage_roll_backlog_pattern_reason "$subcmd")"
        return 1
      fi
      local new_status
      case "$subcmd" in
        block)           new_status="🔒 Blocked${reason:+ [${reason}]}" ;;
        defer)           new_status="⏸ Deferred${reason:+ [${reason}]}" ;;
        unblock|promote) new_status="📋 Todo" ;;
      esac
      local count
      count=$(_backlog_set_status "$pattern" "$new_status")
      if [[ "$count" -eq 0 ]]; then
        echo "$(msg backlog.no_items_matched "$pattern")"
      else
        echo "$(msg backlog.updated_item_s "${count}" "${new_status}")"
      fi
      return
      ;;
  esac

  # ── Display mode ──────────────────────────────────────────────────────────
  if [[ "${ROLL_UI:-v2}" == "v2" ]]; then
    python3 "${ROLL_PKG_DIR}/lib/roll-backlog.py" "$@"
    return
  fi
  # Dim styling, disabled under NO_COLOR like the other color variables.
  local DIM=$'\033[2m'
  if [[ -n "${NO_COLOR:-}" ]]; then
    DIM=''
  fi

  local us_items fix_items refactor_items idea_items total=0
  us_items=$(grep -E '^\| \[US-' "$backlog" | grep -F '| 📋 Todo |' || true)
  fix_items=$(grep -E '^\| FIX-' "$backlog" | grep -F '| 📋 Todo |' || true)
  refactor_items=$(grep -E '^\| REFACTOR-' "$backlog" | grep -F '| 📋 Todo |' || true)
  idea_items=$(grep -E '^\| IDEA-' "$backlog" | grep -F '| 📋 Todo |' || true)

  local us_count fix_count refactor_count idea_count
  us_count=$(echo "$us_items" | grep -c . || true)
  fix_count=$(echo "$fix_items" | grep -c . || true)
  refactor_count=$(echo "$refactor_items" | grep -c . || true)
  idea_count=$(echo "$idea_items" | grep -c . || true)
  [[ -z "$us_items" ]] && us_count=0
  [[ -z "$fix_items" ]] && fix_count=0
  [[ -z "$refactor_items" ]] && refactor_count=0
  [[ -z "$idea_items" ]] && idea_count=0
  total=$(( us_count + fix_count + refactor_count + idea_count ))

  local blocked_items deferred_items unknown_items
  blocked_items=$(grep -E '^\|' "$backlog" | grep '🔒 Blocked' || true)
  deferred_items=$(grep -E '^\|' "$backlog" | grep '⏸ Deferred' || true)
  unknown_items=$( { grep -E '^\| \[US-' "$backlog"; grep -E '^\| FIX-' "$backlog"; grep -E '^\| REFACTOR-' "$backlog"; grep -E '^\| IDEA-' "$backlog"; } \
    | grep -v '📋 Todo\|🔨 In Progress\|✅ Done\|🔒 Blocked\|⏸ Deferred' || true)

  local blocked_count deferred_count unknown_count
  blocked_count=$(echo "$blocked_items" | grep -c . || true)
  deferred_count=$(echo "$deferred_items" | grep -c . || true)
  unknown_count=$(echo "$unknown_items" | grep -c . || true)
  [[ -z "$blocked_items" ]] && blocked_count=0
  [[ -z "$deferred_items" ]] && deferred_count=0
  [[ -z "$unknown_items" ]] && unknown_count=0

  echo ""
  echo -e "$(msg backlog.pending_backlog "${BOLD}" "${NC}" "${total}")"
  echo ""

  [[ $fix_count -gt 0 ]]      && _backlog_render_group "$(msg backlog.bug_fixes)"   "$RED"    "$fix_count"      12 "$fix_items"
  [[ $us_count -gt 0 ]]       && _backlog_render_group "$(msg backlog.user_stories)" "$CYAN"   "$us_count"       14 "$us_items"
  [[ $refactor_count -gt 0 ]] && _backlog_render_group "$(msg backlog.refactors)"        "$YELLOW" "$refactor_count" 16 "$refactor_items"
  [[ $idea_count -gt 0 ]]     && _backlog_render_group "$(msg backlog.ideas)"            "$NC"     "$idea_count"     14 "$idea_items"

  if [[ $total -eq 0 ]]; then
    echo -e "$(msg backlog.nothing_pending_backlog_is_clear "${GREEN}" "${NC}")"
    echo ""
  fi

  local line id desc reason status_raw

  # ── Blocked ───────────────────────────────────────────────────────────────
  if [[ $blocked_count -gt 0 ]]; then
    echo -e "$(msg backlog.blocked "${DIM}" "${blocked_count}" "${NC}")"
    while IFS= read -r line; do
      [[ -z "$line" ]] && continue
      id=$(_backlog_extract_id "$line")
      desc=$(echo "$line" | awk -F'|' '{print $3}' | sed 's/^ *//;s/ *$//' | cut -c1-52)
      # The reason is the bracketed tail of the status cell, if any.
      reason=$(echo "$line" | awk -F'|' '{print $4}' | grep -oE '\[.*\]' | tr -d '[]' || true)
      printf "    ${DIM}🔒 %-14s  %s${NC}" "$id" "$desc"
      [[ -n "$reason" ]] && printf "${DIM}  (%s)${NC}" "$reason"
      printf "\n"
    done <<< "$blocked_items"
    echo ""
  fi

  # ── Deferred ──────────────────────────────────────────────────────────────
  if [[ $deferred_count -gt 0 ]]; then
    echo -e "$(msg backlog.deferred "${DIM}" "${deferred_count}" "${NC}")"
    while IFS= read -r line; do
      [[ -z "$line" ]] && continue
      id=$(_backlog_extract_id "$line")
      desc=$(echo "$line" | awk -F'|' '{print $3}' | sed 's/^ *//;s/ *$//' | cut -c1-52)
      reason=$(echo "$line" | awk -F'|' '{print $4}' | grep -oE '\[.*\]' | tr -d '[]' || true)
      printf "    ${DIM}⏸ %-14s  %s${NC}" "$id" "$desc"
      [[ -n "$reason" ]] && printf "${DIM}  (%s)${NC}" "$reason"
      printf "\n"
    done <<< "$deferred_items"
    echo ""
  fi

  # ── Unknown status (show for human/AI triage) ─────────────────────────────
  if [[ $unknown_count -gt 0 ]]; then
    echo -e "$(msg backlog.unknown_status "${YELLOW}" "${unknown_count}" "${NC}")"
    echo -e "$(msg backlog.fix_roll_backlog_block_defer_unblock "${YELLOW}" "${NC}")"
    while IFS= read -r line; do
      [[ -z "$line" ]] && continue
      id=$(_backlog_extract_id "$line")
      desc=$(echo "$line" | awk -F'|' '{print $3}' | sed 's/^ *//;s/ *$//' | cut -c1-52)
      status_raw=$(echo "$line" | awk -F'|' '{print $4}' | sed 's/^ *//;s/ *$//')
      printf "    ${YELLOW}? %-14s  %s  [%s]${NC}\n" "$id" "$desc" "$status_raw"
    done <<< "$unknown_items"
    echo ""
  fi
}

# ─────────────────────────────────────────────────────────────────────────────
# DASHBOARD — 自治优先六块布局 (US-AUTO-029)
# ─────────────────────────────────────────────────────────────────────────────

# ① Identity — state of the git working tree.
# Prints "—" outside a work tree, "✓" when `git status --porcelain` reports
# nothing, and "dirty" otherwise.
_dash_git_status() {
  if ! git rev-parse --is-inside-work-tree &>/dev/null; then
    echo "—"
    return
  fi
  case "$(git status --porcelain 2>/dev/null)" in
    "") echo "✓" ;;
    *)  echo "dirty" ;;
  esac
}

# ② Loop layer: print "id|title|feature-link" for the first backlog row whose
# *status column* is 🔨 In Progress (a substring match anywhere on the row
# would also catch description text that mentions the emoji).
# Prints nothing when there is no such row or no .roll/backlog.md.
#   id     first US-/FIX-/REFACTOR-/IDEA- story id on the row; multi-segment
#          ids (US-I18N-001) and a lowercase suffix (FIX-150a) are kept whole
#   title  third table cell, trimmed, at most 60 characters
#   link   first ".roll/features/…" path up to the closing ")" (may be empty)
# Always returns 0 so callers running under errexit/pipefail are not aborted
# by a row that has no id or link.
_dash_in_progress_story() {
  [[ -f ".roll/backlog.md" ]] || return 0
  local row
  row=$(grep -m1 -F '| 🔨 In Progress |' .roll/backlog.md) || return 0
  [[ -n "$row" ]] || return 0

  local id="" link=""
  local id_re='(US|FIX|REFACTOR|IDEA)(-[A-Z0-9]+)*-[0-9]+[a-z]?'
  local link_re='\.roll/features/[^)]+'
  if [[ "$row" =~ $id_re ]]; then
    id="${BASH_REMATCH[0]}"
  fi
  if [[ "$row" =~ $link_re ]]; then
    link="${BASH_REMATCH[0]}"
  fi

  # Split on "|": the leading pipe makes field 1 empty, field 2 the id cell,
  # field 3 the description.
  local lead id_cell desc rest
  IFS='|' read -r lead id_cell desc rest <<< "$row"
  desc="${desc#"${desc%%[! ]*}"}"   # trim leading spaces
  desc="${desc%"${desc##*[! ]}"}"   # trim trailing spaces
  desc="${desc:0:60}"               # character-based, unlike GNU `cut -c`

  printf '%s|%s|%s' "$id" "$desc" "$link"
}

# ② Loop layer: minutes since the last commit whose message starts with
# "tcr:". Prints nothing (and returns 0) outside a work tree, when no such
# commit exists, or when `git log` fails — for example in a repository that
# has no commits yet, which would otherwise abort callers under errexit.
_dash_last_tcr_minutes() {
  git rev-parse --is-inside-work-tree &>/dev/null || return 0
  local last_ts now
  last_ts=$(git log --grep='^tcr:' -1 --format=%ct 2>/dev/null) || return 0
  # Guard the arithmetic below against empty or unexpected output.
  [[ "$last_ts" =~ ^[0-9]+$ ]] || return 0
  now=$(date +%s)
  echo $(( (now - last_ts) / 60 ))
}

# ② Loop layer: number of "tcr:" commits since local midnight today.
# Always prints exactly one number; 0 outside a work tree or when there are
# no such commits.
_dash_tcr_today_count() {
  git rev-parse --is-inside-work-tree &>/dev/null || { echo 0; return; }
  local since count
  since=$(date '+%Y-%m-%d 00:00:00')
  # `grep -c` already prints 0 when nothing matches but exits 1; pairing it
  # with `|| echo 0` would print a second "0". Swallow the status instead.
  count=$(git log --since="$since" --grep='^tcr:' --oneline 2>/dev/null | grep -c '^' || true)
  echo "${count:-0}"
}

# ② Dream layer: whole hours since ~/.shared/roll/dream/log.md was last
# modified (per _file_mtime). Prints nothing when the log does not exist.
_dash_last_dream_hours() {
  local log_path="${HOME}/.shared/roll/dream/log.md"
  if [[ ! -f "$log_path" ]]; then
    return 0
  fi
  local written_at current_ts
  written_at=$(_file_mtime "$log_path")
  current_ts=$(date +%s)
  echo $(( (current_ts - written_at) / 3600 ))
}

# ② Dream layer: number of REFACTOR-XXX rows whose status column is 📋 Todo
# in .roll/backlog.md. Always prints one number and returns 0 — a zero count
# must not surface as a failed pipeline under `set -o pipefail`.
_dash_refactor_pending() {
  [[ -f ".roll/backlog.md" ]] || { echo 0; return; }
  local count
  count=$(grep -E '^\| REFACTOR-' .roll/backlog.md 2>/dev/null | grep -cF '| 📋 Todo |' || true)
  echo "${count:-0}"
}

# ② Peer layer: print "<result>|<days>" for the newest peer log.
#   result  last AGREE / REFINE / OBJECT / ESCALATE keyword found in the log,
#           or "—" when the log contains none
#   days    whole days since the log was modified (per _file_mtime)
# Prints nothing when there is no log.
# FIX-150a: logs are read from the project-local peer directory
# (was ~/.shared/roll/peer/*.log).
_dash_last_peer() {
  local logs_dir newest_log
  logs_dir=$(_peer_project_dir)/logs
  newest_log=$(ls "$logs_dir"/*.md 2>/dev/null | sort | tail -1 || true)
  if [[ -z "$newest_log" || ! -f "$newest_log" ]]; then
    return 0
  fi

  local verdict mtime_ts now_ts age_days
  verdict=$(grep -oE '(AGREE|REFINE|OBJECT|ESCALATE)' "$newest_log" 2>/dev/null | tail -1 || true)
  mtime_ts=$(_file_mtime "$newest_log")
  now_ts=$(date +%s)
  age_days=$(( (now_ts - mtime_ts) / 86400 ))
  printf '%s|%s' "${verdict:-—}" "${age_days}"
}

# ③ Pipeline counts → "Idea Backlog Build Verify Release" as five
# space-separated numbers (Verify/Release are reserved and always 0).
#   Idea     IDEA- rows in 📋 Todo
#   Backlog  US- / FIX- / REFACTOR- rows in 📋 Todo
#   Build    rows in 🔨 In Progress
# Prints "0 0 0 0 0" when .roll/backlog.md is missing. Zero counts are
# handled explicitly so the function also works with errexit and pipefail
# active, where a no-match `grep` would otherwise abort it.
_dash_pipeline_counts() {
  [[ -f ".roll/backlog.md" ]] || { echo "0 0 0 0 0"; return; }
  local idea backlog build
  idea=$(grep -E '^\| IDEA-' .roll/backlog.md 2>/dev/null | grep -cF '| 📋 Todo |' || true)
  backlog=$(grep -E '^\| (\[?US-|FIX-|REFACTOR-)' .roll/backlog.md 2>/dev/null | grep -cF '| 📋 Todo |' || true)
  build=$(grep -cF '| 🔨 In Progress |' .roll/backlog.md 2>/dev/null || true)
  printf '%s %s %s 0 0' "${idea:-0}" "${backlog:-0}" "${build:-0}"
}

# ④ DoD AC signal — count acceptance-criteria checkboxes for one story
# section of a feature doc.
#   $1  feature link of the form "path#anchor"
# Prints "checked/total" (e.g. "2/3"); "0/0" when the link is empty, the file
# is missing, or the section has no checkboxes. `[x]` and `[X]` both count as
# checked.
# The section is the text under the first "## " heading that follows
# `<a id="anchor"></a>`, up to the next "## " heading. If no such anchor
# exists, the fallback takes the section of the first "## " heading whose
# lowercased text matches the anchor (used as a regex).
_dash_ac_completion() {
  local feature_link="$1"
  if [[ -z "$feature_link" ]]; then
    echo "0/0"
    return
  fi
  local path="${feature_link%%#*}"
  local anchor="${feature_link##*#}"
  if [[ ! -f "$path" ]]; then
    echo "0/0"
    return
  fi

  local section
  section=$(awk -v anc="$anchor" '
    /^<a id="/{
      gsub(/<a id="|"><\/a>/, "")
      if ($0 == anc) { in_sec=1; next }
    }
    in_sec && /^## /{ if(!started){ started=1; next } else { exit } }
    in_sec && started { print }
  ' "$path" 2>/dev/null || true)
  if [[ -z "$section" ]]; then
    # Fallback: match heading line containing the anchor pattern directly.
    section=$(awk -v pat="$anchor" 'BEGIN{IGNORECASE=1}
      tolower($0) ~ pat && /^## /{p=1;next}
      p && /^## /{exit}
      p{print}' "$path" 2>/dev/null || true)
  fi

  # `grep -c` prints 0 itself when nothing matches (and exits 1); adding
  # `|| echo 0` would produce "0<newline>0" and corrupt the "x/total" output.
  local checked total
  checked=$(grep -cE '\[[xX]\]' <<< "$section" || true)
  total=$(grep -cE '\[[ xX]\]' <<< "$section" || true)
  printf '%s/%s' "${checked:-0}" "${total:-0}"
}

# ④ DoD CI signal — ask gh for the first listed run of HEAD and classify it.
# Prints one of: success | pending | failure | none
#   none     gh unavailable, not a git repo, listing failed, or no runs
#   pending  run is queued / in progress, or has no conclusion yet
#   success  conclusion is "success"
#   failure  any other conclusion
_dash_ci_status() {
  if ! _gh_available; then
    echo "none"
    return
  fi
  local head_sha repo runs_json
  head_sha=$(git rev-parse HEAD 2>/dev/null) || { echo "none"; return; }
  repo=$(_gh_repo_slug 2>/dev/null) || true
  # Target the resolved repo slug explicitly when one is available.
  if [[ -n "$repo" ]]; then
    runs_json=$(gh -R "$repo" run list --commit "$head_sha" --json status,conclusion 2>/dev/null) || { echo "none"; return; }
  else
    runs_json=$(gh run list --commit "$head_sha" --json status,conclusion 2>/dev/null) || { echo "none"; return; }
  fi
  if [[ -z "$runs_json" || "$runs_json" == "[]" ]]; then
    echo "none"
    return
  fi

  local run_conclusion run_status
  run_conclusion=$(echo "$runs_json" | jq -r '.[0].conclusion // ""' 2>/dev/null)
  run_status=$(echo "$runs_json" | jq -r '.[0].status // ""' 2>/dev/null)
  case "$run_status" in
    in_progress|queued)
      echo "pending"
      return
      ;;
  esac
  case "$run_conclusion" in
    success) echo "success" ;;
    "")      echo "pending" ;;
    *)       echo "failure" ;;
  esac
}

# ⑤ Active ALERT count — number of lines starting with "# ALERT" in the file
# named by $_LOOP_ALERT. Echoes 0 when the file is absent or cannot be read.
# Always returns status 0, so `n=$(_dash_alert_count)` is safe under
# `set -euo pipefail` even when there are no alerts.
_dash_alert_count() {
  [[ -f "$_LOOP_ALERT" ]] || { echo 0; return; }
  local n
  # grep -c exits 1 on zero matches (after printing "0") and 2 on read errors
  # (printing nothing); both statuses are deliberately swallowed.
  n=$(grep -c '^# ALERT' "$_LOOP_ALERT" 2>/dev/null) || true
  echo "${n:-0}"
}

# ⑤ Pending proposal count — number of lines starting with "## PROPOSAL" in
# .roll/proposals.md (relative to the current directory). Echoes 0 when the
# file is absent or cannot be read. Always returns status 0, so
# `n=$(_dash_proposal_count)` is safe under `set -euo pipefail`.
_dash_proposal_count() {
  [[ -f ".roll/proposals.md" ]] || { echo 0; return; }
  local n
  # grep -c exits 1 on zero matches (after printing "0"); swallow the status.
  n=$(grep -c '^## PROPOSAL' .roll/proposals.md 2>/dev/null) || true
  echo "${n:-0}"
}

# ⑤ Release-ready signal — succeeds (status 0) only when BOTH hold:
#   1. at least one commit since the latest tag has a subject that does not
#      start with a `docs:` / `chore:` prefix (optionally scoped, e.g.
#      `chore(deps):`), and
#   2. the lexically last file in .roll/briefs/*.md contains "✅ 可发版"
#      (Chinese for "ready to release") or "Release ready".
# Requiring (1) keeps the flag from sticking on after a release when only
# docs rewrites land on top of the tag (FIX-033 symptom 2).
_dash_release_ready() {
  local last_tag
  if ! last_tag=$(git describe --tags --abbrev=0 2>/dev/null); then
    return 1
  fi

  # Count the subjects that are NOT release-irrelevant.
  local releasable
  releasable=$(git log "${last_tag}..HEAD" --pretty=format:%s 2>/dev/null \
    | grep -cvE '^(docs|chore)(\([^)]*\))?:[[:space:]]')
  if ! [[ "${releasable:-0}" -gt 0 ]]; then
    return 1
  fi

  local newest_brief
  newest_brief=$(ls .roll/briefs/*.md 2>/dev/null | sort | tail -1 || true)
  if [[ -z "$newest_brief" ]]; then
    return 1
  fi

  grep -qE '✅ 可发版|Release ready' "$newest_brief" 2>/dev/null
}

# ⑥ Latest brief summary — one-line teaser for the brief file given as $1.
# Skips an H1 on the very first line, blockquotes, "---" rules and blank
# lines, then prints the first "## " heading (marker stripped) or the first
# line that does not start with whitespace, cut to 60 characters.
# Prints nothing and returns 0 when $1 is empty or not a regular file.
_dash_brief_summary() {
  local brief="$1"
  if [[ -z "$brief" || ! -f "$brief" ]]; then
    return 0
  fi

  awk '
    (NR == 1 && /^#/) || /^>/ || /^---$/ || /^$/ { next }
    /^## / { sub(/^## */, ""); print; exit }
    /^[^[:space:]]/ { print; exit }
  ' "$brief" 2>/dev/null | head -1 | cut -c1-60
}

# Render the legacy (non-v2) dashboard for the project in the current
# directory. Sections, in order: ① identity line, ② loop / dream / peer layers
# plus the defense row, ③ pipeline counts, ④ DoD badges for the in-progress
# story, ⑤ items needing human attention, ⑥ schedules and the latest brief.
# Takes no arguments and writes everything to stdout.
_legacy_home() {
  local project_path; project_path=$(pwd -P)
  local project_name; project_name=$(basename "$project_path")
  local agent; agent=$(_project_agent)
  local git_state; git_state=$(_dash_git_status)
  local is_darwin=false
  [[ "$(uname)" == "Darwin" ]] && is_darwin=true

  # ── ① Identity ─────────────────────────────────────────────────────────────
  echo ""
  printf "  ${BOLD}${CYAN}%s${NC}  ${YELLOW}v%s${NC}  ${BOLD}·${NC}  agent ${CYAN}%s${NC}  ${BOLD}·${NC}  git " \
    "$project_name" "$VERSION" "$agent"
  case "$git_state" in
    ✓) printf "${GREEN}✓${NC}\n" ;;
    dirty) printf "${YELLOW}dirty${NC}\n" ;;
    *) printf "${YELLOW}%s${NC}\n" "$git_state" ;;
  esac
  echo ""

  # ── ② AI autonomy — three layers × four lines of defense (main visual) ────
  # Colour arguments are quoted so msg still receives two (empty) arguments
  # when NO_COLOR has blanked them; unquoted they would vanish entirely.
  echo -e "$(msg backlog.ai "${BOLD}" "${NC}")"

  # Loop layer
  local loop_state="not-installed"
  local _dash_loop_paused=false
  [[ -f "$_LOOP_STATE" ]] && grep -q "^status: paused" "$_LOOP_STATE" 2>/dev/null && _dash_loop_paused=true
  if $is_darwin; then
    loop_state=$(_launchd_svc_state "loop" "$project_path")
  else
    crontab -l 2>/dev/null | grep -q "${_LOOP_TAG}:${project_path}" && loop_state="enabled"
  fi
  local active_start active_end dream_hour dream_minute
  local _aw; _aw=$(_loop_read_active_window "$project_path")
  active_start="${_aw%% *}"; active_end="${_aw##* }"
  # US-LOOP-013: use schedule spec for display
  local loop_spec loop_period loop_offset
  loop_spec=$(_loop_schedule_spec "$project_path")
  loop_period="${loop_spec%% *}"
  loop_offset="${loop_spec##* }"
  dream_hour=$(_config_read_int "loop_dream_hour" "3")
  dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")

  local loop_badge loop_sched
  loop_sched=$(_loop_schedule_desc "$loop_period" "$loop_offset" en)
  loop_sched="${loop_sched}  active ${active_start}:00–${active_end}:00"
  case "$loop_state" in
    enabled)       loop_badge="${GREEN}● enabled${NC}" ;;
    installed-off) loop_badge="${YELLOW}⚠ off${NC}" ;;
    *)             loop_badge="${RED}○ missing${NC}" ;;
  esac
  # A paused loop overrides whatever the scheduler state says.
  $_dash_loop_paused && loop_badge="${YELLOW}⏸ paused${NC}"
  printf "  Loop Layer    %b  %s\n" "$loop_badge" "$loop_sched"

  # Loop "Now:" line — current in-progress story, if any.
  # in_prog is a "|"-separated record: id|description|feature-link.
  local in_prog; in_prog=$(_dash_in_progress_story)
  if [[ -n "$in_prog" ]]; then
    local p_id p_desc
    p_id=${in_prog%%|*}
    p_desc=$(echo "$in_prog" | awk -F'|' '{print $2}')
    printf "                Now: ${BOLD}🔨 %s${NC}  %s\n" "$p_id" "$p_desc"
  else
    printf "                Now: ${DIM:-}idle${NC}\n"
  fi

  # last TCR + today count
  local last_tcr_min today_tcr
  last_tcr_min=$(_dash_last_tcr_minutes)
  today_tcr=$(_dash_tcr_today_count)
  if [[ -n "$last_tcr_min" ]]; then
    printf "                last TCR ${CYAN}%smin${NC} ago · ${CYAN}%s${NC} micro-commits today\n" "$last_tcr_min" "$today_tcr"
  else
    printf "                no tcr commits yet\n"
  fi

  # Dream layer
  # NOTE(review): only launchd is queried here, so on non-Darwin hosts the
  # badge always reads "missing" — confirm whether a cron check is wanted.
  local dream_state="not-installed"
  $is_darwin && dream_state=$(_launchd_svc_state "dream" "$project_path")
  local dream_badge
  case "$dream_state" in
    enabled)       dream_badge="${GREEN}● enabled${NC}" ;;
    installed-off) dream_badge="${YELLOW}⚠ off${NC}" ;;
    *)             dream_badge="${RED}○ missing${NC}" ;;
  esac
  printf "  Dream Layer   %b  %02d:%02d\n" "$dream_badge" "$dream_hour" "$dream_minute"
  local dream_hours refac_pending
  dream_hours=$(_dash_last_dream_hours)
  refac_pending=$(_dash_refactor_pending)
  if [[ -n "$dream_hours" ]]; then
    printf "                Last scan ${CYAN}%sh${NC} ago → ${CYAN}%s${NC} REFACTOR queued\n" "$dream_hours" "$refac_pending"
  else
    printf "                no scan yet → ${CYAN}%s${NC} REFACTOR queued\n" "$refac_pending"
  fi

  # Peer layer
  # peer is a "|"-separated record: result|days-since-last-call.
  local peer; peer=$(_dash_last_peer)
  printf "  Peer Layer    ${GREEN}● ready${NC}    on complexity=large\n"
  if [[ -n "$peer" ]]; then
    local peer_res peer_days
    peer_res=${peer%%|*}
    peer_days=${peer##*|}
    printf "                Last call ${CYAN}%sd${NC} ago · %s\n" "$peer_days" "$peer_res"
  else
    printf "                Last call —\n"
  fi

  # Four lines of defense
  # NOTE(review): the catalog key "backlog." has an empty suffix — verify it
  # really exists in the message catalog.
  echo -e "$(msg backlog. "${BOLD}" "${NC}")"
  local def_tcr="${RED}○${NC}" def_review="${GREEN}●${NC}" def_spar="${YELLOW}○${NC}" def_sentinel="${YELLOW}○ off${NC}"
  if [[ -n "$last_tcr_min" ]]; then
    def_tcr="${GREEN}● ${last_tcr_min}min${NC}"
  fi
  printf "  TCR %b   Spar %b   Auto Review %b   Sentinel %b\n" \
    "$def_tcr" "$def_spar" "$def_review" "$def_sentinel"
  echo -e "  ${BOLD}╚══════════════════════════════════════════════════════════╝${NC}"
  echo ""

  # ── ③ Pipeline overview ────────────────────────────────────────────────────
  # Declared local so the five counters do not leak into the global scope.
  local pl_idea pl_backlog pl_build pl_verify pl_release
  read -r pl_idea pl_backlog pl_build pl_verify pl_release <<< "$(_dash_pipeline_counts)"
  local build_color="${DIM:-}"
  (( pl_build > 0 )) && build_color="${BOLD}${YELLOW}"
  printf "  ${BOLD}📦 Pipeline${NC}   Idea %s ▸ Backlog %s ▸ Build %b%s🔨${NC} ▸ Verify %s ▸ Release %s\n" \
    "$pl_idea" "$pl_backlog" "$build_color" "$pl_build" "$pl_verify" "$pl_release"
  echo ""

  # ── ④ Current Focus · DoD (only when Build > 0) ───────────────────────────
  if [[ -n "$in_prog" && "$pl_build" -gt 0 ]]; then
    local p_id p_desc p_link
    p_id=${in_prog%%|*}
    p_desc=$(echo "$in_prog" | awk -F'|' '{print $2}')
    p_link=$(echo "$in_prog" | awk -F'|' '{print $3}')
    local ac_ratio; ac_ratio=$(_dash_ac_completion "$p_link")
    local ac_done="${ac_ratio%%/*}" ac_total="${ac_ratio##*/}"
    local ac_badge ci_badge
    # AC is green only when there is at least one checkbox and all are ticked.
    if [[ "$ac_total" != "0" && "$ac_done" == "$ac_total" ]]; then
      ac_badge="${GREEN}✓ AC${NC}"
    else
      ac_badge="${YELLOW}○ AC ${ac_done}/${ac_total}${NC}"
    fi
    local ci_state; ci_state=$(_dash_ci_status)
    case "$ci_state" in
      success) ci_badge="${GREEN}✓ CI${NC}" ;;
      pending) ci_badge="${YELLOW}… CI${NC}" ;;
      failure) ci_badge="${RED}✗ CI${NC}" ;;
      *)       ci_badge="${YELLOW}○ CI${NC}" ;;
    esac
    printf "  ${BOLD}📊 Current Focus · DoD${NC}\n"
    printf "    🔨 ${BOLD}%s${NC}  %s\n" "$p_id" "$p_desc"
    printf "    [%b]  [%b]\n" "$ac_badge" "$ci_badge"
    # The catalog text is used as the printf format itself — presumably so
    # entries can carry \n escapes; verify before changing.
    printf "$(msg backlog.4_dod_see_us_auto_030 "${YELLOW}" "${NC}")"
    echo ""
  fi

  # ── ⑤ Human × AI intervention zone ────────────────────────────────────────
  # Counts are stripped to digits and defaulted to 0 so the (( )) tests below
  # never see an empty or padded value.
  local alerts proposals release_ready
  alerts=$(_dash_alert_count); alerts=${alerts//[^0-9]/}; alerts=${alerts:-0}
  proposals=$(_dash_proposal_count); proposals=${proposals//[^0-9]/}; proposals=${proposals:-0}
  release_ready=false; _dash_release_ready && release_ready=true
  printf "$(msg backlog.n "${BOLD}" "${NC}")"
  if (( alerts == 0 )) && (( proposals == 0 )) && ! $release_ready; then
    printf "$(msg backlog.ai_2 "${GREEN}" "${NC}")"
  else
    (( alerts > 0 )) && printf "    ${RED}⚠ %s ALERT${NC}          run: roll alert\n" "$alerts"
    (( proposals > 0 )) && printf "    ${YELLOW}📋 %s PROPOSAL${NC}      see: .roll/proposals.md\n" "$proposals"
    $release_ready && printf "    ${GREEN}✓ Release ready${NC}    run: roll release\n"
  fi
  echo ""

  # ── ⑥ Schedules & Last Brief ──────────────────────────────────────────────
  printf "  ${BOLD}⏰ Schedules & Last Brief${NC}\n"
  local loop_sched_short; loop_sched_short=$(_loop_schedule_desc "$loop_period" "$loop_offset" en)
  # FIX-195: brief loop retired — schedule line shows loop + dream only.
  printf "    %s · dream %02d:%02d\n" \
    "$loop_sched_short" "$dream_hour" "$dream_minute"
  local latest_brief; latest_brief=$(ls .roll/briefs/*.md 2>/dev/null | sort | tail -1 || true)
  if [[ -n "$latest_brief" ]]; then
    local mod_time now age summary
    mod_time=$(_file_mtime "$latest_brief")
    now=$(date +%s)
    # An empty or non-numeric mtime would make the arithmetic below a syntax
    # error; fall back to "just now" instead.
    [[ "$mod_time" =~ ^[0-9]+$ ]] || mod_time=$now
    age=$(( (now - mod_time) / 3600 ))
    summary=$(_dash_brief_summary "$latest_brief")
    printf "    Brief ${CYAN}%sh${NC} ago — %s\n" "$age" "${summary:-—}"
  else
    printf "    Brief: ${YELLOW}none yet${NC} — run: roll brief\n"
  fi
  echo ""
}

# Dashboard entry point. ROLL_UI selects the renderer: "v2" (also the default
# when ROLL_UI is unset or empty) runs lib/roll-home.py from the package dir;
# any other value uses the bash renderer. Arguments are forwarded untouched.
_home() {
  case "${ROLL_UI:-v2}" in
    v2) python3 "${ROLL_PKG_DIR}/lib/roll-home.py" "$@" ;;
    *)  _legacy_home "$@" ;;
  esac
}

# ═══════════════════════════════════════════════════════════════════════════════
# MAIN
# ═══════════════════════════════════════════════════════════════════════════════
# Print the bash-rendered help screen: logo, version tagline, usage line,
# the command list and the examples. Every catalog line is fetched with msg
# and echoed in the order listed below. Takes no arguments.
_legacy_help() {
  local row key

  # Logo, one coloured row per line.
  for row in \
    ' ██████╗  ██████╗ ██╗     ██╗     ' \
    ' ██╔══██╗██╔═══██╗██║     ██║     ' \
    ' ██████╔╝██║   ██║██║     ██║     ' \
    ' ██╔══██╗██║   ██║██║     ██║     ' \
    ' ██║  ██║╚██████╔╝███████╗███████╗' \
    ' ╚═╝  ╚═╝ ╚═════╝ ╚══════╝╚══════╝'
  do
    echo -e "${CYAN}${row}${NC}"
  done
  echo ""
  echo -e "  ${BOLD}v${VERSION}${NC} — Roll out features with AI agents"
  echo ""

  echo "Usage: roll <command> [options]"
  echo "$(msg backlog.usage_roll_command_options)"
  echo ""

  echo "Commands:"
  for key in \
    backlog.setup_f_machine_first_time_install \
    backlog.update_upgrade_npm_install_latest_re \
    backlog.init_project_create_agents_md_roll \
    backlog.offboard_confirm_project_reverse_a_previous \
    backlog.status_diagnostic_show_current_state \
    backlog.peer_peer_review_cross_agent_negotiation \
    backlog.loop_on_off_now_status_monitor \
    backlog.brief_digest_show_latest_owner_brief \
    backlog.backlog_view_show_pending_tasks_todo \
    backlog.backlog_block_pat_reason_mark_matching \
    backlog.backlog_defer_pat_reason_mark_matching \
    backlog.backlog_unblock_pat_restore_matching_items \
    backlog.backlog_unstick_dry_run_revert_in \
    backlog.backlog_lint_check_descriptions_for_path \
    backlog.agent_use_name_list_config_per \
    backlog.ci_wait_ci_show_or_wait \
    backlog.review_pr_number_pr_review_ai
  do
    echo "$(msg "$key")"
  done
  echo ""

  # Examples: heading first, then one line per example.
  for key in \
    backlog.examples \
    backlog.roll_setup_new_machine_first_time \
    backlog.roll_update_upgrade_to_latest_version \
    backlog.roll_init_new_or_re_merge \
    backlog.roll_loop_on_enable_autonomous_loop \
    backlog.roll_brief_show_latest_brief \
    backlog.roll_backlog_show_pending_blocked_deferred \
    backlog.roll_backlog_defer_us_doc \
    backlog.roll_backlog_block_us_hw_001 \
    backlog.roll_agent_use_kimi_switch_this
  do
    echo "$(msg "$key")"
  done
}

# Help entry point. ROLL_UI selects the renderer: "v2" (also the default when
# ROLL_UI is unset or empty) runs lib/roll-help.py from the package dir; any
# other value uses the bash renderer. Arguments are forwarded untouched.
_help() {
  case "${ROLL_UI:-v2}" in
    v2) python3 "${ROLL_PKG_DIR}/lib/roll-help.py" "$@" ;;
    *)  _legacy_help "$@" ;;
  esac
}

# ═══════════════════════════════════════════════════════════════════════════════
# _check_structure — US-ONBOARD-004
#
# Refuse to run project commands on legacy directory structure. Pushes users
# toward `roll migrate` rather than letting commands silently operate on old
# paths and produce confusing results.
#
# Arguments: $1 — the subcommand about to run (may be empty).
# Returns 0 to allow the command; prints guidance to stderr and exits 1 to
# refuse it.
#
# Exempt commands (always allowed regardless of structure):
#   setup, update, version/--version/-v, help/--help/-h, migrate, doctor, skills
#   "" (no command — shows home/help)
#   init — has its own structure-aware logic inside cmd_init
#   offboard — does its own changeset check inside cmd_offboard
#
# Detection uses the git root (or pwd when not inside a git repo) as the
# project root, allows when that root or any ancestor holds a project `.roll/`,
# and refuses only when old structure markers AND a Roll signature are present.
#
# Bypass: ROLL_SKIP_STRUCTURE_CHECK=1 (used by integration tests until Story 5
# migrates the test fixtures to new structure).
# ═══════════════════════════════════════════════════════════════════════════════
_check_structure() {
  [[ "${ROLL_SKIP_STRUCTURE_CHECK:-0}" == "1" ]] && return 0

  local cmd="${1:-}"
  case "$cmd" in
    setup|update|migrate|doctor|skills|version|--version|-v|help|--help|-h|"") return 0 ;;
    init) return 0 ;;  # cmd_init handles its own structure logic
    offboard) return 0 ;;  # cmd_offboard does its own changeset check
  esac

  # Determine project root: git root if available, else pwd
  local root
  if ! root=$(git rev-parse --show-toplevel 2>/dev/null); then
    root="$(pwd -P)"
  fi

  # If new structure exists, allow
  [[ -d "$root/.roll" ]] && return 0

  # FIX-156: nested-repo escape — when cwd is inside `.roll/` (a Roll project's
  # nested private roll-meta worktree), git rev-parse returns `.roll/` itself
  # instead of the outer Roll project, so the check above misses the outer
  # `.roll/` and trips the legacy warning on the project's own roll-meta files
  # (which contain BACKLOG.md / etc by definition). Walk up from $root and
  # allow when any ancestor directory has a `.roll/` child — that is the
  # outer Roll project, and the user is operating on it from a sub-checkout.
  #
  # $ROLL_HOME (by default ~/.roll) is Roll's own global home, not a project
  # directory, so it is skipped: counting it would silently allow every legacy
  # repository living anywhere under $HOME. `-ef` compares device + inode, so
  # symlinked or differently-spelled paths to the same directory still match.
  local _probe _parent
  _probe="$(dirname "$root")"
  while [[ "$_probe" != "/" && "$_probe" != "." && -n "$_probe" ]]; do
    if [[ -d "$_probe/.roll" && ! "$_probe/.roll" -ef "${ROLL_HOME:-}" ]]; then
      return 0
    fi
    _parent="$(dirname "$_probe")"
    [[ "$_parent" == "$_probe" ]] && break  # reached fs root
    _probe="$_parent"
  done

  # US-ONBOARD-019: only treat the directory as a legacy *Roll* project when an
  # old-path marker is present AND a Roll-specific content signature confirms
  # the project was actually onboarded with Roll. Otherwise we'd block any
  # non-Roll repo that happens to ship a BACKLOG.md (Jira export, board dump,
  # different tooling) or a generic docs/features/ folder.
  local _has_old_path=false
  if [[ -f "$root/BACKLOG.md" ]] \
     || [[ -f "$root/PROPOSALS.md" ]] \
     || [[ -d "$root/docs/features" ]] \
     || [[ -d "$root/docs/briefs" ]] \
     || [[ -d "$root/docs/dream" ]]; then
    _has_old_path=true
  fi

  if [[ "$_has_old_path" == "true" ]] && _has_roll_signature "$root"; then
    err "$(msg check_structure.detected "$root")"
    echo "" >&2
    echo "  $(msg check_structure.pre_2_0_layout)" >&2
    echo "  $(msg check_structure.run_migration)" >&2
    echo "" >&2
    echo "    roll migrate --dry-run       $(msg check_structure.preview_changes)" >&2
    echo "    roll migrate                 $(msg check_structure.execute)" >&2
    echo "" >&2
    echo "  $(msg check_structure.migration_guide): ${ROLL_PKG_DIR}/guide/en/migration-2.0.md" >&2
    echo "" >&2
    echo "  $(msg check_structure.roll_back):" >&2
    echo "    npm install -g @seanyao/roll@1" >&2
    exit 1
  fi

  # No structure detected — empty project or non-Roll dir. Allow.
  return 0
}

# CLI entry point: dispatch the first argument to its command handler and
# forward the remaining arguments to it.
#   $1   subcommand name (may be absent)
#   $2…  arguments for that subcommand
# An unknown subcommand prints an error plus help and exits 1.
main() {
  local cmd="${1:-}"
  # With no arguments `shift` fails; `|| true` keeps errexit from firing.
  shift || true

  # US-ONBOARD-004: refuse to run project commands on legacy structure
  # (_check_structure exits the process itself when it refuses).
  _check_structure "$cmd"

  case "$cmd" in
    setup)         cmd_setup "$@" ;;
    update)        cmd_update "$@" ;;
    init)          cmd_init "$@" ;;
    offboard)      cmd_offboard "$@" ;;
    migrate)       cmd_migrate "$@" ;;
    status)        cmd_status "$@" ;;
    peer)          cmd_peer "$@" ;;
    loop)          cmd_loop "$@" ;;
    brief)         cmd_brief "$@" ;;
    backlog)       cmd_backlog "$@" ;;
    alert)         cmd_alert "$@" ;;
    feedback)      cmd_feedback "$@" ;;
    lang)          cmd_lang "$@" ;;
    agent)         cmd_agent "$@" ;;
    ci)            cmd_ci "$@" ;;
    doctor)        cmd_doctor "$@" ;;
    skills)        cmd_skills "$@" ;;
    review-pr)     cmd_review_pr "$@" ;;
    slides)        cmd_slides "$@" ;;
    test)          cmd_test "$@" ;;
    prices)        cmd_prices "$@" ;;
    changelog)     cmd_changelog "$@" ;;
    consistency)   cmd_consistency "$@" ;;
    config)        cmd_config "$@" ;;
    # Underscore-prefixed names dispatch straight to the same-named functions.
    _loop_render_exit_summary) _loop_render_exit_summary "$@" ;;
    _loop_pr_inbox) _loop_pr_inbox "$@" ;;
    version|--version|-v) echo "roll v${VERSION}" ;;
    help|--help|-h) _help "$@" ;;
    # No command: dashboard when .roll/backlog.md exists in the current
    # directory, otherwise help + changelog. Because _home sits in the middle
    # of an `&& … ||` list, a non-zero status from _home ALSO runs the
    # help + changelog fallback, and errexit is ignored while _home executes.
    "") [[ -f ".roll/backlog.md" ]] && _home || { _help; _show_changelog; } ;;
    *)
      err "$(msg main.unknown_command "$cmd")"
      echo ""
      _help
      exit 1
      ;;
  esac
}

# ─── Show recent changelog entries ────────────────────────────────────────────
# Print the first three "## " sections of ${ROLL_PKG_DIR}/CHANGELOG.md: each
# heading (marker stripped) indented by two spaces, each non-blank body line
# by four. Anything before the first heading is skipped. Prints nothing and
# returns 0 when the file does not exist.
_show_changelog() {
  local changelog="${ROLL_PKG_DIR}/CHANGELOG.md"
  [[ -f "$changelog" ]] || return 0

  echo -e "${BOLD}$(msg changelog.heading):${NC}"

  local count=0 in_section=false line
  # `|| [[ -n "$line" ]]` keeps the final line when the file lacks a trailing
  # newline (read returns non-zero there but still fills $line).
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ ^##\  ]]; then
      # Stop at the fourth heading — only three sections are shown.
      (( ++count > 3 )) && break
      in_section=true
      echo ""
      echo -e "  ${CYAN}${line#\#\# }${NC}"
    elif [[ "$in_section" == true && -n "$line" ]]; then
      echo "    $line"
    fi
  done < "$changelog"
  echo ""
}

# ─── Version check (background, non-blocking, 24h cache) ─────────────────────
# FIX-166: forget the cached `latest` so the next run refetches it. Meant to
# run after a successful `roll update`: otherwise the 24h cache TTL keeps the
# pre-upgrade `latest` around and _notify_update nags the freshly installed
# (newer) version to "upgrade" to the older cached one for up to a day. With
# the file gone, _notify_update stays silent until the async refetch writes a
# new one.
_invalidate_update_cache() {
  local cache_file="${ROLL_HOME}/.update-check"
  rm -f "$cache_file"
}

# FIX-170: the cache file is `<ts> <latest> <writer-version>` — the 3rd field
# binds it to the binary version that wrote it. FIX-166's explicit invalidation
# only covers `roll update` run by a NEW binary; an upgrade executed by an old
# binary (the 2.602.2→2.602.4 transition) or out-of-band (npm -g / brew / git)
# left a stale cache that reverse-nagged for up to 24h. A writer-version
# mismatch now means "stale" regardless of TTL: refetch, and stay silent until
# the refetch lands. Legacy 2-field caches have no writer → auto-invalidated.
#
# Returns 0 immediately in every case. When the cache is missing, stale, or
# written by another version, a detached background job fetches the latest
# release tag and rewrites ${ROLL_HOME}/.update-check.
_check_update_async() {
  local cache="${ROLL_HOME}/.update-check"
  local now; now=$(date +%s)
  local last=0 writer=""
  if [[ -f "$cache" ]]; then
    last=$(awk '{print $1}' "$cache" 2>/dev/null || echo 0)
    writer=$(awk '{print $3}' "$cache" 2>/dev/null || true)
  fi
  # A corrupt or non-numeric timestamp counts as "never checked" rather than
  # breaking the arithmetic below.
  [[ "$last" =~ ^[0-9]+$ ]] || last=0
  if [[ "$writer" == "$VERSION" ]] && (( now - last < 86400 )); then
    return 0
  fi

  # The job is detached from the caller's stdin/stdout/stderr. Otherwise a
  # caller capturing output (`$(roll …)`, `roll … | cmd`) would block until
  # curl finishes, and a missing $ROLL_HOME would leak a "No such file or
  # directory" message onto the user's terminal.
  {
    local latest
    latest=$(curl -sf --max-time 5 \
      "https://api.github.com/repos/seanyao/roll/releases/latest" \
      | grep '"tag_name"' | sed 's/.*"v\([^"]*\)".*/\1/' 2>/dev/null || true)
    # `-` placeholder keeps the field positions stable when the fetch fails.
    echo "$now ${latest:--} $VERSION" > "$cache"
  } </dev/null >/dev/null 2>&1 &
  # Best effort: a failing disown must not trip errexit in the caller.
  disown 2>/dev/null || true
}

# Print an "update available" warning when the update cache names a release
# different from the running $VERSION. Silent (status 0) when the cache is
# missing, was written by another binary version, or holds no usable release.
_notify_update() {
  local cache_file="${ROLL_HOME}/.update-check"
  if [[ ! -f "$cache_file" ]]; then
    return 0
  fi

  # Cache layout: <timestamp> <latest> <writer-version>
  local cached_latest cached_writer
  cached_latest=$(awk '{print $2}' "$cache_file" 2>/dev/null || true)
  cached_writer=$(awk '{print $3}' "$cache_file" 2>/dev/null || true)

  # FIX-170: a cache written by a different binary version is stale — stay
  # silent; _check_update_async has already kicked off the refetch.
  if [[ "$cached_writer" != "$VERSION" ]]; then
    return 0
  fi

  # Nothing to report: fetch failed ("-"), field missing, or already current.
  case "$cached_latest" in
    ""|"-"|"$VERSION") return 0 ;;
  esac

  # FIX-163: the cached `latest` is GitHub's releases/latest — the newest
  # release by created_at, NOT by semver. Under the MAJOR.MMDD scheme a plain
  # `sort -V` mis-ranks versions across the year-based→MAJOR.MMDD transition
  # (2026.601.4 > 2.602.1) and the Jan-1 MMDD wrap (2.1231.N > 2.101.1), which
  # previously (a) reverse-nagged users to "upgrade" to an older release and
  # (b) silently suppressed real updates after the wrap. So no ordering is
  # attempted here: any difference from the running version is surfaced.
  echo ""
  warn "$(msg update.available "$cached_latest")"
}

# Run the CLI only when this file is executed directly; when it is sourced
# (BASH_SOURCE[0] differs from $0) only the definitions are loaded.
# Order matters: the update check is started in the background first, the
# command runs, and the notice is printed last. Under `set -e` a non-zero
# status from main ends the script before _notify_update is reached.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  _check_update_async
  main "$@"
  _notify_update
fi
