#!/usr/bin/env bash
# wogi-claude — Task-boundary session restart wrapper for Claude Code.
#
# When the WogiFlow TaskCompleted hook (with taskBoundaryReset.enabled: true)
# finishes a task, it writes a restart flag to $WOGI_RESTART_FLAG and sends
# SIGTERM to its parent Claude Code process. Claude Code exits cleanly; this
# wrapper detects the flag and restarts claude in a fresh context. All WogiFlow
# state is preserved on disk and re-hydrated via SessionStart hook.
#
# Why this exists: Claude Code has no programmatic /clear or in-session reset.
# Prior-conversation tokens accumulate across tasks, burning 10-20% of context
# budget before new work begins. This wrapper recovers that budget cleanly.
#
# Usage:
#   Direct:  wogi-claude [claude args...]
#   PATH:    ensure ~/.wogiflow/bin is on PATH, then run `wogi-claude` instead of `claude`
#   Opt-out: pass --no-wogi-restart to run claude once without the restart loop
#
# Environment:
#   WOGI_RESTART_FLAG    — path to restart-flag file (default: <cwd>/.workflow/state/restart-requested)
#   WOGI_MAX_RESTARTS    — safety cap, default 50 (prevents runaway restart storms)
#   WOGI_WRAPPER_PID     — exported to child; hook checks this to confirm wrapper is present
#   WOGI_CLAUDE_BIN      — override path to claude binary (default: found via PATH)
#   WOGI_BASH_BIN        — (wf-ee4e343b cleanup) override the bash binary used
#                          by the PID-alignment subshell trick. Defaults to
#                          `bash` on PATH. Useful on minimal containers
#                          (Alpine, distroless) where bash lives at a
#                          non-standard path or where the shell wrapping the
#                          claude CLI is not bash-by-default.
#   WOGI_USE_EXPECT      — (EXPERIMENTAL, v2.22.4+) set to 1 to opt IN to the
#                          expect-based auto-dismiss of the "Loading development
#                          channels" dialog. OFF BY DEFAULT because Ink's
#                          ANSI-rich output can cause expect's text match to
#                          miss, which deadlocks the dialog (user keystrokes
#                          get held by expect, not forwarded to claude). If
#                          you can confirm it works for your terminal, opt in
#                          and enjoy zero-click restarts.
#   WOGI_NO_EXPECT       — legacy opt-out from 2.22.3. Still honored (forces
#                          expect off regardless of WOGI_USE_EXPECT).
#   WOGI_EXPECT_TIMEOUT  — override the expect timeout (default 30s) for watching
#                          the dialog. After timeout we hand off to the user
#                          unconditionally.

set -u

# --- Resolve helper paths (for expect-based dialog auto-dismiss) ---
# WOGI_CLAUDE_DIR is the absolute directory containing this script; the expect
# helper is looked up next to it.
#
# CDPATH is cleared and cd's stdout is discarded: when the script is invoked
# via a relative path (e.g. `bash bin/wogi-claude`) and the caller exports
# CDPATH, `cd` may resolve through CDPATH and print the target directory,
# which would be captured by the command substitution in addition to `pwd`.
WOGI_CLAUDE_DIR="$(CDPATH='' cd -- "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
WOGI_EXPECT_SCRIPT="$WOGI_CLAUDE_DIR/wogi-claude-expect.exp"

# Detect whether to use the expect wrapper for auto-dismissing the
# --dangerously-load-development-channels modal.
#
# Precedence (highest to lowest):
#   1. WOGI_NO_EXPECT=1 → always OFF (kill switch)
#   2. Workspace worker mode → ON automatically (headless, cannot Enter by hand)
#   3. WOGI_USE_EXPECT=1 → ON (explicit opt-in for interactive users)
#   4. Default → OFF (interactive users get the native Claude Code dialog)
#
# Worker auto-enable (v2.26.2): WOGI_WORKSPACE_ROOT + WOGI_REPO_NAME (worker
# side) are set by `flow workspace start` before spawning this wrapper, so
# detection here is reliable. Interactive users never set these vars, so their
# default remains opt-in — the v2.22.3 regression (expect's text match miss on
# Ink ANSI output) is bounded to users who explicitly asked for expect.
#
# v2.26.3: the expect script now uses `interact -o -re` from second zero —
# stdin flows to claude throughout, so user keystrokes are NEVER captured
# by expect. The v2.26.2 approach (rolling buffer + ANSI strip in an expect
# watch block) had a worse failure mode: on match miss, expect owned stdin
# for 30s and user keystrokes went into its buffer, making the dialog
# appear frozen. The new interact approach falls back gracefully — mismatch
# just means the user dismisses the dialog themselves, no black hole.

# Worker detection: a worker has a workspace root AND a repo name that is
# neither empty nor the literal "manager". Result is 1 for workers, else 0.
__wogi_is_worker=0
if [ -n "${WOGI_WORKSPACE_ROOT:-}" ]; then
  case "${WOGI_REPO_NAME:-}" in
    "" | manager) ;;             # no repo name, or the manager itself
    *) __wogi_is_worker=1 ;;
  esac
fi

# --- wf-8294d960 (Story A): worker MCP-stripping + init banner ---
#
# Root cause (measured 2026-04-24): cold-boot of claude --print in a project
# with claude.ai OAuth + 7 integrations takes 10-20s; claude --bare --print is
# <1s. The ~10-19s gap is Claude Code's init (OAuth + remote MCP handshakes
# + LSP + plugin sync + CLAUDE.md discovery + background prefetches).
# WogiFlow's SessionStart hook is ~128ms — <1% of the problem. See
# .workflow/scratch/wf-8294d960-investigation/root-cause.md for full data.
#
# Workers are specialized autonomous code executors; they typically do NOT
# need Gmail/Slack/Atlassian/etc. integrations for refactoring code. Stripping
# claude.ai MCP from worker boot saves ~3-7s per restart. Users who genuinely
# need these integrations per-worker can opt in via the config key below.
#
# Opt-in: set config.workspace.inheritClaudeAiMcpIntegrations to true in
# .workflow/config.json (or env var WOGI_WORKER_INHERIT_MCP=1) to keep
# claude.ai MCP integrations active in worker mode. Default: strip (fast).
# Decide whether claude.ai MCP integrations are stripped for this launch.
#   - non-workers:                         never strip (0)
#   - workers with WOGI_WORKER_INHERIT_MCP=1: keep integrations (0); the env
#     override is checked first, so config.json is not consulted at all
#   - other workers:                       strip (1) unless config.json sets
#     workspace.inheritClaudeAiMcpIntegrations to a truthy value
__wogi_strip_mcp=0
if [ "$__wogi_is_worker" -eq 1 ] && [ "${WOGI_WORKER_INHERIT_MCP:-}" != "1" ]; then
  __wogi_strip_mcp=1
  if command -v node >/dev/null 2>&1; then
    # The snippet prints "true" or "false"; any read/parse error prints
    # "false", so a broken or missing config.json leaves stripping enabled.
    __wogi_config_inherit="$(node -e '
      try {
        const cfg = require(process.cwd() + "/.workflow/config.json");
        process.stdout.write(String(!!(cfg.workspace && cfg.workspace.inheritClaudeAiMcpIntegrations)));
      } catch (_err) { process.stdout.write("false"); }
    ' 2>/dev/null)"
    [ "$__wogi_config_inherit" != "true" ] || __wogi_strip_mcp=0
  fi
fi

# Resolve the channel-only MCP config used for stripping. Persistent path
# so we don't regenerate on every restart; living in .workflow/state/ keeps
# it alongside other worker state.
#
# IMPORTANT — REGRESSION FIX (audit-channel-transport-001):
#
# The original Story A (wf-8294d960) wrote `{"mcpServers":{}}` — fully
# empty — under the (unverified) assumption that workers don't need any
# MCP servers in worker mode. That assumption was WRONG: it stripped
# `wogi-workspace-channel` which IS the transport that the manager uses
# to dispatch tasks to workers via `workspace_send_message` (the manager
# HTTP-POSTs to the worker's channel-server port; with no MCP server,
# there's no listener, so dispatches silently fail with "connection
# refused"). Tier-3 evidence (end-to-end manager→worker dispatch) was
# never collected; only boot-latency was measured. Story B
# (wf-ab59f0e4) layered COMPLETION-SUMMARY routing on top of this
# broken transport without auditing the dependency.
#
# The proper fix: extract ONLY the `wogi-workspace-channel` entry from
# the worker's real `.mcp.json` and write a channel-only config. This
# preserves Story A's boot-speed win (claude.ai MCP integrations stay
# stripped) while keeping the workspace transport active. If the
# worker's `.mcp.json` doesn't define `wogi-workspace-channel` (e.g.
# this is not a workspace member), fall back to the empty MCP config
# (the strip is harmless in non-workspace contexts).
#
# SEC-003 fix (2026-04-26): validate WOGI_WORKSPACE_ROOT before using it as
# a destination path. Without validation, an attacker who can set the env
# var could redirect the channel-only MCP config write to an arbitrary
# path. Rules:
#   1. Must be absolute (start with /).
#   2. Must point to an existing directory.
#   3. Must NOT contain '..' anywhere (conservative traversal guard: the check
#      is a plain substring match, so a name such as 'a..b' is rejected too).
# On any validation failure, fall back to $(pwd) which is bounded by the
# current working directory.
# Generate the channel-only MCP config used when stripping is active.
#
# Outputs:
#   __wogi_empty_mcp_config — destination path of the generated config
#                             (stays "" when stripping is off)
#   __wogi_strip_mcp        — reset to 0 if the config could not be written,
#                             so claude is launched without the strip flags
__wogi_empty_mcp_config=""
if [ "$__wogi_strip_mcp" -eq 1 ]; then
  # Validate WOGI_WORKSPACE_ROOT before using it as a write destination:
  # non-empty, absolute, an existing directory, and free of any '..'.
  # Anything else falls back to the current working directory.
  __wogi_workspace_root_raw="${WOGI_WORKSPACE_ROOT:-}"
  __wogi_workspace_root_safe=""
  if [ -n "$__wogi_workspace_root_raw" ] \
     && [ "${__wogi_workspace_root_raw#/}" != "$__wogi_workspace_root_raw" ] \
     && [ -d "$__wogi_workspace_root_raw" ] \
     && [ "${__wogi_workspace_root_raw#*..}" = "$__wogi_workspace_root_raw" ]; then
    __wogi_workspace_root_safe="$__wogi_workspace_root_raw"
  else
    __wogi_workspace_root_safe="$(pwd)"
    if [ -n "$__wogi_workspace_root_raw" ]; then
      echo "[wogi-claude] WARNING: WOGI_WORKSPACE_ROOT='$__wogi_workspace_root_raw' failed validation (must be absolute, exist, no '..'); falling back to $(pwd)" >&2
    fi
  fi
  __wogi_empty_mcp_config="$__wogi_workspace_root_safe/.workflow/state/worker-channel-only-mcp.json"
  __wogi_member_mcp_path="$(pwd)/.mcp.json"
  if command -v node >/dev/null 2>&1; then
    # Use the dedicated helper (testable; see tests/flow-worker-mcp-strip.test.js).
    # Extracts ONLY the wogi-workspace-channel entry from the worker's real
    # .mcp.json so manager-side workspace_send_message dispatch keeps working.
    #
    # Candidates are resolved lazily, in precedence order, so `npm root -g`
    # (a full npm start-up) only runs when the copy next to this wrapper is
    # absent. The npm candidate is skipped entirely when npm is missing or
    # prints nothing; otherwise the path would degrade to
    # "/wogiflow/scripts/..." at the filesystem root.
    __wogi_strip_helper=""
    __wogi_helper_rel="scripts/flow-worker-mcp-strip.js"
    for __wogi_helper_origin in wrapper npm-global project; do
      __wogi_candidate=""
      case "$__wogi_helper_origin" in
        wrapper)
          __wogi_candidate="$(dirname "$0")/../$__wogi_helper_rel"
          ;;
        npm-global)
          if command -v npm >/dev/null 2>&1; then
            __wogi_npm_root="$(npm root -g 2>/dev/null)"
            if [ -n "$__wogi_npm_root" ]; then
              __wogi_candidate="$__wogi_npm_root/wogiflow/$__wogi_helper_rel"
            fi
          fi
          ;;
        project)
          __wogi_candidate="$(pwd)/node_modules/wogiflow/$__wogi_helper_rel"
          ;;
      esac
      if [ -n "$__wogi_candidate" ] && [ -f "$__wogi_candidate" ]; then
        __wogi_strip_helper="$__wogi_candidate"
        break
      fi
    done
    if [ -n "$__wogi_strip_helper" ]; then
      # Helper failure disables stripping rather than launching claude with a
      # config file that may not exist.
      node "$__wogi_strip_helper" "$__wogi_member_mcp_path" "$__wogi_empty_mcp_config" 2>/dev/null || __wogi_strip_mcp=0
    else
      # Helper not found — fall back to inline extraction (legacy code path
      # for installs that pre-date the helper script).
      #
      # arch-004 (2026-04-26): even in the fallback, scrub prototype-pollution
      # keys from the parsed .mcp.json before re-emitting. This keeps the
      # bash-inline path consistent with the canonical helper's safety
      # guarantees (no raw JSON.parse without proto-scrub).
      #
      # The snippet writes to a pid-suffixed temp file and renames it into
      # place, so the destination is replaced in one step.
      node -e '
        const fs = require("fs");
        const path = require("path");
        const DANGEROUS = new Set(["__proto__","constructor","prototype"]);
        function strip(v, d) {
          if (d > 256 || !v || typeof v !== "object") return v;
          if (Array.isArray(v)) { for (const x of v) strip(x, d+1); return v; }
          for (const k of Object.getOwnPropertyNames(v)) {
            if (DANGEROUS.has(k)) { delete v[k]; continue; }
            strip(v[k], d+1);
          }
          return v;
        }
        const [src, out] = process.argv.slice(1);
        let channelEntry = null;
        try {
          if (fs.existsSync(src)) {
            const cfg = JSON.parse(fs.readFileSync(src, "utf-8"));
            strip(cfg, 0);
            const ws = cfg && cfg.mcpServers && cfg.mcpServers["wogi-workspace-channel"];
            if (ws) channelEntry = ws;
          }
        } catch (_err) {}
        const payload = channelEntry
          ? { mcpServers: { "wogi-workspace-channel": channelEntry } }
          : { mcpServers: {} };
        fs.mkdirSync(path.dirname(out), { recursive: true });
        const tmp = out + ".tmp." + process.pid;
        fs.writeFileSync(tmp, JSON.stringify(payload, null, 2) + "\n");
        fs.renameSync(tmp, out);
      ' "$__wogi_member_mcp_path" "$__wogi_empty_mcp_config" 2>/dev/null || __wogi_strip_mcp=0
    fi
  else
    # node missing — last-resort fallback. Empty config = manager dispatch
    # will fail, but worker boot will still succeed.
    mkdir -p "$(dirname "$__wogi_empty_mcp_config")" 2>/dev/null
    printf '{"mcpServers":{}}\n' > "$__wogi_empty_mcp_config" 2>/dev/null || __wogi_strip_mcp=0
  fi
fi

# Init banner (C-2): workers take ~5-10s even with MCP stripped; tell the user
# it's not frozen. Interactive solo users don't see this (only workers).
# Print the one-line worker init banner on stderr. It is emitted only in
# worker mode and only when stderr is a terminal; the parenthesised detail
# reflects whether MCP stripping is active for this launch.
if [ "$__wogi_is_worker" -eq 1 ] && [ -t 2 ]; then
  if [ "$__wogi_strip_mcp" -eq 1 ]; then
    __wogi_banner_detail="(claude.ai MCP integrations stripped for speed)"
  else
    __wogi_banner_detail="(claude.ai MCP inherited — expect +3-7s boot)"
  fi
  __wogi_banner_msg="[wogi-claude] worker '${WOGI_REPO_NAME}' initializing $__wogi_banner_detail"
  echo "$__wogi_banner_msg — expected boot ~5-10s, please wait..." >&2
fi

# Does this launch WANT the expect wrapper? (Availability is checked later.)
# A non-empty WOGI_NO_EXPECT always wins; otherwise worker mode or an explicit
# WOGI_USE_EXPECT=1 turns it on.
__wogi_wants_expect=0
if [ -n "${WOGI_NO_EXPECT:-}" ]; then
  : # kill switch set — leave expect off
elif [ "$__wogi_is_worker" -eq 1 ]; then
  __wogi_wants_expect=1
elif [ "${WOGI_USE_EXPECT:-}" = "1" ]; then
  __wogi_wants_expect=1
fi

# Decide whether claude is actually launched through the expect helper.
# Requires all of: expect was requested (__wogi_wants_expect), the
# --dangerously-load-development-channels flag is present in argv, the
# `expect` binary is on PATH, and the helper script is executable.
# Sets __wogi_use_expect to 1 when every condition holds, else leaves it 0.
__wogi_use_expect=0
if [ "$__wogi_wants_expect" -eq 1 ]; then
  # The dialog only fires when --dangerously-load-development-channels is in
  # argv; skip the expect dance otherwise.
  __wogi_has_flag=0
  for arg in "$@"; do
    if [ "$arg" = "--dangerously-load-development-channels" ]; then
      __wogi_has_flag=1
      break
    fi
  done
  if [ "$__wogi_has_flag" -eq 1 ]; then
    # Record WHICH prerequisite is missing (empty = none) so the worker
    # warning below names the real cause instead of always blaming a missing
    # `expect` binary.
    __wogi_expect_missing=""
    if ! command -v expect >/dev/null 2>&1; then
      __wogi_expect_missing="binary"
    elif [ ! -x "$WOGI_EXPECT_SCRIPT" ]; then
      __wogi_expect_missing="script"
    fi

    if [ -z "$__wogi_expect_missing" ]; then
      __wogi_use_expect=1
      if [ "$__wogi_is_worker" -eq 1 ]; then
        echo "[wogi-claude] worker mode detected — auto-enabled expect-based dialog dismissal" >&2
      fi
    elif [ "$__wogi_is_worker" -eq 1 ]; then
      # Headless worker without a usable expect setup = the dialog WILL
      # deadlock this worker on restart. Warn loudly so the operator can fix
      # it, but still start claude (better than failing the worker outright).
      if [ "$__wogi_expect_missing" = "binary" ]; then
        echo "[wogi-claude] WARNING: worker mode detected (repo '${WOGI_REPO_NAME}') but 'expect' is not installed." >&2
        echo "[wogi-claude] The --dangerously-load-development-channels dialog will block this worker on the next restart." >&2
        echo "[wogi-claude] Install expect to enable headless auto-dismiss:" >&2
        echo "[wogi-claude]   macOS:           brew install expect" >&2
        echo "[wogi-claude]   Debian/Ubuntu:   apt install expect" >&2
      else
        echo "[wogi-claude] WARNING: worker mode detected (repo '${WOGI_REPO_NAME}') but the expect helper script is missing or not executable: $WOGI_EXPECT_SCRIPT" >&2
        echo "[wogi-claude] The --dangerously-load-development-channels dialog will block this worker on the next restart." >&2
        echo "[wogi-claude] Restore the script or make it executable (chmod +x) to enable headless auto-dismiss." >&2
      fi
    fi
  fi
fi

# __wogi_build_argv ARGS... — build the argv handed to claude.
# Globals read:    __wogi_strip_mcp, __wogi_empty_mcp_config
# Globals written: __wogi_claude_argv (bash array, replaced on every call)
# When stripping is active and a config path is set, the array starts with
# `--strict-mcp-config --mcp-config <path>`; ARGS follow unchanged.
# Callers expand the result with "${__wogi_claude_argv[@]+"${__wogi_claude_argv[@]}"}".
__wogi_build_argv() {
  if [ "$__wogi_strip_mcp" -eq 1 ] && [ -n "$__wogi_empty_mcp_config" ]; then
    __wogi_claude_argv=(--strict-mcp-config --mcp-config "$__wogi_empty_mcp_config" "$@")
  else
    __wogi_claude_argv=("$@")
  fi
}

# run_claude — invoke claude, routing through expect when we can auto-dismiss
# the dev-channels dialog. Preserves stdin/stdout/stderr exactly.
#
# wf-ee4e343b: PID-alignment via bash-c-exec trick. The Stop hook's SEC-006
# check (task-boundary-reset.js:200-206) requires WOGI_WRAPPER_PID === process.ppid
# in any hook running under claude. Plain `"$CLAUDE_BIN" ...` without `exec`
# causes bash to fork: claude gets a NEW PID that does not match $$ (this bash
# wrapper's PID). The check then fails silently, breaking auto-restart for
# everyone since 2026-04-26.
#
# Fix: spawn claude through `bash -c '...'` which forks a fresh bash with its
# OWN $$, sets WOGI_WRAPPER_PID to that $$, then `exec` replaces the new bash
# with claude — preserving the same PID. Result: claude's PID equals the
# WOGI_WRAPPER_PID it inherits, and process.ppid in any hook child of claude
# equals that same value. The strict-equality SEC-006 check now holds.
#
# Why `bash -c` and not a `( ... )` subshell: in bash 3.x (macOS system bash),
# `$$` inside a `( ... )` subshell returns the OUTER shell's PID, not the
# subshell's own PID. Bash 4+ adds $BASHPID for that purpose, but we cannot
# rely on bash 4+ being installed. `bash -c` always returns its own PID via
# `$$`, regardless of version.
#
# Bash -c argv form: `bash -c COMMAND COMMAND_NAME ARG1 ARG2 ...` — COMMAND_NAME
# becomes $0 inside the script and ARG1..N become $1..$N, so `exec "$0" "$@"`
# invokes claude with all original args without quoting hazards.
#
# For expect mode, the same alignment is performed inside wogi-claude-expect.exp.
run_claude() {
  # Snippet run by the intermediate bash: publish its own PID, then replace
  # itself with the claude binary ($0) and the original arguments ($@).
  local pid_align_snippet='export WOGI_WRAPPER_PID=$$; exec "$0" "$@"'
  local bash_bin="${WOGI_BASH_BIN:-bash}"

  __wogi_build_argv "$@"

  if [ "$__wogi_use_expect" -eq 1 ]; then
    # Expect mode: the helper script receives the claude binary followed by
    # the full argv.
    expect "$WOGI_EXPECT_SCRIPT" "$CLAUDE_BIN" "${__wogi_claude_argv[@]+"${__wogi_claude_argv[@]}"}"
    return
  fi

  "$bash_bin" -c "$pid_align_snippet" "$CLAUDE_BIN" "${__wogi_claude_argv[@]+"${__wogi_claude_argv[@]}"}"
}

# --- Opt-out path: no restart loop, just exec claude once ---
#
# __wogi_exec_once_if_opted_out ARGS...
#   If ARGS contains --no-wogi-restart, remove every occurrence of that flag
#   and replace this process (exec) with claude — or with the expect helper
#   when __wogi_use_expect is 1. Does not return in that case.
#   If the flag is absent, returns 0 without side effects.
# Globals read:    WOGI_CLAUDE_BIN, WOGI_EXPECT_SCRIPT, __wogi_use_expect
# Globals written: CLAUDE_BIN, __wogi_claude_argv (via __wogi_build_argv)
#
# The filtered array is expanded with the ${arr[@]+"${arr[@]}"} guard: under
# `set -u`, bash releases before 4.4 (including macOS system bash 3.2) treat
# "${arr[@]}" on an EMPTY array as an unbound variable and abort, which is
# exactly what happens when --no-wogi-restart is the only argument.
__wogi_exec_once_if_opted_out() {
  local a
  local opted_out=0
  local -a filtered
  filtered=()

  for a in "$@"; do
    if [ "$a" = "--no-wogi-restart" ]; then
      opted_out=1
    else
      filtered+=("$a")
    fi
  done
  [ "$opted_out" -eq 1 ] || return 0

  CLAUDE_BIN="${WOGI_CLAUDE_BIN:-claude}"
  __wogi_build_argv ${filtered[@]+"${filtered[@]}"}
  if [ "$__wogi_use_expect" -eq 1 ]; then
    exec expect "$WOGI_EXPECT_SCRIPT" "$CLAUDE_BIN" "${__wogi_claude_argv[@]+"${__wogi_claude_argv[@]}"}"
  else
    exec "$CLAUDE_BIN" "${__wogi_claude_argv[@]+"${__wogi_claude_argv[@]}"}"
  fi
}
__wogi_exec_once_if_opted_out "$@"

# --- Resolve paths and config ---
CLAUDE_BIN="${WOGI_CLAUDE_BIN:-claude}"
PROJECT_ROOT="$(pwd)"
FLAG_FILE="${WOGI_RESTART_FLAG:-$PROJECT_ROOT/.workflow/state/restart-requested}"

# __wogi_sanitize_max_restarts VALUE
#   Prints VALUE when it consists solely of decimal digits; otherwise warns on
#   stderr and prints the default (50).
#   Why: the restart cap is compared with `[ ... -gt ... ]`, which errors out
#   and evaluates false on a non-integer operand — a typo in
#   WOGI_MAX_RESTARTS would silently disable the safety cap.
__wogi_sanitize_max_restarts() {
  case "${1:-}" in
    '' | *[!0-9]*)
      echo "[wogi-claude] WARNING: WOGI_MAX_RESTARTS='${1:-}' is not a non-negative integer; using default 50" >&2
      printf '%s\n' 50
      ;;
    *)
      printf '%s\n' "$1"
      ;;
  esac
}
MAX_RESTARTS="$(__wogi_sanitize_max_restarts "${WOGI_MAX_RESTARTS:-50}")"

# Exported for the child: the flag path the hook writes to, and this
# wrapper's PID.
export WOGI_RESTART_FLAG="$FLAG_FILE"
export WOGI_WRAPPER_PID=$$

# --- Signal handling: remember INT/TERM so the main loop exits instead of restarting ---
# on_interrupt only records that a signal arrived. It neither kills nor
# forwards anything; the child is left to receive the signal on its own, and
# the main loop reads INTERRUPTED after the child has exited.
on_interrupt() {
  INTERRUPTED=1
}
INTERRUPTED=0
trap on_interrupt TERM INT

# --- Main loop ---
# Each iteration launches claude once. The loop repeats only when claude
# exited, no INT/TERM was recorded, and the restart flag file exists; in
# every other case the wrapper exits with claude's exit status. The
# iteration counter is capped by MAX_RESTARTS.
count=0
while :; do
  count=$((count + 1))

  if [ "$count" -gt "$MAX_RESTARTS" ]; then
    echo "[wogi-claude] max restarts ($MAX_RESTARTS) reached — exiting" >&2
    exit 1
  fi

  # First launch only: drop a flag left behind by an earlier run so it
  # cannot trigger a restart that nobody requested.
  if [ "$count" -eq 1 ]; then
    [ ! -f "$FLAG_FILE" ] || rm -f "$FLAG_FILE"
  fi

  run_claude "$@"
  inner_exit=$?

  # Stop here when a signal was recorded, or when no restart was requested
  # (claude exited for its own reasons); propagate claude's exit code.
  if [ "$INTERRUPTED" -eq 1 ] || [ ! -f "$FLAG_FILE" ]; then
    exit "$inner_exit"
  fi

  # Restart requested: consume the flag, tell the user, and go around again.
  rm -f "$FLAG_FILE"
  echo "[wogi-claude] task boundary — restarting with fresh context (iteration $((count + 1)))" >&2
done
