#!/usr/bin/env bash
# sovereignty-recompute — emit ARQERA capability_declaration bodies + the
# aggregate sovereignty_metrics body. Implements
# arq://doc/protocol/capability-declaration-contract-v1.
#
# Discipline: substrate-emit only. No mutations to provider state. Any
# habitat with twin CLI can run this and get the latest substrate-visible
# sovereignty snapshot.
#
# v1 scope: emits 12 load-bearing ARQERA capabilities + 1 aggregate
# metrics body. Each capability is declared with 9 fields per the
# contract. Computation is in-script (Python heredoc); future evolution
# can move computation into a substrate-side beat task.
#
# Usage:
#   scripts/sovereignty-recompute                  # emit all + aggregate
#   scripts/sovereignty-recompute --dry-run        # no emit; print would-emit lines + metrics JSON
#   scripts/sovereignty-recompute --metrics-only   # skip per-capability emit
#
# Operator may invoke this on any habitat to re-snapshot sovereignty.

set -euo pipefail

DRY_RUN="false"
METRICS_ONLY="false"

#######################################
# Parse command-line flags into the DRY_RUN / METRICS_ONLY globals.
# Globals:   DRY_RUN (written), METRICS_ONLY (written)
# Arguments: the script's argv ("$@")
# Outputs:   --help prints the header comment (file lines 2-20) to stdout;
#            an unknown flag prints a diagnostic to stderr
# Returns:   0 on success; exits 0 after --help, exits 2 on an unknown flag
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run) DRY_RUN="true"; shift ;;
      --metrics-only) METRICS_ONLY="true"; shift ;;
      # The header comment spans lines 2-20. A wider range (2-22) would leak
      # the blank line and `set -euo pipefail` into the help text.
      --help|-h) sed -n '2,20p' "$0"; exit 0 ;;
      *) echo "unknown arg: $1" >&2; exit 2 ;;
    esac
  done
}

parse_args "$@"

# Abort with exit status 2 unless the named executable resolves on PATH.
#   $1 - command name to look up
#   $2 - label used in the diagnostic written to stderr
require_on_path() {
  local tool=$1 label=$2
  if command -v "$tool" >/dev/null 2>&1; then
    return 0
  fi
  echo "error: ${label} not on PATH" >&2
  exit 2
}

# Both tools are checked up front, twin first, before any work is done.
require_on_path twin "twin CLI"
require_on_path python3 "python3"

# v1 capability set — 12 load-bearing ARQERA capabilities. Each row maps
# to one arq://body/capability_declaration/<slug>-v1 emission. Substrate-
# routable: any peer reading these knows ARQERA's centralization shape.
#
# This is INTENTIONALLY a v1 starter set, not an exhaustive ontology.
# Per the contract: prefer smallest operationally meaningful evolution.
# Runtime pressure refines it.
#
# Every row carries the same 14 keys. The metrics step further down reads
# capability_slug, authority_owner, active_steward, active_steward_peer,
# replication_factor, survivability_class and transfer_readiness; the
# remaining keys are passed through into the emitted body unchanged.
#
# Capture notes: the quoted heredoc delimiter disables shell expansion, so
# the text is stored verbatim. `read -d ''` consumes input up to a NUL that
# never arrives and therefore returns non-zero at end of input; `|| true`
# keeps `set -e` from aborting on that expected status. The `PY` delimiter
# is only a label — the content is JSON, not Python.
read -r -d '' DECLARATIONS <<'PY' || true
[
  {
    "capability_slug": "substrate-emit",
    "display_name": "Substrate act / body / doc emission",
    "authority_owner": "substrate",
    "active_steward": "any-peer-with-keychain",
    "active_steward_peer": "arq://body/peer/578412e7b083b40e56e228779804582a",
    "backup_steward": "any other substrate peer",
    "backup_steward_peer": "arq://body/peer/2e172c010ebcd3e8ce39cd33b34b49f6",
    "replication_factor": 2,
    "survivability_class": "critical",
    "portability_class": "trivial",
    "transfer_readiness": "ready",
    "credential_source": "keychain",
    "reconstruction_path": "substrate-readable",
    "notes": "any peer with a keychain identity can emit; the addressing service receives + signs"
  },
  {
    "capability_slug": "substrate-addressing-service",
    "display_name": "addressing.arqera.io substrate addressing service",
    "authority_owner": "substrate-cluster",
    "active_steward": "addressing.arqera.io (GKE managed)",
    "active_steward_peer": "https://addressing.arqera.io",
    "backup_steward": "none",
    "backup_steward_peer": "none",
    "replication_factor": 1,
    "survivability_class": "critical",
    "portability_class": "hard",
    "transfer_readiness": "blocked",
    "credential_source": "infra-vault",
    "reconstruction_path": "infra-team-only",
    "notes": "single-cluster managed service; substrate's own addressing layer; needs arqera-infra reconstruction"
  },
  {
    "capability_slug": "mesh-coordinator",
    "display_name": "libp2p mesh coordinator for substrate-mesh transport",
    "authority_owner": "operator",
    "active_steward": "none-currently",
    "active_steward_peer": "none",
    "backup_steward": "Mac (was active, retired)",
    "backup_steward_peer": "arq://body/peer/578412e7b083b40e56e228779804582a",
    "replication_factor": 0,
    "survivability_class": "important",
    "portability_class": "bounded",
    "transfer_readiness": "blocked",
    "credential_source": "keychain",
    "reconstruction_path": "substrate-readable-with-cred-rotation",
    "notes": "Homebase reports mesh-coordinator=blocked due to legacy-VPN pin; substrate-evidence: arq://body/peer_routing_readiness/2e172c010ebcd3e8ce39cd33b34b49f6-current"
  },
  {
    "capability_slug": "cloudflared-tunnel",
    "display_name": "Cloudflare tunnel for inbound substrate dispatch",
    "authority_owner": "operator",
    "active_steward": "Mac launchd io.arqera.cloudflared",
    "active_steward_peer": "arq://body/peer/578412e7b083b40e56e228779804582a",
    "backup_steward": "none",
    "backup_steward_peer": "none",
    "replication_factor": 1,
    "survivability_class": "critical",
    "portability_class": "bounded",
    "transfer_readiness": "partial",
    "credential_source": "keychain",
    "reconstruction_path": "substrate-readable-with-cred-rotation",
    "notes": "single point of failure; if Mac disappears, inbound substrate dispatch from external networks breaks; doc reference: cloudflare-tunnel-v1"
  },
  {
    "capability_slug": "github-merge-via-substrate",
    "display_name": "arq-github pr merge-via-substrate dispatch",
    "authority_owner": "substrate",
    "active_steward": "Mac (sole GH token holder)",
    "active_steward_peer": "arq://body/peer/578412e7b083b40e56e228779804582a",
    "backup_steward": "any peer with rotated GH token",
    "backup_steward_peer": "none-currently",
    "replication_factor": 1,
    "survivability_class": "important",
    "portability_class": "trivial",
    "transfer_readiness": "partial",
    "credential_source": "keychain",
    "reconstruction_path": "substrate-readable-with-cred-rotation",
    "notes": "Mac is currently the only peer with a gh keyring token; rotation policy needed for any other habitat to dispatch merges"
  },
  {
    "capability_slug": "staging-k8s-cluster",
    "display_name": "GKE arqera-staging cluster",
    "authority_owner": "release_operator",
    "active_steward": "GKE managed",
    "active_steward_peer": "gke://arqera-staging",
    "backup_steward": "none",
    "backup_steward_peer": "none",
    "replication_factor": 1,
    "survivability_class": "critical",
    "portability_class": "hard",
    "transfer_readiness": "blocked",
    "credential_source": "infra-vault",
    "reconstruction_path": "infra-team-only",
    "notes": "managed by arqera-infra; reconstruction requires GCP project + manifests"
  },
  {
    "capability_slug": "modal-tier0-inference",
    "display_name": "Modal-hosted self-tuned Qwen3 inference (5 fine-tunes)",
    "authority_owner": "operator",
    "active_steward": "gashiru--arqera-inference-api.modal.run",
    "active_steward_peer": "https://gashiru--arqera-inference-api.modal.run",
    "backup_steward": "Cerebras tier-1 fallback",
    "backup_steward_peer": "vendor:cerebras",
    "replication_factor": 2,
    "survivability_class": "important",
    "portability_class": "bounded",
    "transfer_readiness": "partial",
    "credential_source": "vault",
    "reconstruction_path": "substrate-readable-with-cred-rotation",
    "notes": "tier-routed fallback to Cerebras/Anthropic is wired but not load-tested under primary outage"
  },
  {
    "capability_slug": "github-actions-ci",
    "display_name": "GitHub Actions CI for ARQERA repos",
    "authority_owner": "release_operator",
    "active_steward": "github.com (Arqera-IO org)",
    "active_steward_peer": "https://github.com/Arqera-IO",
    "backup_steward": "none",
    "backup_steward_peer": "none",
    "replication_factor": 1,
    "survivability_class": "critical",
    "portability_class": "vendor-locked",
    "transfer_readiness": "blocked",
    "credential_source": "vendor-direct",
    "reconstruction_path": "vendor-only",
    "notes": "all PR CI runs through GitHub Actions; vendor outage stops merges entirely"
  },
  {
    "capability_slug": "operator-approval-authority",
    "display_name": "Operator (gashiru) ability to grant substrate-attested approvals",
    "authority_owner": "operator",
    "active_steward": "gashiru@live.co.uk",
    "active_steward_peer": "operator-self",
    "backup_steward": "none",
    "backup_steward_peer": "none",
    "replication_factor": 1,
    "survivability_class": "critical",
    "portability_class": "vendor-locked",
    "transfer_readiness": "blocked",
    "credential_source": "operator-self",
    "reconstruction_path": "operator-only",
    "notes": "single-operator authority root; legitimate by design but every habitat carries this dependency"
  },
  {
    "capability_slug": "mesh-enforce-hook",
    "display_name": "mesh-enforce.sh local hook (governance gate)",
    "authority_owner": "substrate",
    "active_steward": "Mac ~/.arqera/hooks/mesh-enforce.sh",
    "active_steward_peer": "arq://body/peer/578412e7b083b40e56e228779804582a",
    "backup_steward": "Homebase (equivalent hook expected)",
    "backup_steward_peer": "arq://body/peer/2e172c010ebcd3e8ce39cd33b34b49f6",
    "replication_factor": 2,
    "survivability_class": "important",
    "portability_class": "trivial",
    "transfer_readiness": "ready",
    "credential_source": "n/a",
    "reconstruction_path": "substrate-readable",
    "notes": "hook source is substrate-readable; each habitat installs its own; 8+ false-positive classes accumulated"
  },
  {
    "capability_slug": "act-queue-flush",
    "display_name": "Local act-queue drain into substrate",
    "authority_owner": "substrate",
    "active_steward": "Mac launchd io.arqera.twin-flush",
    "active_steward_peer": "arq://body/peer/578412e7b083b40e56e228779804582a",
    "backup_steward": "Homebase systemd (expected)",
    "backup_steward_peer": "arq://body/peer/2e172c010ebcd3e8ce39cd33b34b49f6",
    "replication_factor": 2,
    "survivability_class": "nice-to-have",
    "portability_class": "trivial",
    "transfer_readiness": "ready",
    "credential_source": "keychain",
    "reconstruction_path": "substrate-readable",
    "notes": "per-peer local-act drain; each habitat needs its own; substrate is canonical receiver"
  },
  {
    "capability_slug": "frontend-substrate-health-surface",
    "display_name": "/system/substrate-health operator UAT surface on staging",
    "authority_owner": "substrate",
    "active_steward": "staging.arqera.io (blue/green)",
    "active_steward_peer": "gke://arqera-staging/arqera-backend-blue",
    "backup_steward": "production (when launched)",
    "backup_steward_peer": "none-currently",
    "replication_factor": 1,
    "survivability_class": "important",
    "portability_class": "bounded",
    "transfer_readiness": "ready",
    "credential_source": "n/a",
    "reconstruction_path": "substrate-readable",
    "notes": "rebuild via deploy-staging.yml workflow; code lives in ARQERA repo"
  }
]
PY

# Wave-1 augmentation: per-capability customer_mode_handling +
# decentralization_plan. Layered onto declarations at compute time so
# the v1 base shape remains forward-compatible.
#
# CUSTOMER_MODE_MAP is a JSON object keyed by capability_slug; each value is
# a free-text string that the Python step copies into the matching
# declaration as "customer_mode_handling". Slugs with no entry here are left
# without that field.
#
# As with DECLARATIONS, `read -d ''` returns non-zero at end of input, so
# `|| true` is needed under `set -e`; the `PY` delimiter is only a label.
read -r -d '' CUSTOMER_MODE_MAP <<'PY' || true
{
  "substrate-emit": "ok-with-per-user-identity — customer Mac emits as its own peer; substrate-truth is the same shape",
  "substrate-addressing-service": "read-only-via-https — customer Mac uses the service; does not host it",
  "mesh-coordinator": "NOT-ON-CUSTOMER-MAC — coordinator role is org-level; customer routes via addressing service",
  "cloudflared-tunnel": "NEVER-ON-CUSTOMER-MAC — org-tier inbound tunnel credential",
  "github-merge-via-substrate": "NEVER-ON-CUSTOMER-MAC — org-tier merge dispatch credential",
  "staging-k8s-cluster": "n/a — customer Mac does not interact directly",
  "modal-tier0-inference": "ok-via-api — customer Mac calls inference API; does not host model",
  "github-actions-ci": "n/a — customer Mac does not dispatch CI",
  "operator-approval-authority": "n/a — operator authority is not a customer-Mac capability",
  "mesh-enforce-hook": "optional — customer-mode substrate citizen may install for governance + audit",
  "act-queue-flush": "per-peer — customer Mac runs its own twin-flush against substrate",
  "frontend-substrate-health-surface": "via-browser — customer Mac visits the URL; does not host"
}
PY
# DECENTRALIZATION_PLAN_MAP is a JSON object keyed by capability_slug. Each
# value is an object with a "plan_class" plus either free-text "notes" or a
# "doc" reference (arq://doc/survivability_plan/...). The Python step copies
# the whole object into the matching declaration as "decentralization_plan".
#
# `read -d ''` returns non-zero at end of input, so `|| true` is needed
# under `set -e`; the `PY` delimiter is only a label — the content is JSON.
read -r -d '' DECENTRALIZATION_PLAN_MAP <<'PY' || true
{
  "substrate-emit": {"plan_class": "expand-replication", "notes": "extend from 2 known peers to >=3 by onboarding DGX + cloud worker"},
  "substrate-addressing-service": {"plan_class": "infra-cluster-scaling", "notes": "operator-tier; multi-region addressing-service replicas; arqera-infra-owned"},
  "mesh-coordinator": {"plan_class": "rotation-protocol", "doc": "arq://doc/survivability_plan/mesh-coordinator-v1"},
  "cloudflared-tunnel": {"plan_class": "second-tunnel-on-non-mac", "doc": "arq://doc/survivability_plan/cloudflared-tunnel-v1"},
  "github-merge-via-substrate": {"plan_class": "per-habitat-short-lived-token", "doc": "arq://doc/survivability_plan/github-merge-via-substrate-v1"},
  "staging-k8s-cluster": {"plan_class": "infra-team-multi-region", "notes": "deferred to arqera-infra Wave 2+"},
  "modal-tier0-inference": {"plan_class": "load-test-failover", "notes": "tier-routed fallback to Cerebras/Anthropic exists; needs operational load-test under primary outage"},
  "github-actions-ci": {"plan_class": "vendor-locked", "notes": "vendor outage acceptance; mitigation = self-hosted runner backup (deferred)"},
  "operator-approval-authority": {"plan_class": "multi-operator-cosign", "notes": "future primitive; substrate would attest cosigned approvals when >=2 operator-class identities sign"},
  "mesh-enforce-hook": {"plan_class": "per-peer-install", "notes": "Homebase already installs; ready when Homebase emits capability_used"},
  "act-queue-flush": {"plan_class": "per-peer-install", "notes": "design is per-peer; survival is automatic"},
  "frontend-substrate-health-surface": {"plan_class": "production-deploy-when-launched", "notes": "single staging cluster today; production replication on launch"}
}
PY

# Compute aggregate metrics from the declarations + emit. The python
# heredoc reads DECLARATIONS_JSON / DRY_RUN / METRICS_ONLY /
# CUSTOMER_MODE_MAP_JSON / DECENTRALIZATION_PLAN_MAP_JSON from its
# environment, augments each declaration with the Wave-1 fields, emits
# per-capability bodies (unless --metrics-only), computes the metrics, and
# emits arq://body/sovereignty_metrics/aggregate-current.
#
# Exit status: 3 if a capability_declaration emit fails, 4 if the metrics
# emit fails, 0 otherwise (including --dry-run, which never invokes twin).

DECLARATIONS_JSON="$DECLARATIONS" DRY_RUN="$DRY_RUN" METRICS_ONLY="$METRICS_ONLY" \
  CUSTOMER_MODE_MAP_JSON="$CUSTOMER_MODE_MAP" \
  DECENTRALIZATION_PLAN_MAP_JSON="$DECENTRALIZATION_PLAN_MAP" \
  python3 - <<'PY'
import datetime
import json
import os
import subprocess
import sys


def utc_stamp():
    """Return the current UTC time formatted as YYYY-MM-DDTHH:MM:SSZ."""
    # datetime.utcnow() is deprecated since Python 3.12 and warns on stderr;
    # an aware "now" in UTC renders the identical string with this format.
    return datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def ratio(part, whole):
    """Return part / whole rounded to 3 places, or 0 when whole is 0."""
    return round(part / whole if whole else 0, 3)


def emit_body(kind, name, payload, source):
    """Emit one substrate body through the twin CLI.

    Returns (ok, detail): ok is True when twin exited 0; detail is the first
    200 characters of stderr, falling back to stdout when stderr is empty.
    """
    result = subprocess.run(
        [
            "twin", "--use-keychain", "act", "emit", "body",
            kind, name,
            "--payload", payload,
            "--source", source,
            "--sync",
        ],
        capture_output=True,
        text=True,
    )
    return result.returncode == 0, (result.stderr or result.stdout)[:200]


decls = json.loads(os.environ["DECLARATIONS_JSON"])
customer_mode = json.loads(os.environ.get("CUSTOMER_MODE_MAP_JSON", "{}"))
decentralization_plan = json.loads(os.environ.get("DECENTRALIZATION_PLAN_MAP_JSON", "{}"))
dry_run = os.environ.get("DRY_RUN") == "true"
metrics_only = os.environ.get("METRICS_ONLY") == "true"

# Augment each declaration with the Wave-1 fields; slugs missing from a map
# simply do not get that field.
for d in decls:
    slug = d["capability_slug"]
    if slug in customer_mode:
        d["customer_mode_handling"] = customer_mode[slug]
    if slug in decentralization_plan:
        d["decentralization_plan"] = decentralization_plan[slug]

# Per-capability emission. A failed emit aborts immediately with status 3, so
# earlier declarations may already have been written.
emitted = []
if not metrics_only:
    for d in decls:
        slug = d["capability_slug"]
        d_full = dict(d)
        d_full["schema_version"] = 1
        d_full["emitted_at"] = utc_stamp()
        payload = json.dumps(d_full)
        if dry_run:
            print(f"DRY would-emit: arq://body/capability_declaration/{slug}-v1 ({len(payload)} bytes)")
            continue
        ok, detail = emit_body(
            "capability_declaration", f"{slug}-v1", payload, "twin-sovereignty-recompute"
        )
        if not ok:
            print(f"FAIL emit {slug}: {detail}", file=sys.stderr)
            sys.exit(3)
        emitted.append(f"arq://body/capability_declaration/{slug}-v1")

# ---- aggregate metrics --------------------------------------------------
n = len(decls)
critical = [d for d in decls if d["survivability_class"] == "critical"]
crit_n = len(critical)

# Capabilities with replication_factor <= 1 (a missing factor counts as 0).
# The same set drives the concentration score and the SPOF list.
single_rep = [d for d in decls if d.get("replication_factor", 0) <= 1]
capability_concentration_score = ratio(len(single_rep), n)

# Sentry MEDIUM on #3911: count distinct active-steward PEERS (not display
# names) — two Mac-managed services with different display strings should
# not inflate the score if they share the same peer fingerprint.
# NOTE(review): a literal "none" peer counts as one distinct steward here,
# while habitats_at_risk below skips it — confirm that is intended.
distinct_steward_peers = {
    (d.get("active_steward_peer") or d["active_steward"]) for d in decls
}
stewardship_distribution_score = ratio(len(distinct_steward_peers), n)

# Share of capabilities held by the single largest (owner, peer) pair.
authority_tally = {}
for d in decls:
    key = (d["authority_owner"], d.get("active_steward_peer", "?"))
    authority_tally[key] = authority_tally.get(key, 0) + 1
authority_locality_score = ratio(max(authority_tally.values(), default=0), n)

if crit_n:
    survivable_crit = [
        c for c in critical
        if c.get("replication_factor", 0) >= 2 and c.get("transfer_readiness") == "ready"
    ]
    survivability_score = round(len(survivable_crit) / crit_n, 3)
    bottleneck_crit = [c for c in critical if c.get("replication_factor", 0) <= 1]
    bottleneck_pressure_score = round(len(bottleneck_crit) / crit_n, 3)
else:
    # No critical capabilities declared: nothing to lose, nothing bottlenecked.
    survivability_score = 1.0
    bottleneck_pressure_score = 0.0

avg_replication_factor = ratio(sum(d.get("replication_factor", 0) for d in decls), n)
ready_n = len([d for d in decls if d.get("transfer_readiness") == "ready"])
transfer_readiness_pct = ratio(ready_n, n)

# Peers that actively steward two or more critical capabilities, most first.
peer_critical_count = {}
for c in critical:
    p = c.get("active_steward_peer", "?")
    if p and p != "none":
        peer_critical_count[p] = peer_critical_count.get(p, 0) + 1
habitats_at_risk = sorted(
    [{"peer": p, "critical_caps_owned": k} for p, k in peer_critical_count.items() if k >= 2],
    key=lambda r: -r["critical_caps_owned"],
)

spof = [
    {
        "slug": d["capability_slug"],
        "active_steward": d["active_steward"],
        "survivability_class": d["survivability_class"],
    }
    for d in single_rep
]

metrics = {
    "schema_version": 1,
    "computed_at": utc_stamp(),
    "computed_by_script": "scripts/sovereignty-recompute",
    "contract_doc": "arq://doc/protocol/capability-declaration-contract-v1",
    "declared_capabilities_count": n,
    "critical_count": crit_n,
    "capability_concentration_score": capability_concentration_score,
    "authority_locality_score": authority_locality_score,
    "survivability_score": survivability_score,
    "stewardship_distribution_score": stewardship_distribution_score,
    "bottleneck_pressure_score": bottleneck_pressure_score,
    "avg_replication_factor": avg_replication_factor,
    "transfer_readiness_pct": transfer_readiness_pct,
    "habitats_at_risk": habitats_at_risk,
    "single_point_of_failure_capabilities": spof,
    "emitted_declarations": emitted,
}

# Dry run: show the metrics body that would be emitted and stop.
if dry_run:
    print(json.dumps(metrics, indent=2))
    sys.exit(0)

ok, detail = emit_body(
    "sovereignty_metrics", "aggregate-current", json.dumps(metrics),
    "twin-sovereignty-recompute-metrics",
)
if not ok:
    print(f"FAIL emit metrics: {detail}", file=sys.stderr)
    sys.exit(4)

print("emitted: arq://body/sovereignty_metrics/aggregate-current")
print(f"  declared_capabilities: {n} ({crit_n} critical)")
print(f"  capability_concentration_score: {capability_concentration_score}  (1.0 = fully centralized)")
print(f"  survivability_score: {survivability_score}  (1.0 = every critical has replication >= 2 AND transfer=ready)")
print(f"  bottleneck_pressure_score: {bottleneck_pressure_score}  (1.0 = every critical is single-point-of-failure)")
print(f"  avg_replication_factor: {avg_replication_factor}")
print(f"  transfer_readiness_pct: {transfer_readiness_pct}")
print(f"  habitats_at_risk: {len(habitats_at_risk)}")
print(f"  single_point_of_failure_capabilities: {len(spof)}")
PY
