#!/usr/bin/env bash
# aTool - hooks/pre-commit
# PreToolUse hook: enforces quality standards before git commit
# Checks: TODO/FIXME leftovers, sensitive files, conventional commits, large files

# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# Pick the output format by host IDE: a non-empty CURSOR_PLUGIN_ROOT selects
# "cursor"; anything else falls back to "claude".
case "${CURSOR_PLUGIN_ROOT:-}" in
    "") HOOK_IDE="claude" ;;
    *)  HOOK_IDE="cursor" ;;
esac

# Escape a string so it can be embedded inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   the escaped text on stdout, without a trailing newline
escape_for_json() {
    local s="$1"
    # Backslashes go first so the escapes added below are not doubled.
    s="${s//\\/\\\\}"
    s="${s//\"/\\\"}"
    s="${s//$'\n'/\\n}"
    s="${s//$'\r'/\\r}"
    s="${s//$'\t'/\\t}"
    # Backspace and form feed are control characters too; JSON does not
    # allow them raw inside a string.
    s="${s//$'\b'/\\b}"
    s="${s//$'\f'/\\f}"
    printf '%s' "$s"
}

# Slurp the hook payload from stdin (Claude Code provides tool input there).
# When stdin is a terminal nothing is read and INPUT stays empty.
if [[ -t 0 ]]; then
    INPUT=""
else
    INPUT=$(cat)
fi

# Pull tool_name and tool_input.command out of the JSON payload.
# Both stay empty when there is no payload or extraction fails.
TOOL_NAME=""
TOOL_INPUT=""
if [[ -n "$INPUT" ]]; then
    if command -v jq &>/dev/null; then
        # Preferred path: let jq parse the JSON.
        TOOL_NAME=$(printf '%s' "$INPUT" \
            | jq -r '.tool_name // empty' 2>/dev/null || echo "")
        TOOL_INPUT=$(printf '%s' "$INPUT" \
            | jq -r '.tool_input.command // empty' 2>/dev/null || echo "")
    else
        # jq is not installed: take the first "key": "value" pair with grep,
        # then strip the key and the closing quote with sed.
        TOOL_NAME=$(printf '%s' "$INPUT" \
            | grep -oE '"tool_name"[[:space:]]*:[[:space:]]*"[^"]*"' 2>/dev/null \
            | head -1 \
            | sed 's/.*"tool_name"[[:space:]]*:[[:space:]]*"//;s/"$//' || echo "")
        TOOL_INPUT=$(printf '%s' "$INPUT" \
            | grep -oE '"command"[[:space:]]*:[[:space:]]*"[^"]*"' 2>/dev/null \
            | head -1 \
            | sed 's/.*"command"[[:space:]]*:[[:space:]]*"//;s/"$//' || echo "")
    fi
fi

# Nothing to do unless the tool is "Bash" and it carries a non-empty command.
[[ "$TOOL_NAME" == "Bash" && -n "$TOOL_INPUT" ]] || exit 0

# Continue only when the lower-cased command contains "git", whitespace, "commit".
TOOL_INPUT_LOWER=$(printf '%s' "$TOOL_INPUT" | tr '[:upper:]' '[:lower:]')
if printf '%s' "$TOOL_INPUT_LOWER" | grep -qE 'git\s+commit'; then
    IS_GIT_COMMIT=true
else
    IS_GIT_COMMIT=false
    exit 0
fi

# ── Pre-commit checks ─────────────────────────────────────────────────────

# Accumulators shared by all checks below.
WARNINGS=""
CHECKS_FAILED=0
# Directory that staged paths are resolved against. Defaults to the current
# directory and is switched to the repository root below when git is usable.
PROJECT_DIR="${PWD:-}"

# v1.10.17 (P2-SEC-5): read staged files NUL-separated and into an array
# instead of word-splitting an unquoted string. The previous
# `STAGED_FILES=$(git diff --cached --name-only)` then `for file in $STAGED_FILES`
# pattern was correctness-fragile under filenames with spaces and a small
# audit-bypass risk if a malicious commit added a path with shell metachars.
# `-z` makes git emit a NUL between paths; we read into an array safely.
STAGED_FILES=()
if command -v git &>/dev/null && git rev-parse --is-inside-work-tree &>/dev/null; then
    # `git diff --name-only` prints paths relative to the repository root, not
    # to the current directory. Resolve them against the root so the per-file
    # checks also find the files when the hook runs from a subdirectory.
    GIT_TOPLEVEL=$(git rev-parse --show-toplevel 2>/dev/null || true)
    if [[ -n "$GIT_TOPLEVEL" ]]; then
        PROJECT_DIR="$GIT_TOPLEVEL"
    fi

    # A = added, C = copied, M = modified, R = renamed. Renames are included
    # so that a file renamed to a sensitive name (e.g. to ".env") is checked.
    while IFS= read -r -d '' f; do
        STAGED_FILES+=("$f")
    done < <(git diff --cached --name-only -z --diff-filter=ACMR 2>/dev/null || true)
fi

# 1. Check for TODO/FIXME in staged files (AI "lazy" patterns)

# Print how many lines of a file contain a TODO, FIXME, HACK or XXX marker.
# Arguments: $1 - path of the file to scan
# Outputs:   exactly one non-negative integer on stdout ("0" when the file
#            has no marker or cannot be read)
count_marker_lines() {
    local path="$1"
    local n
    # grep -c already prints "0" when nothing matches, and also exits 1 in
    # that case. A `|| echo 0` fallback would therefore append a second "0"
    # and break the numeric comparison, so only the exit status is ignored.
    n=$(grep -cE '(TODO|FIXME|HACK|XXX)' -- "$path" 2>/dev/null || true)
    if [[ ! "$n" =~ ^[0-9]+$ ]]; then
        n=0
    fi
    printf '%s' "$n"
}

TODO_COUNT=0
if [[ ${#STAGED_FILES[@]} -gt 0 ]]; then
    for file in "${STAGED_FILES[@]}"; do
        # The working-tree copy is scanned; paths that are not regular files are skipped.
        if [[ -f "$PROJECT_DIR/$file" ]]; then
            local_count=$(count_marker_lines "$PROJECT_DIR/$file")
            if [[ "$local_count" -gt 0 ]]; then
                TODO_COUNT=$((TODO_COUNT + local_count))
            fi
        fi
    done
fi

# Markers count as one failed check; this is a warning, not a block.
if [[ "$TODO_COUNT" -gt 0 ]]; then
    WARNINGS+="- Found ${TODO_COUNT} TODO/FIXME/HACK/XXX marker(s) in staged files. Consider resolving before commit.\n"
    CHECKS_FAILED=$((CHECKS_FAILED + 1))
fi

# 2. Flag staged paths whose names match a secret/credential pattern.
SENSITIVE_PATTERNS="(\.env$|\.env\.|credentials|\.pem$|\.key$|secret|\.p12$|\.pfx$|id_rsa|id_ed25519|\.npmrc$|\.pypirc$)"
SENSITIVE_FOUND=""
if (( ${#STAGED_FILES[@]} > 0 )); then
    for file in "${STAGED_FILES[@]}"; do
        # Skip paths that do not match; matching ones are joined with ", ".
        printf '%s' "$file" | grep -qE "$SENSITIVE_PATTERNS" 2>/dev/null || continue
        SENSITIVE_FOUND="${SENSITIVE_FOUND:+$SENSITIVE_FOUND, }$file"
    done
fi

# Any hit adds 10 to the failure counter, which marks the commit as blocked.
if [[ "$SENSITIVE_FOUND" != "" ]]; then
    WARNINGS+="- BLOCKED: Sensitive file(s) detected in commit: ${SENSITIVE_FOUND}. Do NOT commit secrets.\n"
    CHECKS_FAILED=$((CHECKS_FAILED + 10))  # High severity
fi

# 3. Check conventional commit message format
# Note: PreToolUse hook runs BEFORE commit happens, so we can only extract from command line.
# However, commit message extraction from bash command is unreliable for heredoc format.
# This check is best-effort only. Use PostToolUse hook for reliable validation after commit.

# Print the message passed to the first stand-alone -m flag of a command line.
# Arguments: $1 - the shell command text
# Outputs:   the message without its surrounding quotes; nothing when no
#            message can be extracted
extract_commit_message() {
    local cmd="$1"
    # -m must start a word, so "--message" or "commit-msg" do not match.
    # Alternatives: "double quoted", 'single quoted', or one bare word.
    # Bash's own regex matching is used instead of sed because the GNU-style
    # BRE operators (\|, \S, \+) are not portable across sed implementations.
    # The leftmost match wins, so with several -m flags the subject is taken.
    local re="(^|[[:space:]])-m[[:space:]]*(\"([^\"]*)\"|'([^']*)'|([^[:space:]\"']+))"
    if [[ "$cmd" =~ $re ]]; then
        # Only one of the three capture groups is non-empty for a given match.
        printf '%s' "${BASH_REMATCH[3]:-}${BASH_REMATCH[4]:-}${BASH_REMATCH[5]:-}"
    fi
}

# Return 0 when message $1 has a line starting with a Conventional Commits
# prefix: type, optional (scope), optional "!" breaking-change marker, colon.
is_conventional_commit() {
    local pattern='^(feat|fix|docs|test|refactor|chore|style|perf|build|ci|revert|release)(\(.+\))?!?:'
    printf '%s' "$1" | grep -qE "$pattern" 2>/dev/null
}

COMMIT_MSG=$(extract_commit_message "$TOOL_INPUT")

# Only warn if message extraction succeeded AND it doesn't match pattern
# (If extraction fails, skip validation — will be checked post-commit by PostToolUse hook)
if [[ -n "$COMMIT_MSG" ]] && [[ ! "$COMMIT_MSG" =~ \$\(cat ]]; then
    if ! is_conventional_commit "$COMMIT_MSG"; then
        WARNINGS+="- Commit message does not follow Conventional Commits: '${COMMIT_MSG}'\n"
        WARNINGS+="  Expected: feat|fix|docs|test|refactor|chore|style|perf|build|ci|revert|release[(scope)]: description\n"
        # This is a warning, not a hard block
    fi
else
    # If commit message extraction failed or detected heredoc, add note about post-validation
    if printf '%s' "$TOOL_INPUT" | grep -qE '\-m.*\$\(cat'; then
        WARNINGS+="- INFO: Using heredoc commit message format. Conventional Commits validation will be performed post-commit.\n"
    fi
fi

# 4. List staged files whose working-tree copy is larger than 1MB.
LARGE_FILES=""
if (( ${#STAGED_FILES[@]} > 0 )); then
    for file in "${STAGED_FILES[@]}"; do
        # Only regular files that exist under PROJECT_DIR are measured.
        [[ -f "$PROJECT_DIR/$file" ]] || continue
        file_size=$(wc -c < "$PROJECT_DIR/$file" 2>/dev/null || echo "0")
        # 1MB = 1048576 bytes
        [[ "$file_size" -gt 1048576 ]] || continue
        # Integer division: the reported size is rounded down to whole MB.
        size_mb=$((file_size / 1048576))
        LARGE_FILES="${LARGE_FILES:+$LARGE_FILES, }$file (${size_mb}MB)"
    done
fi

# Large files only add a warning line; the failure counter is left untouched.
if [[ "$LARGE_FILES" != "" ]]; then
    WARNINGS+="- WARNING: Large file(s) in commit: ${LARGE_FILES}. Consider using .gitattributes or Git LFS.\n"
fi

# ── Output ────────────────────────────────────────────────────────────────

# Print the report in the format the given IDE expects.
# Arguments: $1 - IDE name ("cursor" selects the JSON envelope; anything else
#                 gets plain text)
#            $2 - message text that uses literal "\n" sequences as line breaks
# Outputs:   the formatted message on stdout
emit_hook_message() {
    local ide="$1"
    local raw="$2"
    local rendered
    # Expand the literal "\n" separators into real newlines first. Escaping
    # the raw text for JSON would double the backslashes and the reader would
    # see the two characters "\n" instead of a line break.
    printf -v rendered '%b' "$raw"
    if [[ "$ide" == "cursor" ]]; then
        printf '{\n  "hookSpecificOutput": {\n    "hookEventName": "PreToolUse",\n    "additionalContext": "%s"\n  }\n}\n' "$(escape_for_json "$rendered")"
    else
        printf '%s' "$rendered"
    fi
}

if [[ "$CHECKS_FAILED" -gt 0 ]] || [[ -n "$WARNINGS" ]]; then
    # A failure counter of 10 or more means a high-severity check fired.
    SEVERITY="WARNING"
    if [[ "$CHECKS_FAILED" -ge 10 ]]; then
        SEVERITY="BLOCKED"
    fi

    _MSG="<ATOOL-PRE-COMMIT-CHECK>\n"
    _MSG+="${SEVERITY}: Pre-commit quality checks found issues:\n\n"
    _MSG+="$WARNINGS"
    _MSG+="\n"

    if [[ "$SEVERITY" == "BLOCKED" ]]; then
        _MSG+="**This commit is BLOCKED.** Fix the critical issues above before proceeding.\n"
    else
        _MSG+="Please review the warnings above. You may proceed if they are acceptable, but consider addressing them.\n"
    fi
    _MSG+="</ATOOL-PRE-COMMIT-CHECK>"

    emit_hook_message "$HOOK_IDE" "$_MSG"

    # NOTE(review): per the Claude Code hooks reference, a blocking exit (2)
    # feeds stderr, not stdout, back to the model. The report is therefore
    # also copied to stderr when the commit is blocked; stdout is unchanged.
    if [[ "$SEVERITY" == "BLOCKED" && "$HOOK_IDE" != "cursor" ]]; then
        emit_hook_message "$HOOK_IDE" "$_MSG" >&2
        printf '\n' >&2
    fi
fi

# Final status. Exit 2 blocks tool execution in the Claude Code hook protocol,
# so it is used only when the failure counter reached the BLOCKED threshold
# (sensitive files such as .env, *.pem, credentials add 10 to it).
# Everything else lets the commit proceed.
[[ "${CHECKS_FAILED:-0}" -ge 10 ]] || exit 0
exit 2
