#!/bin/bash
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Load settings from the `config` file that sits in this script's directory.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "${SCRIPT_DIR}" && pwd)"
. "${SCRIPT_DIR}/config"

<% if (deploymentTarget === 'realtime-inference') { %>
# ============================================================
# SageMaker Real-Time Inference Logs (CloudWatch)
# ============================================================

# Parse arguments: ./do/logs [--ic <name>] [--adapter <name>]
IC_ARG=""
ADAPTER_ARG=""

#######################################
# Parse the command-line flags of ./do/logs.
# Globals:   IC_ARG (written), ADAPTER_ARG (written)
# Arguments: the script's positional parameters
# Outputs:   usage text on --help/-h; an error on stderr when --ic or
#            --adapter is the last argument and therefore has no value
# Returns:   exits 0 after printing help, exits 1 on a missing value;
#            unrecognized arguments are skipped
#######################################
parse_logs_args() {
    while [ $# -gt 0 ]; do
        case "$1" in
            --ic|--adapter)
                # A flag in last position has no value to consume. Shifting
                # past the end fails, which under `set -e` would abort the
                # script without any message, so report it explicitly.
                if [ $# -lt 2 ]; then
                    echo "❌ Option $1 requires a <name> argument" >&2
                    echo "   Run ./do/logs --help for usage." >&2
                    exit 1
                fi
                if [ "$1" = "--ic" ]; then
                    IC_ARG="$2"
                else
                    ADAPTER_ARG="$2"
                fi
                shift 2
                ;;
            --help|-h)
                echo "Usage: ./do/logs [--ic <name>] [--adapter <name>]"
                echo ""
                echo "Tail CloudWatch logs for the deployed inference component."
                echo ""
                echo "Options:"
                echo "  --ic <name>      Show logs for a specific inference component"
                echo "  --adapter <name> Show logs for a specific LoRA adapter IC"
                echo ""
                echo "IC resolution:"
                echo "  --adapter <name> Use ADAPTER_IC_NAME from do/adapters/<name>.conf"
                echo "  --ic <name>      Use IC_DEPLOYED_NAME from do/ic/<name>.conf"
                echo "  (no flag)        Use the first deployed IC in do/ic/ (or"
                echo "                   INFERENCE_COMPONENT_NAME from do/config when do/ic/"
                echo "                   does not exist); otherwise show all endpoint logs"
                exit 0
                ;;
            *)
                # Unknown arguments are ignored.
                shift
                ;;
        esac
    done
}

parse_logs_args "$@"

# Take the endpoint name from ENDPOINT_NAME; without one there is nothing
# to tail, so print usage and stop.
ENDPOINT="${ENDPOINT_NAME:-}"

[ -n "${ENDPOINT}" ] || {
    printf '%s\n' \
        "❌ ENDPOINT_NAME not set in config" \
        "" \
        "Usage:" \
        "  ./do/logs --ic <name>    # logs for a specific IC" \
        "  ./do/logs                # all endpoint logs" \
        "" \
        "Run ./do/deploy first to set ENDPOINT_NAME automatically."
    exit 1
}

# Resolve inference component name for filtering
# Precedence: --adapter <name>, --ic <name>, first deployed IC in do/ic/,
# or legacy config

#######################################
# Print the adapters found in a directory as a bulleted list.
# Arguments: $1 - directory expected to hold <name>.conf files
# Outputs:   one "     • <name>" line per conf file, or "     (none)" when
#            the directory is missing or contains no conf files
#######################################
list_adapter_names() {
    local dir="$1" conf found=0
    for conf in "${dir}"/*.conf; do
        # An unmatched glob stays literal; skip anything that is not a file.
        [ -f "${conf}" ] || continue
        echo "     • $(basename "${conf}" .conf)"
        found=1
    done
    if [ "${found}" -eq 0 ]; then
        echo "     (none)"
    fi
}

IC_NAME=""
if [ -n "${ADAPTER_ARG}" ]; then
    # Adapter name provided via --adapter flag — look up adapter IC
    ADAPTER_CONF="${SCRIPT_DIR}/adapters/${ADAPTER_ARG}.conf"
    if [ ! -f "${ADAPTER_CONF}" ]; then
        echo "❌ Adapter config not found: do/adapters/${ADAPTER_ARG}.conf"
        echo "   Available adapters:"
        list_adapter_names "${SCRIPT_DIR}/adapters"
        exit 1
    fi
    # Reset before sourcing so a conf that omits the variable is detected.
    ADAPTER_IC_NAME=""
    source "${ADAPTER_CONF}"
    if [ -z "${ADAPTER_IC_NAME}" ]; then
        echo "❌ Adapter '${ADAPTER_ARG}' conf is missing ADAPTER_IC_NAME."
        exit 1
    fi
    IC_NAME="${ADAPTER_IC_NAME}"
elif [ -n "${IC_ARG}" ]; then
    # Explicit IC name provided via --ic flag
    IC_CONF="${SCRIPT_DIR}/ic/${IC_ARG}.conf"
    if [ ! -f "${IC_CONF}" ]; then
        echo "❌ IC config not found: do/ic/${IC_ARG}.conf"
        exit 1
    fi
    # Reset before sourcing so a conf that omits the variable is detected.
    IC_DEPLOYED_NAME=""
    source "${IC_CONF}"
    if [ -z "${IC_DEPLOYED_NAME}" ]; then
        echo "❌ IC '${IC_ARG}' has not been deployed yet. Run ./do/deploy --ic ${IC_ARG} first."
        exit 1
    fi
    IC_NAME="${IC_DEPLOYED_NAME}"
elif [ -d "${SCRIPT_DIR}/ic" ]; then
    # No --ic argument, but do/ic/ exists — use the first conf (in glob
    # order) that sets a non-empty IC_DEPLOYED_NAME
    for conf in "${SCRIPT_DIR}"/ic/*.conf; do
        [ -f "${conf}" ] || continue
        IC_DEPLOYED_NAME=""
        source "${conf}"
        if [ -n "${IC_DEPLOYED_NAME}" ]; then
            IC_NAME="${IC_DEPLOYED_NAME}"
            break
        fi
    done
    # If no ICs deployed, IC_NAME stays empty
else
    # Legacy: no do/ic/ directory, use INFERENCE_COMPONENT_NAME if it is set
    IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
fi

# Pick the CloudWatch log group: the endpoint's group by default, or the
# inference component's group when an IC name was resolved. Then print the
# banner describing what is about to be tailed.
LOG_GROUP="/aws/sagemaker/Endpoints/${ENDPOINT}"
if [ -z "${IC_NAME}" ]; then
    printf '%s\n' "📋 Tailing logs for endpoint: ${ENDPOINT}"
else
    LOG_GROUP="/aws/sagemaker/InferenceComponents/${IC_NAME}"
    printf '%s\n' \
        "📋 Tailing logs for inference component: ${IC_NAME}" \
        "   Endpoint: ${ENDPOINT}"
fi
printf '%s\n' \
    "   Log group: ${LOG_GROUP}" \
    "   Region: ${AWS_REGION}" \
    "" \
    "   Press Ctrl+C to stop" \
    "" \
    "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
    ""

# Wait for log group to exist before tailing
MAX_WAIT=900
INTERVAL=10
ELAPSED=0

# Try IC-specific log group first, fall back to endpoint log group
FALLBACK_LOG_GROUP="/aws/sagemaker/Endpoints/${ENDPOINT}"

#######################################
# Check whether a CloudWatch log group with exactly the given name exists.
# Globals:   AWS_REGION (read), LOG_GROUP_CHECK_WARNED (written)
# Arguments: $1 - full log group name
# Outputs:   a one-time warning on stderr if the AWS CLI call itself fails
# Returns:   0 if the group exists; non-zero if it is absent or the CLI
#            call failed
#######################################
log_group_exists() {
    local name="$1" listing
    # Capture the listing rather than piping straight into `grep -q`: under
    # `set -o pipefail` an early-exiting grep can make the pipeline report
    # failure even though the name matched.
    if ! listing="$(aws logs describe-log-groups \
        --log-group-name-prefix "${name}" \
        --region "${AWS_REGION}" \
        --query "logGroups[?logGroupName=='${name}'].logGroupName" \
        --output text 2>&1)"; then
        # Surface CLI failures once so the loop below does not poll silently
        # until the timeout.
        if [ -z "${LOG_GROUP_CHECK_WARNED:-}" ]; then
            LOG_GROUP_CHECK_WARNED=1
            echo "   ⚠️  Could not query CloudWatch Logs (will keep retrying):" >&2
            printf '      %s\n' "${listing}" >&2
        fi
        return 1
    fi
    # -x/-F: whole-line literal match, so characters such as "." in the name
    # are not interpreted as a regular expression.
    grep -qxF -- "${name}" <<<"${listing}"
}

while true; do
    # Check primary log group
    if log_group_exists "${LOG_GROUP}"; then
        break
    fi

    # Check endpoint-level log group as fallback (only when targeting a specific IC)
    if [ -n "${IC_NAME}" ] && log_group_exists "${FALLBACK_LOG_GROUP}"; then
        LOG_GROUP="${FALLBACK_LOG_GROUP}"
        echo "   ℹ️  Using endpoint log group: ${LOG_GROUP}"
        break
    fi

    if [ "${ELAPSED}" -ge "${MAX_WAIT}" ]; then
        echo "❌ Timed out after ${MAX_WAIT}s waiting for log group: ${LOG_GROUP}"
        echo ""
        echo "   The inference component may have failed to start."
        echo "   Check the endpoint log group for errors:"
        echo "   aws logs tail /aws/sagemaker/Endpoints/${ENDPOINT} --follow --region ${AWS_REGION}"
        exit 1
    fi

    # Explain the wait only on the first pass through the loop.
    if [ "${ELAPSED}" -eq 0 ]; then
        echo "⏳ Log group not found yet: ${LOG_GROUP}"
        echo "   The inference component may still be starting up. Waiting up to ${MAX_WAIT}s..."
        echo ""
    fi

    sleep "${INTERVAL}"
    ELAPSED=$((ELAPSED + INTERVAL))
    echo "   Waiting for log group... (${ELAPSED}s/${MAX_WAIT}s)"
done

echo "✅ Log group found. Tailing logs..."
echo ""

# Tail logs
aws logs tail "${LOG_GROUP}" \
    --region "${AWS_REGION}" \
    --follow \
    --format short

<% } else if (deploymentTarget === 'async-inference') { %>
# ============================================================
# SageMaker Async Inference Logs (CloudWatch)
# ============================================================

# Endpoint to tail: the first positional argument when non-empty, otherwise
# ENDPOINT_NAME (which may be unset).
if [ -n "${1:-}" ]; then
    ENDPOINT="$1"
else
    ENDPOINT="${ENDPOINT_NAME:-}"
fi

[ -n "${ENDPOINT}" ] || {
    printf '%s\n' \
        "❌ No endpoint name provided" \
        "" \
        "Usage:" \
        "  ./do/logs <endpoint-name>" \
        "  ./do/logs                  # uses ENDPOINT_NAME from do/config" \
        "" \
        "Run ./do/deploy first to set ENDPOINT_NAME automatically."
    exit 1
}

LOG_GROUP="/aws/sagemaker/Endpoints/${ENDPOINT}"

# Banner describing what is about to be tailed.
printf '%s\n' \
    "📋 Tailing logs for async inference endpoint: ${ENDPOINT}" \
    "   Log group: ${LOG_GROUP}" \
    "   Region: ${AWS_REGION}" \
    "" \
    "   Press Ctrl+C to stop" \
    "" \
    "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
    ""

# Wait for log group to exist before tailing
MAX_WAIT=900
INTERVAL=10
ELAPSED=0

#######################################
# Check whether a CloudWatch log group with exactly the given name exists.
# Globals:   AWS_REGION (read), LOG_GROUP_CHECK_WARNED (written)
# Arguments: $1 - full log group name
# Outputs:   a one-time warning on stderr if the AWS CLI call itself fails
# Returns:   0 if the group exists; non-zero if it is absent or the CLI
#            call failed
#######################################
log_group_exists() {
    local name="$1" listing
    # Capture the listing rather than piping straight into `grep -q`: under
    # `set -o pipefail` an early-exiting grep can make the pipeline report
    # failure even though the name matched.
    if ! listing="$(aws logs describe-log-groups \
        --log-group-name-prefix "${name}" \
        --region "${AWS_REGION}" \
        --query "logGroups[?logGroupName=='${name}'].logGroupName" \
        --output text 2>&1)"; then
        # Surface CLI failures once so the loop below does not poll silently
        # until the timeout.
        if [ -z "${LOG_GROUP_CHECK_WARNED:-}" ]; then
            LOG_GROUP_CHECK_WARNED=1
            echo "   ⚠️  Could not query CloudWatch Logs (will keep retrying):" >&2
            printf '      %s\n' "${listing}" >&2
        fi
        return 1
    fi
    # -x/-F: whole-line literal match, so characters such as "." in the name
    # are not interpreted as a regular expression.
    grep -qxF -- "${name}" <<<"${listing}"
}

while true; do
    if log_group_exists "${LOG_GROUP}"; then
        break
    fi

    if [ "${ELAPSED}" -ge "${MAX_WAIT}" ]; then
        echo "❌ Timed out after ${MAX_WAIT}s waiting for log group: ${LOG_GROUP}"
        echo ""
        echo "   The endpoint may have failed to start."
        echo "   Check the SageMaker console for endpoint status."
        exit 1
    fi

    # Explain the wait only on the first pass through the loop.
    if [ "${ELAPSED}" -eq 0 ]; then
        echo "⏳ Log group not found yet: ${LOG_GROUP}"
        echo "   The endpoint may still be starting up. Waiting up to ${MAX_WAIT}s..."
        echo ""
    fi

    sleep "${INTERVAL}"
    ELAPSED=$((ELAPSED + INTERVAL))
    echo "   Waiting for log group... (${ELAPSED}s/${MAX_WAIT}s)"
done

echo "✅ Log group found. Tailing logs..."
echo ""

# Tail logs
aws logs tail "${LOG_GROUP}" \
    --region "${AWS_REGION}" \
    --follow \
    --format short

<% } else if (deploymentTarget === 'batch-transform') { %>
# ============================================================
# SageMaker Batch Transform Logs (CloudWatch)
# ============================================================

# Transform job to tail: the first positional argument when non-empty,
# otherwise TRANSFORM_JOB_NAME (which may be unset).
if [ -n "${1:-}" ]; then
    JOB_NAME="$1"
else
    JOB_NAME="${TRANSFORM_JOB_NAME:-}"
fi

[ -n "${JOB_NAME}" ] || {
    printf '%s\n' \
        "❌ No transform job name provided" \
        "" \
        "Usage:" \
        "  ./do/logs <transform-job-name>" \
        "  ./do/logs                  # uses TRANSFORM_JOB_NAME from do/config" \
        "" \
        "Run ./do/deploy first to set TRANSFORM_JOB_NAME automatically."
    exit 1
}

# A single fixed log group is used here; the job name is not part of it.
LOG_GROUP="/aws/sagemaker/TransformJobs"

# Banner describing what is about to be tailed.
printf '%s\n' \
    "📋 Tailing logs for batch transform job: ${JOB_NAME}" \
    "   Log group: ${LOG_GROUP}" \
    "   Region: ${AWS_REGION}" \
    "" \
    "   Press Ctrl+C to stop" \
    "" \
    "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
    ""

# Wait for log group to exist before tailing
MAX_WAIT=900
INTERVAL=10
ELAPSED=0

#######################################
# Check whether a CloudWatch log group with exactly the given name exists.
# Globals:   AWS_REGION (read), LOG_GROUP_CHECK_WARNED (written)
# Arguments: $1 - full log group name
# Outputs:   a one-time warning on stderr if the AWS CLI call itself fails
# Returns:   0 if the group exists; non-zero if it is absent or the CLI
#            call failed
#######################################
log_group_exists() {
    local name="$1" listing
    # Capture the listing rather than piping straight into `grep -q`: under
    # `set -o pipefail` an early-exiting grep can make the pipeline report
    # failure even though the name matched.
    if ! listing="$(aws logs describe-log-groups \
        --log-group-name-prefix "${name}" \
        --region "${AWS_REGION}" \
        --query "logGroups[?logGroupName=='${name}'].logGroupName" \
        --output text 2>&1)"; then
        # Surface CLI failures once so the loop below does not poll silently
        # until the timeout.
        if [ -z "${LOG_GROUP_CHECK_WARNED:-}" ]; then
            LOG_GROUP_CHECK_WARNED=1
            echo "   ⚠️  Could not query CloudWatch Logs (will keep retrying):" >&2
            printf '      %s\n' "${listing}" >&2
        fi
        return 1
    fi
    # -x/-F: whole-line literal match, so characters such as "." in the name
    # are not interpreted as a regular expression.
    grep -qxF -- "${name}" <<<"${listing}"
}

while true; do
    if log_group_exists "${LOG_GROUP}"; then
        break
    fi

    if [ "${ELAPSED}" -ge "${MAX_WAIT}" ]; then
        echo "❌ Timed out after ${MAX_WAIT}s waiting for log group: ${LOG_GROUP}"
        echo ""
        echo "   The transform job may not have started logging yet."
        echo "   Check the job status:"
        echo "   aws sagemaker describe-transform-job --transform-job-name ${JOB_NAME} --region ${AWS_REGION}"
        exit 1
    fi

    # Explain the wait only on the first pass through the loop.
    if [ "${ELAPSED}" -eq 0 ]; then
        echo "⏳ Log group not found yet: ${LOG_GROUP}"
        echo "   The transform job may still be starting up. Waiting up to ${MAX_WAIT}s..."
        echo ""
    fi

    sleep "${INTERVAL}"
    ELAPSED=$((ELAPSED + INTERVAL))
    echo "   Waiting for log group... (${ELAPSED}s/${MAX_WAIT}s)"
done

echo "✅ Log group found. Tailing logs..."
echo ""

# Tail logs, restricted to streams whose name starts with the job name
aws logs tail "${LOG_GROUP}" \
    --region "${AWS_REGION}" \
    --log-stream-name-prefix "${JOB_NAME}" \
    --follow \
    --format short

<% } else if (deploymentTarget === 'hyperpod-eks') { %>
# ============================================================
# HyperPod EKS Logs (kubectl)
# ============================================================

# Label selector for the pods to tail: the first positional argument when
# non-empty, otherwise "app=<PROJECT_NAME>".
if [ -n "${1:-}" ]; then
    POD_SELECTOR="$1"
else
    POD_SELECTOR="app=${PROJECT_NAME}"
fi

# Summary of the deployment that is about to be tailed.
printf '%s\n' \
    "📋 Tailing logs for HyperPod EKS deployment" \
    "   Cluster: ${HYPERPOD_CLUSTER_NAME}" \
    "   Namespace: ${HYPERPOD_NAMESPACE}" \
    "   Selector: ${POD_SELECTOR}" \
    "   Region: ${AWS_REGION}" \
    ""

# Get kubeconfig for HyperPod cluster
echo "🔑 Configuring kubectl for HyperPod cluster..."
KUBECONFIG_PATH="${HOME}/.kube/hyperpod-${HYPERPOD_CLUSTER_NAME}"

# Look up the ARN of the EKS cluster behind the HyperPod cluster. stderr is
# deliberately not captured: the AWS CLI's own error stays visible on the
# terminal, and diagnostics can never end up inside the ARN value.
EKS_CLUSTER_ARN="$(aws sagemaker describe-cluster \
    --cluster-name "${HYPERPOD_CLUSTER_NAME}" \
    --region "${AWS_REGION}" \
    --query "Orchestrator.Eks.ClusterArn" \
    --output text)" || {
    echo "❌ Failed to describe HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
    echo ""
    echo "   Check that:"
    echo "   • The cluster name is correct"
    echo "   • The cluster exists in region: ${AWS_REGION}"
    echo "   • Your IAM user/role has permission to access the cluster"
    exit 4
}

# The EKS cluster name is the last "/"-separated segment of the ARN.
EKS_CLUSTER_NAME="${EKS_CLUSTER_ARN##*/}"

# With --output text, a --query that selects nothing prints the literal
# "None"; do not pass that on to `aws eks` as a cluster name.
if [ -z "${EKS_CLUSTER_NAME}" ] || [ "${EKS_CLUSTER_ARN}" = "None" ]; then
    echo "❌ No EKS cluster ARN found for HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
    echo ""
    echo "   Check that the cluster uses EKS orchestration."
    exit 4
fi

# Write the cluster credentials to a dedicated kubeconfig file.
if ! aws eks update-kubeconfig \
    --name "${EKS_CLUSTER_NAME}" \
    --region "${AWS_REGION}" \
    --kubeconfig "${KUBECONFIG_PATH}" 2>&1; then
    echo "❌ Failed to configure kubectl for EKS cluster: ${EKS_CLUSTER_NAME}"
    exit 4
fi

# Point every kubectl call that follows at that kubeconfig.
export KUBECONFIG="${KUBECONFIG_PATH}"

# Verify cluster connectivity
if ! kubectl cluster-info &> /dev/null; then
    echo "❌ Cannot connect to HyperPod cluster"
    echo ""
    echo "   Check that:"
    echo "   • The cluster is in 'InService' status"
    echo "   • Your network can reach the cluster API server"
    exit 4
fi

# Check if any pods match the selector.
# The listing is captured first and counted separately. Piping kubectl into
# `wc -l` with an `|| echo "0"` fallback yields two lines ("0" twice) when
# kubectl fails under `set -o pipefail`, which breaks the numeric test below.
# A kubectl failure is treated the same as "no pods".
POD_LIST="$(kubectl get pods -n "${HYPERPOD_NAMESPACE}" -l "${POD_SELECTOR}" --no-headers 2>/dev/null)" || POD_LIST=""
# Count non-blank lines; grep -c prints 0 but exits non-zero when none match.
POD_COUNT="$(grep -c '[^[:space:]]' <<<"${POD_LIST}")" || POD_COUNT=0
if [ "${POD_COUNT}" -eq 0 ]; then
    echo "⚠️  No pods found matching selector: ${POD_SELECTOR}"
    echo "   Namespace: ${HYPERPOD_NAMESPACE}"
    echo ""
    echo "   Run ./do/deploy first to create the deployment."
    echo ""
    echo "   To list all pods in the namespace:"
    echo "   kubectl get pods -n ${HYPERPOD_NAMESPACE}"
    exit 1
fi

echo "   Found ${POD_COUNT} pod(s) matching selector"
echo ""
echo "   Press Ctrl+C to stop"
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# Tail logs from all matching pods
kubectl logs -f -l "${POD_SELECTOR}" -n "${HYPERPOD_NAMESPACE}" --all-containers --prefix

<% } %>
