#!/bin/bash
# do-framework configuration
# This file is sourced by all do scripts
# Generated: <%= new Date().toISOString() %>

# Project identification
# Both values are fixed when the project is generated.
export PROJECT_NAME="<%= projectName %>"
export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"

# Derived from deployment config
export FRAMEWORK="<%= framework %>"
export MODEL_SERVER="<%= modelServer %>"

# AWS configuration
# An AWS_REGION already present in the environment wins; the generated value
# is only the fallback. Quoted so the expansion is never word-split or globbed.
export AWS_REGION="${AWS_REGION:-<%= awsRegion %>}"

# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
# ECR_REPOSITORY_NAME, ROLE_ARN, ADAPTER_S3_BUCKET — see do/lib/profile.sh

<% if (typeof enableLora !== 'undefined' && enableLora) { %>
# LoRA adapter serving
# Exported because LoRA was enabled when this project was generated.
export ENABLE_LORA=true
<% } else if (framework === 'transformers' || framework === 'diffusors') { %>
# LoRA adapter serving (uncomment to enable)
# This opt-in hint is only written for the transformers and diffusors frameworks.
# export ENABLE_LORA=true
<% } %>

# Build configuration — WHERE the Docker image gets built
export BUILD_TARGET="<%= buildTarget %>"
<% if (buildTarget === 'codebuild') { %>
# CodeBuild compute type (only written when BUILD_TARGET is codebuild)
export CODEBUILD_COMPUTE_TYPE="<%= codebuildComputeType %>"
# CODEBUILD_PROJECT_NAME — derived in do/submit at runtime
<% } %>

# Deployment configuration — WHERE the model runs
# Target-specific settings further down are only written for the matching target.
export DEPLOYMENT_TARGET="<%= deploymentTarget %>"

<% if (deploymentTarget === 'realtime-inference') { %>
# SageMaker Real-Time Inference configuration
<% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
# External endpoint — attaching IC to an existing running endpoint
# (instance type, pools, AMI and capacity reservation settings are not written in this mode)
export ENDPOINT_NAME="<%= existingEndpointName %>"
export ENDPOINT_EXTERNAL=true
<% } else { %>
# Instance type for the endpoint. An INSTANCE_TYPE already set in the
# environment takes precedence over the generated default.
export INSTANCE_TYPE="${INSTANCE_TYPE:-<%= instanceType %>}"
<% if (typeof instancePools !== 'undefined' && instancePools && instancePools.length > 1) { %>
<%# The JSON is written with the unescaped output tag: the escaping tag would turn every double quote into an HTML entity and corrupt the value. Single quotes are escaped so the surrounding shell literal stays intact. -%>
# Instance pools: heterogeneous instance types with priority-based fallback
# Priority = selection order (1 = preferred, higher = fallback)
export INSTANCE_POOLS='<%- JSON.stringify(instancePools).replace(/'/g, "'\\''") %>'
<% } else { %>
# Instance pools: heterogeneous instance types with priority-based fallback (uncomment to enable)
# Format: [{"InstanceType":"ml.g6e.48xlarge","Priority":1},{"InstanceType":"ml.g5.48xlarge","Priority":2}]
# export INSTANCE_POOLS='[]'
<% } %>
<% if (inferenceAmiVersion) { %>
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
<% } else { %>
# Inference AMI version — auto-resolved from CUDA version (uncomment to override)
# Valid: al2-ami-sagemaker-inference-gpu-2, al2-ami-sagemaker-inference-gpu-2-1,
#        al2-ami-sagemaker-inference-gpu-3-1, al2023-ami-sagemaker-inference-gpu-4-1
# export INFERENCE_AMI_VERSION=""
<% } %>
<% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
# Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
# If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
<% } else { %>
# Capacity reservation (uncomment to use reserved capacity)
# Note: Mutually exclusive with INSTANCE_POOLS — reservation takes precedence.
# export CAPACITY_RESERVATION_ARN=""
<% } %>
<% } %>
<% } %>

<% if (deploymentTarget === 'async-inference') { %>
# SageMaker Async Inference configuration
# Instance type for the endpoint. An INSTANCE_TYPE already set in the
# environment takes precedence over the generated default.
export INSTANCE_TYPE="${INSTANCE_TYPE:-<%= instanceType %>}"
<% if (inferenceAmiVersion) { %>
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
<% } else { %>
# Inference AMI version — auto-resolved from CUDA version (uncomment to override)
# export INFERENCE_AMI_VERSION=""
<% } %>

# Async-specific configuration
# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
# ASYNC_S3_OUTPUT_PATH, ASYNC_SNS_SUCCESS_TOPIC, ASYNC_SNS_ERROR_TOPIC — see do/lib/profile.sh
# ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/async-inference)

<% if (asyncMaxConcurrentInvocations) { %>
export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
<% } else { %>
# Max concurrent invocations per instance (uncomment to set)
# export ASYNC_MAX_CONCURRENT_INVOCATIONS=""
<% } %>
<% } %>

<% if (deploymentTarget === 'hyperpod-eks') { %>
# HyperPod EKS configuration
# Cluster name, namespace and replica count are fixed at generation time.
export HYPERPOD_CLUSTER_NAME="<%= hyperPodCluster %>"
export HYPERPOD_NAMESPACE="<%= hyperPodNamespace %>"
export HYPERPOD_REPLICAS="<%= hyperPodReplicas %>"
<% if (fsxVolumeHandle) { %>
# FSx volume handle supplied at generation time
export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
<% } else { %>
# FSx for Lustre volume for shared model storage (uncomment to enable)
# export FSX_VOLUME_HANDLE=""
<% } %>
<% } %>

<% if (deploymentTarget === 'batch-transform') { %>
# SageMaker Batch Transform configuration
# Instance type for the transform job. An INSTANCE_TYPE already set in the
# environment takes precedence over the generated default.
export INSTANCE_TYPE="${INSTANCE_TYPE:-<%= instanceType %>}"

# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
# BATCH_INPUT_PATH, BATCH_OUTPUT_PATH — see do/lib/profile.sh
# ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/batch-transform)

export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
export BATCH_STRATEGY="<%= batchStrategy %>"
# Falls back to the literal string None when no join source was configured.
export BATCH_JOIN_SOURCE="<%= batchJoinSource || 'None' %>"
<% if (batchMaxConcurrentTransforms) { %>
export BATCH_MAX_CONCURRENT_TRANSFORMS="<%= batchMaxConcurrentTransforms %>"
<% } else { %>
# Max concurrent transforms per instance (uncomment to set)
# export BATCH_MAX_CONCURRENT_TRANSFORMS=""
<% } %>
<% if (batchMaxPayloadInMB) { %>
export BATCH_MAX_PAYLOAD_IN_MB="<%= batchMaxPayloadInMB %>"
<% } else { %>
# Max payload size in MB (uncomment to set, default: 6)
# export BATCH_MAX_PAYLOAD_IN_MB=""
<% } %>
<% } %>

<% if (typeof endpointInitialInstanceCount !== 'undefined' && endpointInitialInstanceCount != null) { %>
export ENDPOINT_INITIAL_INSTANCE_COUNT="<%= endpointInitialInstanceCount %>"
<% } %>
<% if (typeof endpointDataCapturePercent !== 'undefined' && endpointDataCapturePercent != null) { %>
export ENDPOINT_DATA_CAPTURE_PERCENT="<%= endpointDataCapturePercent %>"
<% } %>
<% if (typeof endpointVariantName !== 'undefined' && endpointVariantName != null) { %>
export ENDPOINT_VARIANT_NAME="<%= endpointVariantName %>"
<% } %>
<% if (typeof endpointVolumeSize !== 'undefined' && endpointVolumeSize != null) { %>
export ENDPOINT_VOLUME_SIZE="<%= endpointVolumeSize %>"
<% } %>

<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference') { %>
# ─── Endpoint overrides (uncomment to customize) ───────────────────────────────
<% if (typeof endpointInitialInstanceCount === 'undefined' || endpointInitialInstanceCount == null) { %>
# export ENDPOINT_INITIAL_INSTANCE_COUNT="1"    # Number of instances for the endpoint
<% } %>
<% if (typeof endpointDataCapturePercent === 'undefined' || endpointDataCapturePercent == null) { %>
# export ENDPOINT_DATA_CAPTURE_PERCENT=""        # Percentage of requests to capture (0-100)
<% } %>
<% if (typeof endpointVariantName === 'undefined' || endpointVariantName == null) { %>
# export ENDPOINT_VARIANT_NAME=""                # Custom variant name (default: AllTraffic)
<% } %>
<% if (typeof endpointVolumeSize === 'undefined' || endpointVolumeSize == null) { %>
# export ENDPOINT_VOLUME_SIZE=""                 # EBS volume size in GB for model download
<% } %>
<% } %>

<% if (typeof icCpuCount !== 'undefined' && icCpuCount != null) { %>
export IC_CPU_COUNT="<%= icCpuCount %>"
<% } %>
<% if (typeof icMemorySize !== 'undefined' && icMemorySize != null) { %>
export IC_MEMORY_SIZE="<%= icMemorySize %>"
<% } %>
<% if (typeof icGpuCount !== 'undefined' && icGpuCount != null) { %>
export IC_GPU_COUNT="<%= icGpuCount %>"
<% } else { %>
export IC_GPU_COUNT="${IC_GPU_COUNT:-1}"
<% } %>
<% if (typeof icCopyCount !== 'undefined' && icCopyCount != null) { %>
export IC_COPY_COUNT="<%= icCopyCount %>"
<% } %>
<% if (typeof icModelWeight !== 'undefined' && icModelWeight != null) { %>
export IC_MODEL_WEIGHT="<%= icModelWeight %>"
<% } %>

<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference') { %>
# ─── Inference Component overrides (uncomment to customize) ────────────────────
<% if (typeof icCpuCount === 'undefined' || icCpuCount == null) { %>
# export IC_CPU_COUNT=""                         # CPU cores reserved for this IC
<% } %>
<% if (typeof icMemorySize === 'undefined' || icMemorySize == null) { %>
# export IC_MEMORY_SIZE=""                       # Memory in MB reserved for this IC
<% } %>
<% if (typeof icCopyCount === 'undefined' || icCopyCount == null) { %>
# export IC_COPY_COUNT=""                        # Number of model copies (multi-IC scaling)
<% } %>
<% if (typeof icModelWeight === 'undefined' || icModelWeight == null) { %>
# export IC_MODEL_WEIGHT=""                      # Traffic weight for this IC (0-100)
<% } %>
<% } %>

<%# Values are written with the unescaped output tag and wrapped in single quotes (embedded single quotes escaped) so that characters such as ampersands, angle brackets, quotes, spaces and closing braces reach the shell unchanged instead of being HTML-escaped or re-parsed. -%>
<% if (typeof modelEnvVars !== 'undefined' && modelEnvVars && Object.keys(modelEnvVars).length > 0) { %>
# Model environment variables
# Defaults are single-quoted literals (taken verbatim, no shell expansion).
# A non-empty value already set in the environment wins.
<% Object.entries(modelEnvVars).forEach(([key, value]) => { %>
export <%= key %>=${<%= key %>:-'<%- (value == null ? '' : String(value)).replace(/'/g, "'\\''") %>'}
<% }); %>
<% } %>

<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
# Server environment variables
# Defaults are single-quoted literals (taken verbatim, no shell expansion).
# A non-empty value already set in the environment wins.
<% Object.entries(serverEnvVars).forEach(([key, value]) => { %>
export <%= key %>=${<%= key %>:-'<%- (value == null ? '' : String(value)).replace(/'/g, "'\\''") %>'}
<% }); %>
<% } %>

# Framework-specific configuration
<% if (framework === 'transformers') { %>
# Model name chosen at generation time
export MODEL_NAME="<%= modelName %>"
# Secrets Manager integration: when an ARN is configured, do-scripts resolve the
# secret at the appropriate stage (build-time or runtime). When a plaintext value
# is configured, it is exported directly. The _ARN suffix signals resolution is needed.
# The ARN form wins when both an ARN and a plaintext value were supplied.
<% if (typeof hfTokenArn !== 'undefined' && hfTokenArn) { %>
export HF_TOKEN_ARN="<%= hfTokenArn %>"
<% } else if (hfToken) { %>
# NOTE: plaintext token stored in this file — keep it out of version control.
export HF_TOKEN="<%= hfToken %>"
<% } %>
<% if (typeof ngcTokenArn !== 'undefined' && ngcTokenArn) { %>
export NGC_API_KEY_ARN="<%= ngcTokenArn %>"
<% } else if (ngcApiKey) { %>
# NOTE: plaintext key stored in this file — keep it out of version control.
export NGC_API_KEY="<%= ngcApiKey %>"
<% } %>

<% if (deploymentTarget !== 'batch-transform') { %>
# Managed Model Customization (do/tune)
# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
# TUNE_S3_BUCKET — see do/lib/profile.sh
# true only when tuning support was flagged at generation time, otherwise false
export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
<% if (typeof tuneSupported !== 'undefined' && tuneSupported) { %>
<% if (typeof tuneModelId !== 'undefined' && tuneModelId) { %>
# SageMaker AI Managed Fine-Tuning — JumpStart Hub model ID
# Flow: JumpStart model (tune) → LoRA adapter (S3) → do/adapter add → vLLM
export TUNE_MODEL_ID="<%= tuneModelId %>"
<% } else { %>
# SageMaker AI Managed Fine-Tuning — JumpStart Hub model ID
# To find your model's Hub ID:
#   aws sagemaker list-hub-contents --hub-name SageMakerPublicHub \
#     --hub-content-type Model --query "HubContentSummaries[].HubContentName"
# export TUNE_MODEL_ID=""
<% } %>
<% } %>
# MLflow App ARN for experiment tracking (set by bootstrap, or override manually)
# export MLFLOW_APP_ARN=""
<% } %>
<% } %>

<% if (framework === 'diffusors') { %>
# Model name chosen at generation time
export MODEL_NAME="<%= modelName %>"
# Secrets Manager integration: when an ARN is configured, do-scripts resolve the
# secret at the appropriate stage (build-time or runtime). When a plaintext value
# is configured, it is exported directly. The _ARN suffix signals resolution is needed.
# The ARN form wins when both an ARN and a plaintext value were supplied.
<% if (typeof hfTokenArn !== 'undefined' && hfTokenArn) { %>
export HF_TOKEN_ARN="<%= hfTokenArn %>"
<% } else if (hfToken) { %>
# NOTE: plaintext token stored in this file — keep it out of version control.
export HF_TOKEN="<%= hfToken %>"
<% } %>
<% } %>

<% if (modelFormat) { %>
# Model format selected at generation time
export MODEL_FORMAT="<%= modelFormat %>"
<% } else { %>
# Model format (uncomment if using quantized models)
# Valid: pkl, json, keras, safetensors, gguf, awq, gptq
# export MODEL_FORMAT=""
<% } %>

<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
# SageMaker AI Benchmarking configuration
export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
export BENCHMARK_INPUT_TOKENS_MEAN="<%= benchmarkInputTokensMean %>"
export BENCHMARK_OUTPUT_TOKENS_MEAN="<%= benchmarkOutputTokensMean %>"
export BENCHMARK_STREAMING="<%= benchmarkStreaming %>"
<% if (benchmarkRequestCount) { %>
export BENCHMARK_REQUEST_COUNT="<%= benchmarkRequestCount %>"
<% } else { %>
# Left empty because no request count was configured
export BENCHMARK_REQUEST_COUNT=""
<% } %>
<% if (benchmarkS3OutputPath) { %>
export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
<% } else { %>
# Default path built from the caller's account ID, AWS_REGION and PROJECT_NAME.
# NOTE(review): the account ID is looked up with the AWS CLI every time this file
# is sourced; if that call fails the bucket name presumably loses its account
# segment — confirm and consider resolving it lazily.
export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identity --query Account --output text)-${AWS_REGION}/${PROJECT_NAME}/"
<% } %>
# Both start empty; set them to resume an existing job or reuse a workload config
export BENCHMARK_JOB_NAME=""
export BENCHMARK_WORKLOAD_CONFIG_NAME=""

# CI Benchmark Athena persistence (set automatically from bootstrap --benchmark-infra)
<% if (typeof ciBenchmarkResultsBucket !== 'undefined' && ciBenchmarkResultsBucket) { %>
export CI_BENCHMARK_RESULTS_BUCKET="<%= ciBenchmarkResultsBucket %>"
<% } else { %>
# export CI_BENCHMARK_RESULTS_BUCKET=""            # S3 bucket for Athena Parquet results (set by bootstrap --benchmark-infra)
<% } %>
<% } else if (framework === 'transformers' && deploymentTarget !== 'batch-transform') { %>
# ─── SageMaker AI Benchmarking (uncomment to enable) ──────────────────────────
# These hints are only written for the transformers framework on non-batch targets.
# export BENCHMARK_CONCURRENCY="10"              # Concurrent requests
# export BENCHMARK_INPUT_TOKENS_MEAN="550"       # Mean input tokens per request
# export BENCHMARK_OUTPUT_TOKENS_MEAN="150"      # Mean output tokens per request
# export BENCHMARK_STREAMING="true"              # Enable streaming
# export BENCHMARK_REQUEST_COUNT=""              # Total requests (empty = auto)
# export BENCHMARK_S3_OUTPUT_PATH=""             # S3 path for results (empty = auto)
# export BENCHMARK_JOB_NAME=""                   # Resume/check existing job
# export BENCHMARK_WORKLOAD_CONFIG_NAME=""       # Reuse existing workload config
<% } %>

<%# Catalog values are written with the unescaped output tag and wrapped in single quotes (embedded single quotes escaped) so that ampersands, angle brackets, quotes, spaces and closing braces reach the shell unchanged instead of being HTML-escaped or re-parsed. -%>
<% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
# Runtime environment variables (from catalog)
# Defaults are single-quoted literals (taken verbatim, no shell expansion).
# A non-empty value already set in the environment wins.
<% orderedEnvVars.forEach(({ key, value }) => { %>
export <%= key %>=${<%= key %>:-'<%- (value == null ? '' : String(value)).replace(/'/g, "'\\''") %>'}
<% }); %>
<% } %>

# Base image: the environment value wins; the generated default may be empty.
export BASE_IMAGE="${BASE_IMAGE:-<%= baseImage || '' %>}"

# Allow environment variable overrides
# NOTE(review): this only has an effect if no earlier line in this file has
# already assigned INSTANCE_TYPE unconditionally — confirm.
<% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
export INSTANCE_TYPE="${INSTANCE_TYPE:-<%= instanceType %>}"
<% } %>

# Print configuration summary
# Written to stdout every time this file is sourced. Values that this file does
# not set itself (the async and batch S3/SNS settings, documented above as
# resolved by do/lib/profile.sh) are printed with a fallback so the summary is
# never blank and sourcing does not abort under `set -u`.
echo "⚙️  Configuration loaded"
echo "   Project: ${PROJECT_NAME}"
echo "   Config:  ${DEPLOYMENT_CONFIG}"
echo "   Region:  ${AWS_REGION}"
echo "   Build target: ${BUILD_TARGET}"
echo "   Deployment target: ${DEPLOYMENT_TARGET}"
<% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
echo "   Runtime env vars: <%= orderedEnvVars.length %>"
<% } %>
<% if ((typeof endpointInitialInstanceCount !== 'undefined' && endpointInitialInstanceCount != null) || (typeof endpointDataCapturePercent !== 'undefined' && endpointDataCapturePercent != null) || (typeof endpointVariantName !== 'undefined' && endpointVariantName != null) || (typeof endpointVolumeSize !== 'undefined' && endpointVolumeSize != null)) { %>
echo "   Endpoint config:"
<% if (typeof endpointInitialInstanceCount !== 'undefined' && endpointInitialInstanceCount != null) { %>
echo "     Initial instance count: ${ENDPOINT_INITIAL_INSTANCE_COUNT}"
<% } %>
<% if (typeof endpointDataCapturePercent !== 'undefined' && endpointDataCapturePercent != null) { %>
echo "     Data capture percent: ${ENDPOINT_DATA_CAPTURE_PERCENT}"
<% } %>
<% if (typeof endpointVariantName !== 'undefined' && endpointVariantName != null) { %>
echo "     Variant name: ${ENDPOINT_VARIANT_NAME}"
<% } %>
<% if (typeof endpointVolumeSize !== 'undefined' && endpointVolumeSize != null) { %>
echo "     Volume size: ${ENDPOINT_VOLUME_SIZE} GB"
<% } %>
<% } %>
<% if ((typeof icCpuCount !== 'undefined' && icCpuCount != null) || (typeof icMemorySize !== 'undefined' && icMemorySize != null) || (typeof icGpuCount !== 'undefined' && icGpuCount != null) || (typeof icCopyCount !== 'undefined' && icCopyCount != null) || (typeof icModelWeight !== 'undefined' && icModelWeight != null)) { %>
echo "   IC config:"
<% if (typeof icCpuCount !== 'undefined' && icCpuCount != null) { %>
echo "     CPU count: ${IC_CPU_COUNT}"
<% } %>
<% if (typeof icMemorySize !== 'undefined' && icMemorySize != null) { %>
echo "     Memory size: ${IC_MEMORY_SIZE} MB"
<% } %>
<% if (typeof icGpuCount !== 'undefined' && icGpuCount != null) { %>
echo "     GPU count: ${IC_GPU_COUNT}"
<% } %>
<% if (typeof icCopyCount !== 'undefined' && icCopyCount != null) { %>
echo "     Copy count: ${IC_COPY_COUNT}"
<% } %>
<% if (typeof icModelWeight !== 'undefined' && icModelWeight != null) { %>
echo "     Model weight: ${IC_MODEL_WEIGHT}"
<% } %>
<% } %>
<% if (typeof modelEnvVars !== 'undefined' && modelEnvVars && Object.keys(modelEnvVars).length > 0) { %>
echo "   Model env vars: <%= Object.keys(modelEnvVars).length %>"
<% } %>
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
echo "   Server env vars: <%= Object.keys(serverEnvVars).length %>"
<% } %>
<% if (deploymentTarget === 'realtime-inference') { %>
<% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
echo "   Endpoint: ${ENDPOINT_NAME} (external)"
<% } else { %>
echo "   Instance: ${INSTANCE_TYPE}"
<% } %>
<% } else if (deploymentTarget === 'async-inference') { %>
echo "   Instance: ${INSTANCE_TYPE}"
echo "   S3 output: ${ASYNC_S3_OUTPUT_PATH:-(resolved by do/lib/profile.sh)}"
echo "   SNS success: ${ASYNC_SNS_SUCCESS_TOPIC:-(resolved by do/lib/profile.sh)}"
echo "   SNS error: ${ASYNC_SNS_ERROR_TOPIC:-(resolved by do/lib/profile.sh)}"
<% } else if (deploymentTarget === 'batch-transform') { %>
echo "   Instance: ${INSTANCE_TYPE} x ${BATCH_INSTANCE_COUNT}"
echo "   S3 input: ${BATCH_INPUT_PATH:-(resolved by do/lib/profile.sh)}"
echo "   S3 output: ${BATCH_OUTPUT_PATH:-(resolved by do/lib/profile.sh)}"
echo "   Split type: ${BATCH_SPLIT_TYPE}"
echo "   Strategy: ${BATCH_STRATEGY}"
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
echo "   HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
echo "   Namespace: ${HYPERPOD_NAMESPACE}"
<% } %>
