#!/usr/bin/env bash

set -euo pipefail

# Require at least the PR number. Usage text goes to stderr because stdout
# carries the JSON result of this script and must not be mixed with
# diagnostics.
if (( $# < 1 )); then
    cat >&2 <<'EOF'
Usage: get-pr-comments PR_NUMBER [OWNER/REPO]
Example: get-pr-comments 123
Example: get-pr-comments 123 OWNER/REPO
EOF
    exit 1
fi

PR_NUMBER=$1

# The value is later passed to `gh api -F pr=...`, which applies magic typing
# (numbers, booleans, @file reads). Reject anything that is not a plain
# integer up front so the failure is a clear message instead of a GraphQL
# type error.
if [[ ! "$PR_NUMBER" =~ ^[0-9]+$ ]]; then
    echo "Error: PR_NUMBER must be a number, got: $PR_NUMBER" >&2
    exit 1
fi

# Resolve the repository as an OWNER/REPO slug, either from the second
# argument or from the repository gh detects in the current directory.
if [[ -n "${2:-}" ]]; then
    repo_slug=$2
    # Without a slash there is no way to tell owner from repo.
    if [[ "$repo_slug" != */* ]]; then
        echo "Error: Repository must be given as OWNER/REPO, got: $repo_slug" >&2
        exit 1
    fi
else
    # gh's own error output is suppressed on purpose; the `|| repo_slug=""`
    # keeps `set -e` from aborting here so the explicit message below is
    # what the user sees when detection fails.
    repo_slug=$(gh repo view --json owner,name \
        -q '.owner.login + "/" + .name' 2>/dev/null) || repo_slug=""
fi

# Split on the first two slash-separated fields (anything after a second
# slash is ignored).
OWNER=${repo_slug%%/*}
repo_rest=${repo_slug#*/}
REPO=${repo_rest%%/*}

if [[ -z "$OWNER" || -z "$REPO" ]]; then
    echo "Error: Could not detect repository. Pass OWNER/REPO as second argument." >&2
    exit 1
fi

# Output is a JSON object with four keys:
#   review_threads   - unresolved inline review threads, edge-wrapped as
#                      [{ node: { id, isResolved, isOutdated, path, line, ...,
#                                 comments: { nodes: [...], pageInfo: ... } } }]
#   pr_comments      - top-level PR conversation comments (excludes PR author and known CI/status bots)
#   review_bodies    - review submissions with non-empty body text (excludes PR author and known CI/status bots)
#   fetch_warnings   - deterministic warnings about incomplete nested data,
#                      such as per-thread comments beyond the fetched first 100.
#
# Pagination: each top-level connection -- reviewThreads, comments, reviews --
# is fetched in its own paginated query because `gh api graphql --paginate`
# only follows the outermost pageInfo per response. Keeping these connections
# in one query silently drops later pages on long-lived PRs.
#
# Per-thread inline `comments` are fetched up to 100 comments. Nested comment
# pagination is intentionally not guessed in this script; when a thread has
# more comments, the output carries fetch_warnings so the resolver treats
# absence as incomplete evidence rather than confirmed absence.
#
# Bot filtering: only CI/status bots (codecov, etc.) are filtered at the source.
# Their output is structurally never actionable -- coverage numbers, build
# summaries, deploy status -- and that holds regardless of format changes.
# AI review bots (coderabbitai, codex, gemini, copilot) are NOT filtered here.
# Historically their top-level comments were assumed to always be wrappers, but
# that turned out to be wrong: Codex sometimes posts actionable findings as
# top-level PR comments with no inline thread counterpart. Any source-level
# heuristic to separate wrapper from actionable for these bots is brittle (one
# bot format change away from silently dropping feedback). SKILL.md step 2
# has a content-aware actionability check and Silent Drop rule that handles
# wrappers correctly, so we trust that layer instead. Add new logins to the CI
# list only if their output is structurally non-actionable like codecov's.
# GraphQL document for the inline review threads of the PR. It also selects
# the PR author, and per thread the first 100 comments together with that
# nested connection's pageInfo.
threads_query='
query Threads($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
  repository(owner: $owner, name: $repo) {
    pullRequest(number: $pr) {
      author { login }
      reviewThreads(first: 100, after: $endCursor) {
        nodes {
          id
          isResolved
          isOutdated
          path
          line
          originalLine
          startLine
          originalStartLine
          comments(first: 100) {
            nodes {
              id
              author { login }
              body
              createdAt
              url
            }
            pageInfo { hasNextPage endCursor }
          }
        }
        pageInfo { hasNextPage endCursor }
      }
    }
  }
}'

# Fetch every page of reviewThreads; --slurp collects the per-page responses
# into a single JSON array.
threads_pages=$(
  gh api graphql --paginate --slurp \
    -f owner="$OWNER" \
    -f repo="$REPO" \
    -F pr="$PR_NUMBER" \
    -f query="$threads_query"
)

# GraphQL document for the top-level PR conversation comments
# (id, author login, body).
comments_query='
query Comments($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
  repository(owner: $owner, name: $repo) {
    pullRequest(number: $pr) {
      comments(first: 100, after: $endCursor) {
        nodes {
          id
          author { login }
          body
        }
        pageInfo { hasNextPage endCursor }
      }
    }
  }
}'

# Fetch every page of comments; --slurp collects the per-page responses into
# a single JSON array.
comments_pages=$(
  gh api graphql --paginate --slurp \
    -f owner="$OWNER" \
    -f repo="$REPO" \
    -F pr="$PR_NUMBER" \
    -f query="$comments_query"
)

# GraphQL document for the review submissions on the PR
# (id, author login, body, state).
reviews_query='
query Reviews($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
  repository(owner: $owner, name: $repo) {
    pullRequest(number: $pr) {
      reviews(first: 100, after: $endCursor) {
        nodes {
          id
          author { login }
          body
          state
        }
        pageInfo { hasNextPage endCursor }
      }
    }
  }
}'

# Fetch every page of reviews; --slurp collects the per-page responses into
# a single JSON array.
reviews_pages=$(
  gh api graphql --paginate --slurp \
    -f owner="$OWNER" \
    -f repo="$REPO" \
    -F pr="$PR_NUMBER" \
    -f query="$reviews_query"
)

# Merge the three paginated result sets into the final JSON object on stdout.
#
# The page arrays are streamed to jq on stdin instead of being passed with
# --argjson. Command-line arguments are size-limited by the kernel (128 KiB
# per single argument on Linux), and the comment bodies of a long-lived PR
# can exceed that, which makes jq fail with "Argument list too long".
# printf is a shell builtin and is not subject to that limit. `jq -s` slurps
# the three documents, in order, into one array that is destructured below.
printf '%s\n' "$threads_pages" "$comments_pages" "$reviews_pages" | jq -s '
  . as [$threads, $comments, $reviews] |
  ($threads[0].data.repository.pullRequest.author) as $author |
  # Flatten the per-page node lists of each connection.
  [$threads[].data.repository.pullRequest.reviewThreads.nodes[]] as $all_threads |
  [$comments[].data.repository.pullRequest.comments.nodes[]] as $all_comments |
  [$reviews[].data.repository.pullRequest.reviews.nodes[]] as $all_reviews |
  # Structurally non-actionable bot output; always dropped.
  ["codecov"] as $ci_bot_logins |
  # Threads whose nested comments connection reports another page: only the
  # first 100 comments were fetched, so they are reported in fetch_warnings.
  [$all_threads[]
    | select(.comments.pageInfo.hasNextPage == true)
    | {
        thread_id: .id,
        path: .path,
        fetched_comments: (.comments.nodes | length),
        reason: "thread_comments_nested_pagination_unsupported"
      }] as $truncated_threads |
  # Unresolved threads. `isOutdated` means the diff hunk around the comment
  # has shifted since the thread was opened -- not that the reviewer concern
  # was addressed. Resolution state is the only authoritative signal; outdated
  # threads are still surfaced (with their isOutdated flag intact) so the
  # resolver can factor in that the referenced line may have moved.
  [$all_threads[]
    | select(.isResolved == false)] as $unresolved |
{
  review_threads: [$unresolved[] | { node: . }],
  # Drop comments by the PR author, by CI bots, and with blank bodies.
  pr_comments: [$all_comments[]
    | select(.author.login != $author.login)
    | select(
        .author.login as $l | $ci_bot_logins | index($l) | not
      )
    | select(.body | test("^\\s*$") | not)],
  # Same exclusions as pr_comments: a null, empty, or whitespace-only body
  # carries no review text.
  review_bodies: [$all_reviews[]
    | select((.body // "") | test("^\\s*$") | not)
    | select(.author.login != $author.login)
    | select(
        .author.login as $l | $ci_bot_logins | index($l) | not
      )],
  fetch_warnings: (
    if ($truncated_threads | length) > 0 then
      [{
        code: "thread_comments_truncated",
        message: "One or more review threads have more comments than the first 100 fetched by this script; absence of a nested comment is incomplete evidence, not confirmed absence.",
        threads: $truncated_threads
      }]
    else [] end
  )
}'
