All files / src/sdk verify.ts

83.9% Statements 73/87
73.13% Branches 49/67
100% Functions 13/13
86.25% Lines 69/80

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315                                                                                              17x                         17x 17x     5x       14x                                                             18x                     13x 12x   12x 12x   12x 12x 12x 12x 12x             1x       13x 8x     5x 3x     2x           2x   2x 2x         2x     2x     2x 1x   1x     2x     2x     2x 2x             15x 1x     14x   14x 14x       14x 1x                   13x   12x 3x           3x     9x 2x                         3x                   16x 2x     14x 16x 16x   16x       19x 1x     18x 18x                                 15x   15x     19x 19x 19x   5x 2x     3x 3x                                         16x 16x   11x          
import { z } from 'zod';
import type { SkillDefinition } from '../config/schema.js';
import { FindingSchema, type Finding, type UsageStats } from '../types/index.js';
import { aggregateUsage } from './usage.js';
import { extractBalancedJson } from './extract.js';
import {
  WardenAuthenticationError,
  classifyError,
  isAuthenticationError,
  isAuthenticationErrorMessage,
  isSubprocessError,
} from './errors.js';
import {
  getRuntime,
  getRuntimeProviderOptions,
  type RuntimeName,
  type SkillRunResult,
} from './runtimes/index.js';
import type { FindingProcessingEvent } from './types.js';
import { runPool } from '../utils/index.js';
import {
  buildChangedFilesSection,
  buildJsonOutputSection,
  buildPullRequestContextSection,
  buildTaggedSection,
  joinPromptSections,
  type PromptPRContext,
} from './prompt-sections.js';
 
/**
 * Inputs for `verifyFindings`, beyond the candidate findings themselves.
 */
export interface VerifyFindingsOptions {
  /** Forwarded to the runtime as the `repoPath` of every verification run. */
  repoPath: string;
  /** Skill that produced the candidates; its prompt, name, and tools are reused for verification. */
  skill: SkillDefinition;
  /** Forwarded unchanged to the runtime. */
  apiKey?: string;
  /** Runtime used for the verification pass; `verifyFindings` falls back to `'pi'` when omitted. */
  runtime?: RuntimeName;
  /** Forwarded to the runtime in the run options. */
  model?: string;
  /** Forwarded to the runtime in the run options. */
  maxTurns?: number;
  /**
   * Checked before each verification task starts and forwarded to the runtime.
   * Findings whose verification is aborted are kept rather than dropped.
   */
  abortController?: AbortController;
  /** Passed to `getRuntimeProviderOptions` when building the runtime's provider options. */
  pathToClaudeCodeExecutable?: string;
  /** Pull-request context used to build the verification user prompt. */
  prContext?: PromptPRContext;
  /** Called with a `verification`-stage event when a finding is rejected or revised. */
  onFindingProcessing?: (event: FindingProcessingEvent) => void;
}
 
/**
 * Outcome of `verifyFindings`.
 */
export interface VerifyFindingsResult {
  /** Findings carried forward after verification; rejected candidates are omitted. */
  findings: Finding[];
  /** Usage aggregated over the verification runs; absent when no run reported usage. */
  usage?: UsageStats;
}
 
// Shape of the JSON verdict object the verifier is asked to return.
// `finding` may be missing or null; it is only consulted for a `revise` verdict.
const VerificationVerdictSchema = z.object({
  verdict: z.enum(['keep', 'revise', 'reject']),
  finding: FindingSchema.nullish(),
  reason: z.string().optional(),
});
 
/** Parsed verdict, inferred from `VerificationVerdictSchema`. */
type VerificationVerdict = z.infer<typeof VerificationVerdictSchema>;
 
/**
 * Result of verifying a single candidate finding.
 */
interface VerificationTaskResult {
  /** Finding to carry forward; absent when the verifier rejected the candidate. */
  finding?: Finding;
  /** Usage reported by the runtime for this verification run, if any. */
  usage?: UsageStats;
}
 
// Matches every opening brace; each match offset is tried as the start of a JSON object in verifier output.
const JSON_OBJECT_START = /\{/g;
// Passed to runPool alongside the findings — presumably the maximum number of verification tasks in flight; confirm against runPool.
const VERIFICATION_CONCURRENCY = 4;
 
/**
 * Decides whether a failure should be treated as an abort.
 *
 * @param error - The value caught from a failed verification run.
 * @param abortController - Optional controller whose signal is checked first.
 * @returns `true` when the controller's signal is already aborted, or when
 *   `classifyError` reports the error code `'aborted'`.
 */
function isAbortRequested(error: unknown, abortController?: AbortController): boolean {
  // An aborted signal settles the question without classifying the error.
  if (abortController?.signal.aborted) {
    return true;
  }

  const { code } = classifyError(error);
  return code === 'aborted';
}
 
/**
 * Builds the system prompt for the verification agent.
 *
 * The prompt embeds the originating skill's own prompt as the only scope for
 * verification, and ends with the JSON output section describing the
 * `keep | revise | reject` verdict object.
 *
 * @param skill - Skill whose `prompt` is interpolated into the instructions.
 * @returns The complete system prompt text.
 */
function buildVerificationSystemPrompt(skill: SkillDefinition): string {
  return `<role>
You are Warden's finding verifier. You validate one candidate finding at a time.
Your job is to deeply trace the code, look for mitigations and intent, then keep, revise, or reject the candidate.
</role>
 
<tools>
Use read-only tools to inspect the repository. Read the reported file and use Grep/Glob to trace callers, imports, wrappers, guards, validators, and related code.
</tools>
 
<skill_instructions>
The candidate was produced for this skill. Use these criteria as the only scope for verification:
 
${skill.prompt}
</skill_instructions>
 
<verification_stance>
- Keep findings only when the issue is still real after tracing.
- Revise findings when the issue is real but the severity, confidence, title, description, or verification needs a narrower scope.
- Reject findings when the path is mitigated, unreachable, intentional, outside skill scope, or lacks a concrete code-level violation of the skill criteria.
- Do not reject solely because broader repository invariants or caller behavior are incomplete in the inspected context. If the changed code shows a concrete source, boundary, and sink with no verified mitigation, keep or revise the finding.
- When reachability or impact is plausible but not fully proven, keep the finding and revise severity, confidence, or scope instead of rejecting it.
</verification_stance>
 
${buildJsonOutputSection(`
{"verdict":"keep|revise|reject","finding":{...},"reason":"short reason"}
 
Use "finding" only for verdict "revise". For revised findings, return the complete Warden finding object and keep the original id.
`)}`;
}
 
/**
 * Builds the per-finding user prompt for the verification agent.
 *
 * Sections, in order: pull-request context, changed files (given the finding's
 * file path when it has a location), the candidate finding serialized as
 * pretty-printed JSON, and the task instruction.
 *
 * @param finding - Candidate finding to verify.
 * @param prContext - Optional pull-request context passed to the section builders.
 * @returns The joined prompt text.
 */
function buildVerificationUserPrompt(finding: Finding, prContext?: PromptPRContext): string {
  const pullRequestSection = buildPullRequestContextSection(prContext);
  const changedFilesSection = buildChangedFilesSection(prContext, finding.location?.path);

  const candidateJson = JSON.stringify(finding, null, 2);
  const candidateSection = buildTaggedSection('candidate_finding', candidateJson);

  const taskSection = '<task>\nVerify this candidate. Return keep, revise, or reject.\n</task>';

  return joinPromptSections([
    pullRequestSection,
    changedFilesSection,
    candidateSection,
    taskSection,
  ]);
}
 
/**
 * Extracts the first valid verification verdict from the verifier's text output.
 *
 * Every `{` in the text is tried as the start of a JSON object. The first
 * candidate that is balanced, parses as JSON, and satisfies
 * `VerificationVerdictSchema` wins.
 *
 * @param text - Raw text returned by the verification run.
 * @returns The parsed verdict, or `null` when no candidate object validates.
 */
function parseVerificationVerdict(text: string): VerificationVerdict | null {
  for (const match of text.matchAll(JSON_OBJECT_START)) {
    if (match.index === undefined) continue;

    const json = extractBalancedJson(text, match.index);
    if (!json) continue;

    try {
      // Keep the parsed value as `unknown`; the schema is the only thing that narrows it.
      const parsed: unknown = JSON.parse(json);
      const result = VerificationVerdictSchema.safeParse(parsed);
      if (result.success) {
        return result.data;
      }
    } catch {
      // Keep scanning in case prose or another object appears before the verdict.
    }
  }

  return null;
}
 
/**
 * Applies a verification verdict to a candidate finding.
 *
 * @param finding - The original candidate finding.
 * @param verdict - Parsed verdict, or `null` when none could be obtained.
 * @returns
 * - the original finding when there is no verdict, the verdict is `keep`, the
 *   verdict is `revise` without a replacement finding, or the revised finding
 *   fails `FindingSchema` validation;
 * - `null` when the verdict is `reject`;
 * - otherwise the validated revised finding.
 */
function applyVerdict(finding: Finding, verdict: VerificationVerdict | null): Finding | null {
  if (!verdict || verdict.verdict === 'keep') {
    return finding;
  }

  if (verdict.verdict === 'reject') {
    return null;
  }

  // A `revise` verdict without a replacement cannot be applied; keep the original.
  if (!verdict.finding) {
    return finding;
  }

  // Verification runs after hunk validation, so revisions keep the original
  // validated anchors and fix payload.
  const revised = { ...verdict.finding, id: finding.id };

  // For each preserved field, copy the original value, or remove whatever the
  // verifier supplied when the original had none.
  if (finding.location) {
    revised.location = finding.location;
  } else {
    delete revised.location;
  }

  if (finding.additionalLocations) {
    revised.additionalLocations = finding.additionalLocations;
  } else {
    delete revised.additionalLocations;
  }

  if (finding.suggestedFix) {
    revised.suggestedFix = finding.suggestedFix;
  } else {
    delete revised.suggestedFix;
  }

  // Compared against undefined (not truthiness) so an elapsed time of 0 is preserved.
  if (finding.elapsedMs !== undefined) {
    revised.elapsedMs = finding.elapsedMs;
  } else {
    delete revised.elapsedMs;
  }

  // Fall back to the original if the merged object is not a valid finding.
  const result = FindingSchema.safeParse(revised);
  return result.success ? result.data : finding;
}
 
/**
 * Throws when a verification run reported an authentication failure.
 *
 * @param authError - Authentication error message returned by the runtime, if any.
 * @param result - The run result, if the runtime produced one.
 * @throws WardenAuthenticationError when `authError` is set, when the result
 *   status is `'auth_error'`, or when any entry in `result.errors` is
 *   recognized by `isAuthenticationErrorMessage`.
 */
function throwIfAuthenticationFailure(
  authError: string | undefined,
  result: SkillRunResult | undefined
): void {
  if (authError) {
    throw new WardenAuthenticationError(authError);
  }

  if (!result) return;

  // The matching message (possibly undefined) is passed along in both cases:
  // an explicit auth_error status, or an auth-looking message among the errors.
  const authMessage = result.errors.find(isAuthenticationErrorMessage);
  if (result.status === 'auth_error' || authMessage) {
    throw new WardenAuthenticationError(authMessage);
  }
}
 
/**
 * Reports a verification outcome through `options.onFindingProcessing`.
 *
 * A `reject` verdict emits a `rejected` event. A `revise` verdict emits a
 * `revised` event carrying the replacement, but only when a replacement
 * exists. A missing verdict or a `keep` verdict emits nothing.
 *
 * @param options - Verification options holding the optional callback.
 * @param finding - The original candidate finding.
 * @param verdict - Parsed verdict, or `null` when none was obtained.
 * @param next - The finding that results from applying the verdict, or `null`.
 */
function notifyVerdict(
  options: VerifyFindingsOptions,
  finding: Finding,
  verdict: VerificationVerdict | null,
  next: Finding | null
): void {
  switch (verdict?.verdict) {
    case 'reject':
      options.onFindingProcessing?.({
        stage: 'verification',
        action: 'rejected',
        finding,
        reason: verdict.reason,
      });
      break;

    case 'revise':
      if (next) {
        options.onFindingProcessing?.({
          stage: 'verification',
          action: 'revised',
          finding,
          replacement: next,
          reason: verdict.reason,
        });
      }
      break;

    default:
      // No verdict, or 'keep': nothing to report.
      break;
  }
}
 
/**
 * Produces the task result for a finding whose verification was interrupted.
 *
 * An abort says nothing about whether the finding is valid, so the candidate
 * is carried forward unchanged and no usage is attached. This lets an
 * interrupted run still report the findings it had collected.
 *
 * @param finding - The candidate finding being verified.
 * @returns A task result containing only the original finding.
 */
function keepFindingAfterInterruptedVerification(finding: Finding): VerificationTaskResult {
  const preserved: VerificationTaskResult = { finding };
  return preserved;
}
 
/**
 * Verify candidate findings with a second read-only repo-aware agent pass.
 *
 * Each finding is verified in its own runtime run, scheduled through `runPool`
 * with `VERIFICATION_CONCURRENCY`. The verifier's verdict keeps, revises, or
 * rejects the candidate. Verification is fail-open: when a run is aborted,
 * does not succeed, yields no parseable verdict, or fails with a
 * non-authentication error, the original finding is kept.
 *
 * @param findings - Candidate findings to verify. An empty list is returned
 *   as-is without starting a runtime.
 * @param options - Repository, skill, runtime, and callback configuration.
 * @returns The surviving findings and, when any run reported usage, the
 *   aggregated usage.
 * @throws WardenAuthenticationError when a run reports an authentication
 *   failure, when the error is recognized by `isSubprocessError`, or when it
 *   is recognized by `isAuthenticationError`.
 */
export async function verifyFindings(
  findings: Finding[],
  options: VerifyFindingsOptions
): Promise<VerifyFindingsResult> {
  if (findings.length === 0) {
    return { findings };
  }

  const runtimeName = options.runtime ?? 'pi';
  const runtime = getRuntime(runtimeName);
  // The system prompt depends only on the skill, so build it once for all findings.
  const systemPrompt = buildVerificationSystemPrompt(options.skill);

  const results = await runPool<Finding, VerificationTaskResult>(
    findings,
    VERIFICATION_CONCURRENCY,
    async (finding) => {
      // Do not start new runs once an abort has been requested.
      if (options.abortController?.signal.aborted) {
        return keepFindingAfterInterruptedVerification(finding);
      }

      try {
        const { result, authError } = await runtime.runSkill({
          apiKey: options.apiKey,
          systemPrompt,
          userPrompt: buildVerificationUserPrompt(finding, options.prContext),
          repoPath: options.repoPath,
          skillName: `${options.skill.name}:verification`,
          options: {
            model: options.model,
            maxTurns: options.maxTurns,
            abortController: options.abortController,
          },
          tools: options.skill.tools,
          providerOptions: getRuntimeProviderOptions(runtimeName, {
            pathToClaudeCodeExecutable: options.pathToClaudeCodeExecutable,
          }),
        });

        throwIfAuthenticationFailure(authError, result);

        // Only a successful run can produce a verdict; anything else keeps the finding.
        const verdict = result?.status === 'success'
          ? parseVerificationVerdict(result.text)
          : null;
        const next = applyVerdict(finding, verdict);
        notifyVerdict(options, finding, verdict, next);
        return { finding: next ?? undefined, usage: result?.usage };
      } catch (error) {
        // Abort is checked first so an interrupted run never surfaces as an auth failure.
        if (isAbortRequested(error, options.abortController)) {
          return keepFindingAfterInterruptedVerification(finding);
        }

        if (error instanceof WardenAuthenticationError) {
          throw error;
        }

        if (isSubprocessError(error)) {
          const errorMessage = error instanceof Error ? error.message : String(error);
          throw new WardenAuthenticationError(
            `Claude Code subprocess failed (${errorMessage}).\n` +
            `This usually means the claude CLI cannot run in this environment.`,
            { cause: error }
          );
        }

        if (isAuthenticationError(error)) {
          throw new WardenAuthenticationError(undefined, { cause: error });
        }

        // Any other failure is inconclusive: keep the candidate unverified.
        return { finding };
      }
    }
  );

  const verified = results.flatMap((result) => result.finding ? [result.finding] : []);
  const usage = results.map((result) => result.usage).filter((u): u is UsageStats => u !== undefined);

  return {
    findings: verified,
    usage: usage.length > 0 ? aggregateUsage(usage) : undefined,
  };
}