All files / src/llm-orchestration history-compression.service.ts

93.51% Statements 173/185
87.65% Branches 71/81
100% Functions 17/17
94.18% Lines 162/172

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 52119x   19x                
                         19x 70x         70x     70x             70x     70x                         67x 67x 1x 1x     66x 1x       65x     65x 14x       51x                                 65x     65x       65x 487x 487x 134x 138x 138x       118x               65x 65x 487x 487x 138x         65x 487x     487x 138x 138x 57x 57x   57x 64x 64x 37x                     487x 134x 138x 138x   138x 61x 61x 60x   60x 60x       30x                     65x                         51x 51x 426x 426x 119x 123x 55x             51x 51x 426x 426x 122x       51x 51x   51x   426x 122x 122x 55x 55x   55x       54x   54x 64x 64x   64x 28x             54x 26x 26x             400x 119x 119x 123x     123x 73x     50x 50x       50x 50x     50x   50x 27x 27x 27x             23x     119x 27x             373x     51x 39x 39x 23x   39x 19x   39x     51x                 219x                 123x             111x 111x     111x 111x 4x 4x           111x                     27x 27x   27x 1x 1x           27x         27x                                     112x       112x   112x 128x     112x                       301x   301x 301x                         26x     26x   26x 35x     35x 35x                 35x 35x   35x 35x             26x                             26x 26x     26x 26x 28x     26x   26x   26x 35x   35x 28x   28x         7x         26x                               26x            
import { Injectable, Logger } from '@nestjs/common';
import { LlmContent } from '../llm-provider/llm-provider.interface';
import { ApplicationStateService } from '../application-state/application-state.service';
 
/**
 * Most recent known version of a single file within the chat history.
 * One entry is kept per normalized file path in the unified file index.
 */
interface FileVersionInfo {
  /** Name of the tool that produced the most recent version of the file. */
  latestToolName: string;
  /**
   * History index of the most recent version: the index of the tool result
   * turn, or of the model turn when a file tool call has no recorded result.
   */
  latestIndex: number;
}
 
/**
 * Service that compresses chat history by redacting older file contents
 * when the same file is referenced multiple times across ANY in-scope tool.
 *
 * In-scope tools (Level 2):
 * - request_context tool results (file content in output)
 * - create_file/overwrite_file tool call arguments (file content in input)
 * - delete_file tool call arguments (invalidates all prior file content)
 *
 * The unified index ensures cross-tool redaction: e.g., if a file is read
 * via request_context and then overwritten via overwrite_file, the older
 * request_context result is redacted.
 *
 * The input history is never mutated: redacted turns and tool calls are
 * returned as shallow copies, untouched turns are returned by reference.
 */
@Injectable()
export class HistoryCompressionService {
  private readonly logger = new Logger(HistoryCompressionService.name);

  // Regex to detect file markers in request_context output
  // Format: "// File: ./path/to/file.ts" or "// File: path/to/file.ts"
  // Captures the rest of the line after "File: " (including optional ./).
  // The regex is global and shared, so callers must reset lastIndex first.
  private readonly FILE_MARKER_REGEX = /\/\/ File: (\.?[^\n]+)/g;

  // Tools that carry file content and participate in the unified version index
  private readonly FILE_CONTENT_TOOLS = [
    'create_file',
    'overwrite_file',
    'delete_file',
  ];

  // Tools that have content that can be redacted in their arguments
  private readonly FILE_WRITE_TOOLS = ['create_file', 'overwrite_file'];

  constructor(
    private readonly applicationStateService: ApplicationStateService,
  ) {}

  /**
   * Compresses history by redacting older file contents using a unified
   * file version index across all in-scope tools.
   *
   * Returns the input array itself (same reference) when compression is
   * disabled, the history is empty, or no file reference was found.
   *
   * @param history The chat history to compress
   * @returns Compressed history with older file contents redacted
   */
  async compress(history: LlmContent[]): Promise<LlmContent[]> {
    const enabled =
      await this.applicationStateService.getHistoryCompressionEnabled();
    if (!enabled) {
      this.logger.debug('History compression is disabled');
      return history;
    }

    if (history.length === 0) {
      return history;
    }

    // Build unified file version index across all in-scope tools
    const fileIndex = this.buildUnifiedFileIndex(history);

    // Nothing references a file: there is nothing to redact
    if (fileIndex.size === 0) {
      return history;
    }

    return this.redactBasedOnIndex(history, fileIndex);
  }

  /**
   * Builds a unified file version index by scanning ALL in-scope tools.
   *
   * Uses tool result indices for ordering so that comparisons between
   * request_context results and file-content tool calls are on equal footing.
   * For file-content tools (create_file/overwrite_file/delete_file), the
   * effective index is the index of their tool result; the model turn index
   * is only used as a fallback when no result turn exists for the call.
   *
   * @param history The chat history to scan
   * @returns Map of normalized file path -> {latestIndex, latestToolName}
   */
  private buildUnifiedFileIndex(
    history: LlmContent[],
  ): Map<string, FileVersionInfo> {
    const fileIndex = new Map<string, FileVersionInfo>();
    const requestContextCallIds = this.collectRequestContextCallIds(history);
    const toolResultIndex = this.buildToolResultIndex(history);

    // Reverse pass: the first request_context hit for a path is its newest one
    for (let i = history.length - 1; i >= 0; i--) {
      const turn = history[i];

      // Case 1: tool result from request_context
      if (
        turn.role === 'tool' &&
        turn.tool_call_id &&
        requestContextCallIds.has(turn.tool_call_id)
      ) {
        const filePaths = this.extractFilePaths(this.getTurnText(turn));
        for (const filePath of filePaths) {
          const normalized = this.normalizePath(filePath);
          if (!fileIndex.has(normalized)) {
            fileIndex.set(normalized, {
              latestIndex: i,
              latestToolName: 'request_context',
            });
          }
        }
      }

      // Case 2: model turn with in-scope file tool calls
      if (turn.role === 'model' && turn.tool_calls) {
        // Iterate calls back-to-front so that, on equal effective indices,
        // the last call of the turn is the one recorded
        for (let j = turn.tool_calls.length - 1; j >= 0; j--) {
          const tc = turn.tool_calls[j];
          const toolName = tc.function?.name;
          if (!this.isFileContentTool(toolName)) {
            continue;
          }

          const filePath = this.extractFilePathFromToolCall(tc);
          if (!filePath) {
            continue;
          }

          const normalized = this.normalizePath(filePath);
          // Use tool result index as effective index for consistent ordering
          const effectiveIndex = toolResultIndex.get(tc.id) ?? i;
          const current = fileIndex.get(normalized);
          if (!current || effectiveIndex > current.latestIndex) {
            fileIndex.set(normalized, {
              latestIndex: effectiveIndex,
              latestToolName: toolName!,
            });
          }
        }
      }
    }

    return fileIndex;
  }

  /**
   * Redacts file content in history based on the unified file version index.
   * Handles both request_context tool results and file-content tool call arguments.
   * Skips delete_file from redaction (no content to redact).
   *
   * @param history The chat history to redact
   * @param fileIndex Unified index produced by buildUnifiedFileIndex
   * @returns New history array; turns without redaction are reused as-is
   */
  private redactBasedOnIndex(
    history: LlmContent[],
    fileIndex: Map<string, FileVersionInfo>,
  ): LlmContent[] {
    const requestContextCallIds = this.collectRequestContextCallIds(history);
    const toolResultIndex = this.buildToolResultIndex(history);

    let redactedResults = 0;
    let redactedToolCalls = 0;

    const result = history.map((turn, index) => {
      // Redact request_context tool results
      if (
        turn.role === 'tool' &&
        turn.tool_call_id &&
        requestContextCallIds.has(turn.tool_call_id)
      ) {
        const filesToRedact: { filePath: string; latestToolName: string }[] =
          [];

        for (const filePath of this.extractFilePaths(this.getTurnText(turn))) {
          const versionInfo = fileIndex.get(this.normalizePath(filePath));
          // Only redact when a strictly newer version exists elsewhere
          if (versionInfo && versionInfo.latestIndex > index) {
            filesToRedact.push({
              filePath,
              latestToolName: versionInfo.latestToolName,
            });
          }
        }

        if (filesToRedact.length > 0) {
          redactedResults++;
          return this.redactToolResult(turn, filesToRedact);
        }
      }

      // Redact file-write tool call arguments (NOT delete_file - no content to redact)
      if (turn.role === 'model' && turn.tool_calls) {
        let hasRedaction = false;
        const newToolCalls = turn.tool_calls.map((tc) => {
          if (!this.isFileWriteTool(tc.function?.name)) {
            return tc;
          }

          const filePath = this.extractFilePathFromToolCall(tc);
          if (!filePath) {
            return tc;
          }

          const versionInfo = fileIndex.get(this.normalizePath(filePath));
          // Use effective index (tool result index) for comparison
          const effectiveIndex = toolResultIndex.get(tc.id) ?? index;

          if (versionInfo && versionInfo.latestIndex > effectiveIndex) {
            hasRedaction = true;
            redactedToolCalls++;
            return this.redactToolCallContent(
              tc,
              filePath,
              versionInfo.latestToolName,
            );
          }

          return tc;
        });

        if (hasRedaction) {
          return {
            ...turn,
            tool_calls: newToolCalls,
          };
        }
      }

      return turn;
    });

    if (redactedResults > 0 || redactedToolCalls > 0) {
      const parts: string[] = [];
      if (redactedResults > 0) {
        parts.push(`${redactedResults} older tool result(s)`);
      }
      if (redactedToolCalls > 0) {
        parts.push(`${redactedToolCalls} older tool call(s)`);
      }
      this.logger.log(`Compressed history: redacted ${parts.join(' and ')}`);
    }

    return result;
  }

  /**
   * Collects the ids of every request_context tool call made by model turns.
   * Tool result turns are recognised as request_context output through
   * membership of their tool_call_id in this set.
   */
  private collectRequestContextCallIds(history: LlmContent[]): Set<string> {
    const ids = new Set<string>();
    for (const turn of history) {
      if (turn.role === 'model' && turn.tool_calls) {
        for (const tc of turn.tool_calls) {
          if (tc.function?.name === 'request_context') {
            ids.add(tc.id);
          }
        }
      }
    }
    return ids;
  }

  /**
   * Maps each tool_call_id to the history index of its tool result turn.
   * If an id appears on several result turns, the last one wins.
   */
  private buildToolResultIndex(history: LlmContent[]): Map<string, number> {
    const resultIndex = new Map<string, number>();
    for (let i = 0; i < history.length; i++) {
      const turn = history[i];
      if (turn.role === 'tool' && turn.tool_call_id) {
        resultIndex.set(turn.tool_call_id, i);
      }
    }
    return resultIndex;
  }

  /**
   * Concatenates the text of all parts of a turn (no separator).
   */
  private getTurnText(turn: LlmContent): string {
    return turn.parts.map((p) => p.text).join('');
  }

  /**
   * Checks if a tool name is a file content tool in scope for compression.
   * Includes create_file, overwrite_file, and delete_file.
   * Used for index building.
   */
  private isFileContentTool(toolName: string | undefined): boolean {
    return !!toolName && this.FILE_CONTENT_TOOLS.includes(toolName);
  }

  /**
   * Checks if a tool name is a file write tool with content to redact.
   * Includes create_file and overwrite_file (NOT delete_file).
   * Used for redaction.
   */
  private isFileWriteTool(toolName: string | undefined): boolean {
    return !!toolName && this.FILE_WRITE_TOOLS.includes(toolName);
  }

  /**
   * Extracts file_path from a tool call's arguments.
   *
   * Arguments may be a JSON string or an already-parsed object.
   *
   * @returns The file_path when it is a non-empty string; null when the
   *   arguments are missing, are not valid JSON, do not decode to an object,
   *   or carry no usable file_path
   */
  private extractFilePathFromToolCall(toolCall: any): string | null {
    const args = toolCall?.function?.arguments;
    if (!args) return null;

    let parsedArgs: unknown = args;
    if (typeof args === 'string') {
      try {
        parsedArgs = JSON.parse(args);
      } catch {
        return null;
      }
    }

    // JSON.parse can legitimately yield null or a primitive ("null", "42")
    if (typeof parsedArgs !== 'object' || parsedArgs === null) {
      return null;
    }

    const filePath = (parsedArgs as { file_path?: unknown }).file_path;
    return typeof filePath === 'string' && filePath.length > 0
      ? filePath
      : null;
  }

  /**
   * Redacts the content in a tool call's arguments.
   *
   * All other arguments are preserved and the arguments keep their original
   * representation (JSON string in -> JSON string out, object in -> object
   * out). The tool call is returned unchanged when its arguments cannot be
   * decoded to an object.
   *
   * @param toolCall The tool call whose content should be redacted
   * @param filePath File path to mention in the redaction message
   * @param latestToolName Tool that holds the newer version of the file
   * @returns A shallow copy of the tool call with redacted content
   */
  private redactToolCallContent(
    toolCall: any,
    filePath: string,
    latestToolName: string,
  ): any {
    const args = toolCall.function?.arguments;
    const argsAreSerialized = typeof args === 'string';

    let parsedArgs: unknown = args;
    if (argsAreSerialized) {
      try {
        parsedArgs = JSON.parse(args);
      } catch {
        return toolCall;
      }
    }

    if (typeof parsedArgs !== 'object' || parsedArgs === null) {
      return toolCall;
    }

    const redactedArgs = {
      ...parsedArgs,
      content: `[Content for ${filePath} redacted - see newer ${latestToolName} for this file]`,
    };

    return {
      ...toolCall,
      function: {
        ...toolCall.function,
        arguments: argsAreSerialized
          ? JSON.stringify(redactedArgs)
          : redactedArgs,
      },
    };
  }

  /**
   * Extracts file paths from request_context output.
   *
   * Paths are trimmed so that trailing whitespace or a "\r" from CRLF line
   * endings does not leak into the path, matching parseFileBlocks.
   *
   * @param text The tool result text
   * @returns Array of file paths found in the output, in order of appearance
   */
  private extractFilePaths(text: string): string[] {
    const paths: string[] = [];

    // The regex is global and shared across calls: reset its cursor
    this.FILE_MARKER_REGEX.lastIndex = 0;

    let match: RegExpExecArray | null;
    while ((match = this.FILE_MARKER_REGEX.exec(text)) !== null) {
      const filePath = match[1].trim();
      if (filePath) {
        paths.push(filePath);
      }
    }

    return paths;
  }

  /**
   * Normalizes a file path for comparison.
   * Removes surrounding whitespace, leading "./" and "/" prefixes, and
   * trailing slashes. Leading dots that belong to a name (".env",
   * "../shared") are kept so that distinct files never collide.
   *
   * @param path The file path to normalize
   * @returns Normalized path
   */
  private normalizePath(path: string): string {
    return path
      .trim()
      // Leading run of "./" and "/" prefixes (a dot is only consumed when a
      // slash follows it, which protects dotfiles and "..")
      .replace(/^(?:\.?\/+)+/, '')
      // Trailing slashes
      .replace(/\/+$/, '');
  }

  /**
   * Parses a tool result into file blocks.
   * Each block contains the file path and its content; the content includes
   * the "// File: " marker line itself. Text that does not start with a
   * marker (e.g. anything before the first marker) yields no block.
   *
   * @param text The tool result text
   * @returns Array of file blocks
   */
  private parseFileBlocks(
    text: string,
  ): { filePath: string; content: string }[] {
    const blocks: { filePath: string; content: string }[] = [];

    // Split in front of each file marker so the marker stays with its block
    const parts = text.split(/(?=\/\/ File: )/g);

    for (const part of parts) {
      if (!part.trim()) continue;

      // The path lives on the first line; a part without a newline is a
      // marker line with no content after it
      const firstLineEnd = part.indexOf('\n');
      const firstLine =
        firstLineEnd === -1 ? part : part.substring(0, firstLineEnd);
      const match = firstLine.match(/\/\/ File: (.+)/);

      if (match) {
        blocks.push({
          filePath: match[1].trim(),
          content: part,
        });
      }
    }

    return blocks;
  }

  /**
   * Redacts a tool result by replacing file content with a redaction message.
   * Preserves content for files that are not in the redaction list.
   *
   * The returned turn has a single text part: kept blocks and redaction
   * messages joined by a blank line. Text that is not part of any file block
   * is not carried over.
   *
   * @param turn The tool result turn to redact
   * @param filesToRedact List of files that have newer content, with the tool name of the newer version
   * @returns New turn with redacted content
   */
  private redactToolResult(
    turn: LlmContent,
    filesToRedact: { filePath: string; latestToolName: string }[],
  ): LlmContent {
    const blocks = this.parseFileBlocks(this.getTurnText(turn));

    // Normalized path -> latestToolName, used for the redaction messages
    const redactionInfo = new Map<string, string>();
    for (const f of filesToRedact) {
      redactionInfo.set(this.normalizePath(f.filePath), f.latestToolName);
    }

    const resultParts = blocks.map((block) => {
      const latestToolName = redactionInfo.get(
        this.normalizePath(block.filePath),
      );
      return latestToolName === undefined
        ? block.content
        : `[Content for ${block.filePath} redacted - see newer ${latestToolName}]`;
    });

    // If no blocks were parsed (unexpected format), fall back to simple redaction
    if (resultParts.length === 0) {
      const fileList =
        filesToRedact.length === 1
          ? filesToRedact[0].filePath
          : `${filesToRedact.length} files`;
      const latestToolName = filesToRedact[0].latestToolName;
      return {
        ...turn,
        parts: [
          {
            text: `[Content for ${fileList} redacted - see newer ${latestToolName}]`,
          },
        ],
      };
    }

    return {
      ...turn,
      parts: [{ text: resultParts.join('\n\n') }],
    };
  }
}