All files / packages/tokenpatch index.ts

95.34% Statements 41/43
90.47% Branches 19/21
100% Functions 4/4
95.34% Lines 41/43

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 1118x 8x         8x         8x 8x             8x                 20x 10x   10x     20x 10x 10x 10x             10x               10x   10x     10x   10x 10x   10x 2x     2x   40x 2x 8x 2x 2x   18x 2x   6x 6x   225x       6x 6x         4x 4x   2x               10x 10x 10x         10x    
import { initializeParser } from './parser';
import {
  handleBeginOfFilePatch,
  handleEndOfFilePatch,
  handleStandardPatch,
} from './patcher';
import {
  SPECIAL_PATCH_BEGIN_FILE_MARKER,
  SPECIAL_PATCH_END_FILE_MARKER,
} from '../../src/llm-orchestration/parser/parsing.constants';
import { TokenizerStrategy } from './tokenizer.interface';
import { TreeSitterTokenizer } from './strategies/tree-sitter-tokenizer';
import { TiktokenTokenizer } from './strategies/tiktoken-tokenizer';
 
/**
 * Options accepted by `applySnippetPatch` to choose how source and patch text
 * are tokenized.
 *
 * When neither field is set, `applySnippetPatch` falls back to the Tiktoken
 * tokenizer.
 */
export interface ApplyPatchOptions {
  /**
   * Grammar path handed to `initializeParser` to build a tree-sitter based
   * tokenizer. Only consulted when `useTiktoken` is not truthy.
   */
  grammarPath?: string;
  /**
   * When truthy, the Tiktoken tokenizer is used even if `grammarPath` is
   * also provided.
   */
  useTiktoken?: boolean;
}
 
/**
 * Applies a snippet patch to `sourceCode` and returns the patched text.
 *
 * The source and the patch are tokenized with the selected strategy, one of
 * the patch handlers locates the region of the source to replace, and the
 * result is assembled as
 * `source[0, replaceStart) + patch[patchInsertStart, patchInsertEnd) + source[replaceEnd, …)`.
 *
 * Patch kinds, checked in this order:
 * 1. The patch contains a `// <SPECIAL_PATCH_BEGIN_FILE_MARKER>` comment: the
 *    marker comment is removed and `handleBeginOfFilePatch` is used.
 * 2. The patch contains a `// <SPECIAL_PATCH_END_FILE_MARKER>` comment: the
 *    marker comment is removed and `handleEndOfFilePatch` is used.
 * 3. Otherwise the trimmed patch is matched with `handleStandardPatch`.
 *
 * @param sourceCode - Original text to patch.
 * @param patchCode - Snippet to apply; may contain one of the special
 *   begin/end-of-file marker comments.
 * @param optionsOrGrammarPath - Either an {@link ApplyPatchOptions} object or,
 *   for backward compatibility, a grammar path string. Defaults to `{}`, which
 *   selects the Tiktoken tokenizer.
 * @returns The source text with the matched region replaced by the patch
 *   content.
 * @throws Presumably whatever `initializeParser` or the patch handlers throw;
 *   nothing is caught here.
 */
export async function applySnippetPatch(
  sourceCode: string,
  patchCode: string,
  optionsOrGrammarPath: string | ApplyPatchOptions = {},
): Promise<string> {
  // Backward compatibility for when the 3rd argument was just a grammarPath string.
  const options: ApplyPatchOptions =
    typeof optionsOrGrammarPath === 'string'
      ? { grammarPath: optionsOrGrammarPath }
      : optionsOrGrammarPath;

  // Decide the tokenizer exactly once so the edge-trimming heuristic below can
  // never disagree with the strategy that was actually instantiated.
  let strategy: TokenizerStrategy;
  let isTiktoken: boolean;
  if (!options.useTiktoken && options.grammarPath) {
    const parser = await initializeParser(options.grammarPath);
    strategy = new TreeSitterTokenizer(parser);
    isTiktoken = false;
  } else {
    // Explicitly requested, or the default when no grammar path is provided.
    strategy = new TiktokenTokenizer();
    isTiktoken = true;
  }

  const sourceTokens = strategy.tokenize(sourceCode);

  let patchResult: {
    replaceStart: number;
    replaceEnd: number;
    patchInsertStart: number;
    patchInsertEnd: number;
  };
  let processedPatchCode = patchCode;

  // Each regex matches the marker line comment through the end of its line.
  const beginOfFileRegex = new RegExp(
    `//\\s*${SPECIAL_PATCH_BEGIN_FILE_MARKER}.*`,
  );
  const endOfFileRegex = new RegExp(`//\\s*${SPECIAL_PATCH_END_FILE_MARKER}.*`);

  const hasBeginOfFile = beginOfFileRegex.test(patchCode);
  const hasEndOfFile = endOfFileRegex.test(patchCode);

  if (hasBeginOfFile) {
    // Begin-of-file wins if both markers are present.
    processedPatchCode = patchCode.replace(beginOfFileRegex, '');
    // Tokenize the patch without the marker; drop empty and bare-newline
    // tokens in case a strategy produces them.
    const patchTokens = strategy
      .tokenize(processedPatchCode)
      .filter((t) => t.text !== '' && t.text !== '\n');
    patchResult = handleBeginOfFilePatch(sourceTokens, patchTokens);
  } else if (hasEndOfFile) {
    processedPatchCode = patchCode.replace(endOfFileRegex, '');
    const patchTokens = strategy
      .tokenize(processedPatchCode)
      .filter((t) => t.text !== '' && t.text !== '\n');
    patchResult = handleEndOfFilePatch(sourceTokens, patchTokens, sourceCode);
  } else {
    processedPatchCode = patchCode.trim();
    const patchTokens = strategy
      .tokenize(processedPatchCode)
      .filter((t) => t.text !== '');

    // Heuristic: with Tiktoken, discard the first and last 2 tokens before
    // matching to improve resilience when the LLM hallucinates extra
    // delimiters or context at the edges of the snippet. Only applied when the
    // patch has at least 6 tokens and is shorter than the source, so at least
    // 2 tokens always remain to match on.
    const edgeTrim = 2;
    const minTokensForTrim = 6;
    const shouldTrimEdges =
      isTiktoken &&
      patchTokens.length >= minTokensForTrim &&
      patchTokens.length < sourceTokens.length;

    patchResult = handleStandardPatch(
      sourceTokens,
      shouldTrimEdges ? patchTokens.slice(edgeTrim, -edgeTrim) : patchTokens,
    );
  }

  // NOTE: replaceStart/End are byte offsets.
  // If the code can contain non-ASCII characters, byte offsets must be mapped
  // to JS string indices before slicing. For pure ASCII this works as-is.
  const prefix = sourceCode.slice(0, patchResult.replaceStart);
  const suffix = sourceCode.slice(patchResult.replaceEnd);
  const finalPatchContent = processedPatchCode.slice(
    patchResult.patchInsertStart,
    patchResult.patchInsertEnd,
  );

  return prefix + finalPatchContent + suffix;
}