All files / src/llm-orchestration/parser llm-output-parser.service.ts

95.77% Statements 68/71
88% Branches 22/25
100% Functions 6/6
95.58% Lines 65/68

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 19217x     17x                 17x 25x                             37x 2x 2x       35x                   2x 3x 3x     3x   3x             2x                   35x 3x     32x 32x 32x       32x 32x   32x 19x 19x 13x     2x 2x 2x       30x   80x   30x         30x 30x 50x 50x       50x   50x 50x       50x       50x 133x             50x 1x 1x       49x 133x 133x   133x 133x 133x       133x       133x 1x     1x         132x 130x       49x 49x   49x 1x     1x       48x   48x       48x                 30x      
import { Injectable, Logger } from '@nestjs/common';
import { Action, ParsedLlmOutput } from '../llm-orchestration.interfaces';
import { LlmToolCall } from '../../llm-provider/llm-provider.interface';
import {
  ACTION_ITEM_START_TAG,
  FIELD_TOOL_NAME,
  PARTIAL_ACTION_ITEM_END_TAG,
  SPECIAL_ACTION_ITEM_TOKEN,
  SPECIAL_FIELD_TOKEN,
} from './parsing.constants';
 
/**
 * Converts raw LLM output into the internal {@link ParsedLlmOutput} shape.
 *
 * Two input styles are supported:
 *  - native tool calls supplied by the provider alongside the text response;
 *  - legacy tagged action blocks embedded in the response text, delimited by
 *    the tag/token patterns defined in `parsing.constants`.
 */
@Injectable()
export class LlmOutputParserService {
  private readonly logger = new Logger(LlmOutputParserService.name);

  /**
   * Parses a raw LLM response into generic Action objects plus an explanation.
   *
   * Native tool calls take precedence: when at least one is supplied, the
   * response text is used only as the explanation and is not scanned for
   * tagged action blocks.
   *
   * @param rawResponse The raw string response from the LLM.
   * @param toolCalls Optional native tool calls from the LLM (OpenAI/Z.AI format).
   * @returns A Promise resolving to a ParsedLlmOutput object. The promise
   *   rejects with a SyntaxError when a native tool call carries a non-empty
   *   argument string that is not valid JSON.
   */
  async parse(
    rawResponse: string,
    toolCalls?: LlmToolCall[],
  ): Promise<ParsedLlmOutput> {
    // 1. Handle native tool calls (if present)
    if (toolCalls && toolCalls.length > 0) {
      this.logger.log(`Parsing ${toolCalls.length} native tool calls.`);
      return this.parseNativeToolCalls(rawResponse, toolCalls);
    }

    // 2. Fall back to tagged-block parsing (legacy)
    return this.parseXmlActions(rawResponse);
  }

  /**
   * Parses native tool calls (JSON format) into the internal Action format.
   *
   * @param explanation Text that accompanied the tool calls; an empty string
   *   is reported as `null`.
   * @param toolCalls Tool calls to convert, one Action per call, in order.
   * @returns The explanation and the converted actions.
   * @throws SyntaxError if a call's argument string is non-empty and not
   *   valid JSON.
   */
  private parseNativeToolCalls(
    explanation: string,
    toolCalls: LlmToolCall[],
  ): ParsedLlmOutput {
    const actions: Action[] = toolCalls.map((toolCall) => {
      const { id, function: fn } = toolCall;
      const { name, arguments: args } = fn;

      // Arguments may arrive JSON-encoded or already decoded. A blank string
      // is treated as "no arguments" instead of letting JSON.parse('') throw.
      const parsedArgs =
        typeof args === 'string'
          ? args.trim() === ''
            ? {}
            : JSON.parse(args)
          : args;

      return {
        tool_name: name,
        arguments: parsedArgs,
        toolCallId: id, // Store ID for history correlation
      };
    });

    return {
      explanation: explanation || null,
      actions,
    };
  }

  /**
   * Parses tagged action blocks out of the response text.
   *
   * Text before the first action start tag becomes the explanation. Blocks
   * that have no field markers, lack a tool name, or raise while being parsed
   * are skipped with a warning; the remaining blocks are still returned.
   *
   * @param rawResponse The raw string response from the LLM.
   * @returns The explanation (or `null`) and the successfully parsed actions.
   */
  private parseXmlActions(rawResponse: string): ParsedLlmOutput {
    if (!rawResponse || rawResponse.trim() === '') {
      return { explanation: null, actions: [] };
    }

    const startTagRegex = new RegExp(ACTION_ITEM_START_TAG, 'i');
    // `search` yields -1 when the tag is absent, so no undefined index to handle.
    const firstActionStart = rawResponse.search(startTagRegex);

    let explanation: string | null = null;
    let actionsPart = rawResponse;

    if (firstActionStart > 0) {
      // A whitespace-only prefix counts as "no explanation", matching the
      // result produced when the response starts directly with a tag.
      explanation = rawResponse.substring(0, firstActionStart).trim() || null;
      actionsPart = rawResponse.substring(firstActionStart);
    } else if (firstActionStart === -1) {
      // No start tag. If an end tag is present this may be a partial response
      // and is still parsed as a block; otherwise the whole response is the
      // explanation.
      const partialEndTagRegex = new RegExp(PARTIAL_ACTION_ITEM_END_TAG, 'i');
      if (!partialEndTagRegex.test(rawResponse)) {
        return { explanation: rawResponse.trim(), actions: [] };
      }
    }

    const actionBlocks = actionsPart
      .split(startTagRegex)
      .filter((block) => block.trim() !== '');

    if (actionBlocks.length === 0) {
      this.logger.log('No action item blocks found in response.');
      return { explanation, actions: [] };
    }

    // Both patterns are loop-invariant, so build them once for all blocks.
    const endTagRegex = new RegExp(
      `${PARTIAL_ACTION_ITEM_END_TAG}${SPECIAL_ACTION_ITEM_TOKEN}?`,
      'i',
    );
    const fieldRegex = new RegExp(
      `${SPECIAL_FIELD_TOKEN}(.*?)${SPECIAL_FIELD_TOKEN}`,
      'g',
    );

    const parsedActions: Action[] = [];
    for (const block of actionBlocks) {
      try {
        const action = this.parseActionBlock(block, endTagRegex, fieldRegex);
        if (action !== null) {
          parsedActions.push(action);
        }
      } catch (error) {
        // The caught value is not guaranteed to be an Error instance.
        const reason = error instanceof Error ? error.message : String(error);
        this.logger.warn(
          `Skipping malformed action block due to error: ${reason}`,
          `Block content: ${block.substring(0, 200)}...`,
        );
      }
    }

    return { explanation, actions: parsedActions };
  }

  /**
   * Parses one action block (the text following a start tag) into an Action.
   *
   * @param block Block text, possibly still carrying its end tag and trailer.
   * @param endTagRegex Pattern marking the end of the block's content.
   * @param fieldRegex Global pattern matching a field marker; capture group 1
   *   is the field name.
   * @returns The parsed Action, or `null` when the block has no field markers
   *   or no usable tool name (a warning is logged in both cases).
   */
  private parseActionBlock(
    block: string,
    endTagRegex: RegExp,
    fieldRegex: RegExp,
  ): Action | null {
    // Everything from the end tag onwards is not part of the block's content.
    const cleanBlock = block.split(endTagRegex)[0].trim();

    // Find all field markers and their positions.
    const markers: { key: string; index: number; markerLength: number }[] = [];
    let match: RegExpExecArray | null;
    // The regex is shared between blocks and is stateful because of the `g` flag.
    fieldRegex.lastIndex = 0;
    while ((match = fieldRegex.exec(cleanBlock)) !== null) {
      if (match[0].length === 0) {
        // A zero-length match does not advance lastIndex; step manually so
        // the scan cannot loop forever.
        fieldRegex.lastIndex++;
        continue;
      }
      markers.push({
        key: match[1].trim(),
        index: match.index,
        markerLength: match[0].length,
      });
    }

    if (markers.length === 0) {
      this.logger.warn(`Skipping block with no field markers.`);
      return null;
    }

    // A field's value is the text between its marker and the next marker
    // (or the end of the block for the last field).
    const actionArgs: Record<string, string> = {};
    for (let i = 0; i < markers.length; i++) {
      const currentMarker = markers[i];
      const nextMarker = markers[i + 1];
      const valueStartIndex = currentMarker.index + currentMarker.markerLength;
      const valueEndIndex = nextMarker ? nextMarker.index : cleanBlock.length;
      const value = cleanBlock.substring(valueStartIndex, valueEndIndex).trim();

      const key = currentMarker.key.toLowerCase().replace(/[^a-z0-9_-]/g, '');

      if (!key) {
        this.logger.warn(
          `Skipping field with an empty key after sanitization. Original key: '${currentMarker.key}'`,
        );
        continue;
      }

      // First occurrence of a field wins. Use an own-property check: `in`
      // also sees inherited members, so a field named e.g. "constructor"
      // would otherwise be silently dropped.
      if (!Object.prototype.hasOwnProperty.call(actionArgs, key)) {
        actionArgs[key] = value;
      }
    }

    const toolNameKey = FIELD_TOOL_NAME.toLowerCase();
    const toolName = actionArgs[toolNameKey];

    // Field values are always strings here, so a falsy value means the tool
    // name is missing or empty.
    if (!toolName) {
      this.logger.warn(
        `Skipping action block due to missing or invalid 'tool_name'.`,
      );
      return null;
    }

    // The arguments object contains everything except the tool name.
    delete actionArgs[toolNameKey];

    return {
      tool_name: toolName,
      arguments: actionArgs,
    };
  }
}