All files / src/providers/gemini Transcription.ts

83.33% Statements 20/24
50% Branches 6/12
100% Functions 3/3
82.6% Lines 19/23

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79                63x     12x 12x       1x 1x   1x 1x     1x   1x 1x     1x       1x                                       1x   1x               1x       1x 1x 1x   1x              
import { TranscriptionRequest, TranscriptionResponse } from "../Provider.js";
import { handleGeminiError } from "./Errors.js";
import { BinaryUtils } from "../../utils/Binary.js";
import { GeminiGenerateContentRequest, GeminiGenerateContentResponse } from "./types.js";
import { logger } from "../../utils/logger.js";
 
/**
 * Transcribes audio by sending it, inline and base64-encoded, to Gemini's
 * `generateContent` endpoint together with a text instruction.
 */
export class GeminiTranscription {
  /** Base instruction sent with every request; language and caller prompt are appended to it. */
  private static readonly DEFAULT_PROMPT =
    "Transcribe the provided audio and respond with only the transcript text.";

  /**
   * @param baseUrl - Prefix of the endpoint; `/models/{model}:generateContent` is appended to it.
   * @param apiKey - Sent as the `key` query parameter of every request.
   */
  constructor(
    private readonly baseUrl: string,
    private readonly apiKey: string
  ) {}

  /**
   * Runs a single transcription request.
   *
   * The prompt is `DEFAULT_PROMPT`, followed by a "respond in <language>"
   * sentence when `request.language` is set, followed by `request.prompt`
   * when it is set.
   *
   * @param request - Audio source (`file`) plus optional `model`, `language` and `prompt`.
   * @returns The concatenated text of the first candidate's parts (empty string
   *   when there is none), the model that was used, and an empty `segments` list.
   * @throws Error when `BinaryUtils.toBase64` yields no result for `request.file`.
   *   Non-OK HTTP responses are handed to `handleGeminiError`, which is
   *   presumably where the provider error is raised — confirm in `Errors.ts`.
   */
  async execute(request: TranscriptionRequest): Promise<TranscriptionResponse> {
    // `||` (not `??`) on purpose: an empty model string also falls back to the default.
    const model = request.model || "gemini-2.0-flash";
    const url = `${this.baseUrl}/models/${model}:generateContent?key=${this.apiKey}`;

    const result = await BinaryUtils.toBase64(request.file);
    if (!result) {
      throw new Error(`Failed to load audio file: ${request.file}`);
    }
    const { data: base64Data, mimeType } = result;

    let prompt = GeminiTranscription.DEFAULT_PROMPT;
    if (request.language) {
      prompt += ` Respond in the ${request.language} language.`;
    }
    if (request.prompt) {
      prompt += ` ${request.prompt}`;
    }

    const payload: GeminiGenerateContentRequest = {
      contents: [
        {
          role: "user",
          parts: [
            { text: prompt },
            {
              inlineData: {
                mimeType,
                data: base64Data
              }
            }
          ]
        }
      ],
      generationConfig: {
        responseMimeType: "text/plain"
      }
    };

    // NOTE(review): `url` carries the API key as a query parameter and is passed
    // to the logger as-is — confirm that `logger.logRequest` redacts it.
    logger.logRequest("Gemini", "POST", url, payload);

    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify(payload)
    });

    if (!response.ok) {
      await handleGeminiError(response, model);
    }

    const json = (await response.json()) as GeminiGenerateContentResponse;
    logger.logResponse("Gemini", response.status, response.statusText, json);
    // `join` renders parts without a `text` field as empty strings, so only
    // textual parts of the first candidate contribute to the transcript.
    const text = json.candidates?.[0]?.content?.parts?.map((p) => p.text).join("") || "";

    return {
      text,
      model,
      segments: [] // Gemini's generateContent doesn't return segments by default
    };
  }
}