Press n or j to go to the next uncovered block, b, p or k for the previous block.
import { TranscriptionRequest, TranscriptionResponse } from "../Provider.js";
import { handleGeminiError } from "./Errors.js";
import { BinaryUtils } from "../../utils/Binary.js";
import { GeminiGenerateContentRequest, GeminiGenerateContentResponse } from "./types.js";
import { logger } from "../../utils/logger.js";
/**
 * Audio transcription backed by Gemini's `generateContent` endpoint.
 *
 * The audio file is loaded through `BinaryUtils.toBase64`, sent inline next to
 * a text instruction, and the text parts of the first candidate are joined
 * into the transcript.
 */
export class GeminiTranscription {
  /** Base instruction sent with every request; language and caller hints are appended to it. */
  private static readonly DEFAULT_PROMPT =
    "Transcribe the provided audio and respond with only the transcript text.";

  /** Placeholder that replaces the API key in any URL handed to the logger. */
  private static readonly REDACTED_KEY = "[REDACTED]";

  /**
   * @param baseUrl - URL prefix placed in front of `/models/<model>:generateContent`.
   * @param apiKey - Key sent to the API as the `key` query parameter.
   */
  constructor(
    private readonly baseUrl: string,
    private readonly apiKey: string
  ) {}

  /**
   * Sends the audio referenced by `request.file` to Gemini and returns the transcript.
   *
   * @param request - Transcription options. `model` falls back to
   *   `"gemini-2.0-flash"`; `language` and `prompt`, when set, are appended to
   *   the default instruction in that order.
   * @returns The concatenated text parts of the first candidate (empty string
   *   when there are none), the model that was used, and an empty `segments` list.
   * @throws Error when `BinaryUtils.toBase64` yields a falsy result for the file.
   */
  async execute(request: TranscriptionRequest): Promise<TranscriptionResponse> {
    const model = request.model || "gemini-2.0-flash";
    const endpoint = `${this.baseUrl}/models/${model}:generateContent`;
    const url = `${endpoint}?key=${this.apiKey}`;

    const result = await BinaryUtils.toBase64(request.file);
    if (!result) {
      throw new Error(`Failed to load audio file: ${request.file}`);
    }
    const { data: base64Data, mimeType } = result;

    let prompt = GeminiTranscription.DEFAULT_PROMPT;
    if (request.language) {
      prompt += ` Respond in the ${request.language} language.`;
    }
    if (request.prompt) {
      prompt += ` ${request.prompt}`;
    }

    const payload: GeminiGenerateContentRequest = {
      contents: [
        {
          role: "user",
          parts: [
            { text: prompt },
            {
              inlineData: {
                mimeType,
                data: base64Data
              }
            }
          ]
        }
      ],
      generationConfig: {
        responseMimeType: "text/plain"
      }
    };

    // The real URL carries the API key as a query parameter; never write it to
    // the logs. Only the logged copy is redacted, the request itself is unchanged.
    logger.logRequest(
      "Gemini",
      "POST",
      `${endpoint}?key=${GeminiTranscription.REDACTED_KEY}`,
      payload
    );

    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify(payload)
    });

    if (!response.ok) {
      // NOTE(review): handleGeminiError is presumably expected to throw; if it
      // ever returns, execution continues with the failed response — confirm.
      await handleGeminiError(response, model);
    }

    const json = (await response.json()) as GeminiGenerateContentResponse;
    logger.logResponse("Gemini", response.status, response.statusText, json);

    // Parts without text contribute nothing: join() renders undefined as "".
    const text = json.candidates?.[0]?.content?.parts?.map((p) => p.text).join("") || "";

    return {
      text,
      model,
      segments: [] // Gemini's generateContent doesn't return segments by default
    };
  }
}
|