All files / Cognigy-CLI/build/utils paragraphSplitter.js

0% Statements 0/22
0% Branches 0/27
0% Functions 0/2
0% Lines 0/22

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33                                                                 
"use strict";
// Interop helper for default imports: reuse a helper already present on the
// module-level `this` if there is one, otherwise fall back to the local
// implementation below.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules flagged with `__esModule` are passed through unchanged.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Anything else is wrapped so that callers can always read `.default`.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.splitParagraphsByTokenLength = splitParagraphsByTokenLength;
const gpt_3_encoder_1 = require("gpt-3-encoder");
const config_1 = __importDefault(require("./config"));
/**
 * Builds a paragraph from `title`, `body` and `metadata` and, when its encoded
 * token count exceeds `config.maxNumberOfTokens`, splits the body into several
 * chunks along sentence boundaries.
 *
 * Every chunk produced by the split path has the shape
 * `title\n<sentences>\nmetadata` (trimmed), so title and metadata are repeated
 * on each chunk. Chunk size is steered by a character-length target derived
 * from the token count; it is a heuristic, not a hard token limit.
 *
 * @param {string} [title] - Optional heading placed at the top of every chunk.
 * @param {string} [body] - Optional main text; this is the part that is split.
 * @param {string} [metadata] - Optional trailer appended to every chunk.
 * @returns {string[]} A single-element array with the unsplit paragraph when it
 *   fits (or when there is no body text to split), otherwise one string per chunk.
 */
function splitParagraphsByTokenLength(title, body, metadata) {
    const header = title ? title + '\n' : '';
    const footer = metadata ? metadata + '\n' : '';
    const paragraph = `${header}${body ? body + '\n' : ''}${footer}`;
    const tokenCount = (0, gpt_3_encoder_1.encode)(paragraph).length;
    const maxTokens = config_1.default.maxNumberOfTokens;
    if (tokenCount > maxTokens) {
        const parts = Math.ceil(tokenCount / maxTokens);
        // Character budget per chunk, leaving room for the repeated metadata.
        const targetLength = Math.ceil(paragraph.length / parts) - (metadata?.length || 0);
        // `[.!?]*` (rather than `+`) keeps trailing text that has no terminator;
        // `body || ''` avoids a TypeError when body is null/undefined.
        const sentences = (body || '').match(/[^.!?]+[.!?]*/g) || [];
        const result = [];
        const flush = (content) => {
            result.push(`${content}\n${footer}`.trim());
        };
        let currentParagraph = header;
        for (const sentence of sentences) {
            const hasContent = currentParagraph.length > header.length;
            // Only close a chunk that already holds a sentence; an oversized
            // sentence therefore gets its own chunk instead of an empty one.
            if (hasContent && currentParagraph.length + sentence.length >= targetLength) {
                flush(currentParagraph);
                currentParagraph = header;
            }
            // The sentence is always kept. At the start of a chunk its leading
            // whitespace is dropped, which also makes whitespace-only fragments
            // a no-op rather than a title/metadata-only chunk.
            currentParagraph += currentParagraph.length > header.length
                ? sentence
                : sentence.trimStart();
        }
        // Emit whatever is left after the last sentence.
        if (currentParagraph.length > header.length) {
            flush(currentParagraph);
        }
        // Nothing splittable (empty body): return the paragraph rather than
        // silently dropping title and metadata.
        return result.length > 0 ? result : [paragraph];
    }
    return [paragraph];
}