All files / Cognigy-CLI/build/utils textSplitter.js

0% Statements 0/72
0% Branches 0/63
0% Functions 0/13
0% Lines 0/59

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119                                                                                                                                                                                                                                             
"use strict";
// Module-interop helper: exposes property `k` of `m` on `o` under the name `k2`
// (`k2` falls back to `k` when undefined). A `__createBinding` already present on
// `this` is reused instead of the implementations below.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Substitute an enumerable getter that reads m[k] on each access when `m` has no
    // own descriptor for `k`, when the descriptor is an accessor and `m` is not flagged
    // __esModule, or when it is a data property that is writable or configurable.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    // Variant used when Object.create is falsy: copy the current value by assignment.
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// Module-interop helper: stores `v` as the "default" property of `o`. A
// `__setModuleDefault` already present on `this` is reused when available.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    // Defined as an enumerable data property holding `v`.
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    // Variant used when Object.create is falsy: plain assignment.
    o["default"] = v;
});
// Module-interop helper: turns the result of `require()` into a namespace-style object.
// A `__importStar` already present on `this` is reused when available.
var __importStar = (this && this.__importStar) || (function () {
    // On first call, `ownKeys` replaces itself with Object.getOwnPropertyNames, or with a
    // for-in loop restricted to own properties when that function is unavailable, and
    // then delegates to the replacement.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        // Modules flagged __esModule are returned untouched.
        if (mod && mod.__esModule) return mod;
        var result = {};
        // Bind every own key of `mod` except "default" onto the fresh result object
        // (skipped entirely when `mod` is null or undefined).
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        // The original module value itself becomes `result.default`.
        __setModuleDefault(result, mod);
        return result;
    };
})();
// Flag the exports object with `__esModule: true` (the same flag __importStar checks
// before wrapping a required module).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. Both names are function declarations further down in
// the file, so they are hoisted and already defined at this point.
exports.splitDocs = splitDocs;
exports.splitText = splitText;
// Namespace object for the text splitter package; the splitter classes used below
// are looked up on it by name.
const splitters = __importStar(require("@langchain/textsplitters"));
/**
 * Splits documents into chunks with one of the supported text splitters and drops
 * every chunk whose `pageContent` contains `options.excludeString`.
 *
 * The chunk size handed to the splitter is `(options.chunkSize || 2000) - 1000`,
 * replaced by 1000 when that value is not positive. Chunk overlap is always 0.
 * NOTE(review): the reason for the 1000 reduction is not visible in this file.
 *
 * @param {Array} document - Value forwarded unchanged to the splitter's `splitDocuments`.
 * @param {object} options - Reads `splitter`, `chunkSize` and `excludeString`.
 * @param {string} defaultSplitter - Splitter name used when `options.splitter` is falsy.
 * @returns {Promise<Array>} The chunks that survive the `excludeString` filter.
 */
async function splitDocs(document, options, defaultSplitter) {
    // Names that map one-to-one onto a splitter class; anything else falls back to
    // the recursive character splitter.
    const supportedNames = new Set([
        'CharacterTextSplitter',
        'MarkdownTextSplitter',
        'TokenTextSplitter',
        'RecursiveCharacterTextSplitter',
    ]);
    const requestedName = options.splitter || defaultSplitter;

    const reducedSize = (options.chunkSize || 2000) - 1000;
    let chunkSize = 1000;
    if (reducedSize > 0) {
        chunkSize = reducedSize;
    }

    const className = supportedNames.has(requestedName)
        ? requestedName
        : 'RecursiveCharacterTextSplitter';
    const SplitterClass = splitters[className];
    const splitter = new SplitterClass({ chunkSize, chunkOverlap: 0 });

    const chunks = await splitter.splitDocuments(document);
    return chunks.filter((chunk) => {
        const marker = options.excludeString;
        // Keep the chunk unless an exclude marker is set and the chunk contains it.
        return !(marker && chunk.pageContent.includes(marker));
    });
}
/**
 * Splits a text into chunks with one of the supported text splitters and drops
 * every chunk that contains `options.excludeString`.
 *
 * Chunk size defaults to 2000. Chunk overlap defaults to 200, except when the
 * effective chunk size is 200 or less: the splitter constructor rejects an overlap
 * that is not smaller than the chunk size, so the implicit default is scaled down
 * to a tenth of the chunk size in that case. An explicitly supplied
 * `options.chunkOverlap` is always passed through unchanged.
 *
 * @param {string} text - Value forwarded unchanged to the splitter's `splitText`.
 * @param {object} options - Reads `splitter`, `chunkSize`, `chunkOverlap` and `excludeString`.
 * @param {string} defaultSplitter - Splitter name used when `options.splitter` is falsy.
 * @returns {Promise<string[]>} The chunks that survive the `excludeString` filter.
 */
async function splitText(text, options, defaultSplitter) {
    // Names that map one-to-one onto a splitter class; anything else falls back to
    // the recursive character splitter.
    const supportedNames = new Set([
        'CharacterTextSplitter',
        'MarkdownTextSplitter',
        'TokenTextSplitter',
        'RecursiveCharacterTextSplitter',
    ]);
    const splitterToUse = options.splitter || defaultSplitter;
    const chunkSize = options.chunkSize || 2000;

    // Only the implicit default is adjusted; non-numeric sizes keep the historic 200.
    const numericSize = Number(chunkSize);
    const defaultOverlap = numericSize <= 200
        ? Math.max(0, Math.floor(numericSize / 10))
        : 200;
    const chunkOverlap = typeof options.chunkOverlap !== 'undefined'
        ? options.chunkOverlap
        : defaultOverlap;

    const className = supportedNames.has(splitterToUse)
        ? splitterToUse
        : 'RecursiveCharacterTextSplitter';
    const splitter = new splitters[className]({ chunkSize, chunkOverlap });

    const splitParagraphs = await splitter.splitText(text);
    return splitParagraphs.filter((paragraph) => !options.excludeString || !paragraph.includes(options.excludeString));
}