Press n or j to go to the next uncovered block, b, p or k for the previous block.
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.splitParagraphsByTokenLength = splitParagraphsByTokenLength;
const gpt_3_encoder_1 = require("gpt-3-encoder");
const config_1 = __importDefault(require("./config"));

/**
 * Builds a paragraph from `title`, `body` and `metadata` (each followed by a
 * newline when present) and, if its encoded token count exceeds
 * `config.maxNumberOfTokens`, splits the body into several chunks.
 *
 * Each chunk has the shape `title\n<sentences>\n<metadata>` (trimmed), so the
 * title and metadata are repeated on every chunk. Chunk size is estimated in
 * characters, not tokens: the paragraph length is divided by the number of
 * parts needed, minus the metadata length.
 *
 * @param {string} [title] - Optional heading prepended to every chunk.
 * @param {string} [body] - Main text; split on runs of `.`, `!` or `?`.
 * @param {string} [metadata] - Optional trailer appended to every chunk.
 * @returns {string[]} `[paragraph]` when the paragraph is within the token
 *   limit; otherwise the list of chunks (empty when there is no body text
 *   to distribute).
 */
function splitParagraphsByTokenLength(title, body, metadata) {
    const header = title ? `${title}\n` : '';
    const footer = metadata ? `${metadata}\n` : '';
    const paragraph = `${header}${body ? `${body}\n` : ''}${footer}`;
    const tokenCount = (0, gpt_3_encoder_1.encode)(paragraph).length;
    const maxTokens = config_1.default.maxNumberOfTokens;

    if (tokenCount > maxTokens) {
        const parts = Math.ceil(tokenCount / maxTokens);
        // Character budget per chunk; may be small or negative when the
        // metadata is long, which the loop below tolerates.
        const targetLength = Math.ceil(paragraph.length / parts) - (metadata?.length || 0);
        // `(?:[.!?]+|$)` also captures a trailing fragment that has no closing
        // punctuation, so the end of the body is not silently discarded.
        // `body || ''` avoids a TypeError when only title/metadata were given.
        const sentences = (body || '').match(/[^.!?]+(?:[.!?]+|$)/g) || [];
        const result = [];
        let currentParagraph = header;

        const flush = () => {
            result.push(`${currentParagraph}\n${footer}`.trim());
            currentParagraph = header;
        };

        for (const sentence of sentences) {
            const hasBodyText = currentParagraph.length > header.length;
            // Close the current chunk only if it already holds body text;
            // otherwise an oversized sentence would produce an empty chunk.
            if (hasBodyText && currentParagraph.length + sentence.length >= targetLength) {
                flush();
            }
            // The sentence that triggered the flush starts the next chunk
            // instead of being dropped.
            currentParagraph += sentence;
        }

        // Emit whatever is left after the last sentence.
        if (currentParagraph.length > header.length) {
            flush();
        }
        return result;
    }
    return [paragraph];
}