All files / Cognigy-CLI/build/lib/knowledgeAI extract.js

0% Statements 0/98
0% Branches 0/79
0% Functions 0/13
0% Lines 0/87

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138                                                                                                                                                                                                                                                                                   
"use strict";
/*
 * TypeScript interop helper: exposes property `k` of module `m` on object `o`
 * under the name `k2` (defaults to `k`). A helper already present on `this`
 * is reused when available.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    var targetKey = k2 === undefined ? k : k2;
    var descriptor = Object.getOwnPropertyDescriptor(m, k);
    // Decide whether the source descriptor can be copied as-is or has to be
    // replaced by a forwarding getter that reads m[k] on every access.
    var needsForwardingGetter;
    if (!descriptor) {
        needsForwardingGetter = true;
    }
    else if ("get" in descriptor) {
        // Accessors are only reused when the source is flagged as an ES module.
        needsForwardingGetter = !m.__esModule;
    }
    else {
        // Data properties that can still change are forwarded through a getter.
        needsForwardingGetter = descriptor.writable || descriptor.configurable;
    }
    if (needsForwardingGetter) {
        descriptor = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, targetKey, descriptor);
}) : (function(o, m, k, k2) {
    // Fallback used when Object.create is unavailable: plain value copy.
    var targetKey = k2 === undefined ? k : k2;
    o[targetKey] = m[k];
}));
/*
 * TypeScript interop helper: stores `v` as the `default` member of `o`.
 * A helper already present on `this` is reused when available.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function(o, v) {
            // Enumerable, but neither writable nor configurable.
            var defaultDescriptor = { enumerable: true, value: v };
            Object.defineProperty(o, "default", defaultDescriptor);
        };
    }
    // Fallback used when Object.create is unavailable: plain assignment.
    return function(o, v) {
        o["default"] = v;
    };
})();
/*
 * TypeScript interop helper: turns the result of `require()` into a
 * namespace-style object. Modules flagged with `__esModule` are returned
 * unchanged; anything else is wrapped in a fresh object that re-exposes every
 * own property except "default" and carries the original module as `default`.
 * A helper already present on `this` is reused when available.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Lazily resolved key lister: the first call replaces `ownKeys` with
    // Object.getOwnPropertyNames (or a for-in/hasOwnProperty fallback), so the
    // feature check runs only once.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        // Already an ES module namespace: nothing to wrap.
        if (mod && mod.__esModule) return mod;
        var result = {};
        // Bind every own key except "default"; null/undefined modules contribute no keys.
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        // The wrapped module itself becomes the `default` member.
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.extract = void 0;
const fs = __importStar(require("fs"));
const cli_spinner_1 = require("cli-spinner");
const diffbotExtractor_1 = require("./extractionProvider/diffbotExtractor");
const lsExtractor_1 = require("./extractionProvider/lsExtractor");
const unstructuredExtractor_1 = require("./extractionProvider/unstructuredExtractor");
/**
 * Writes extracted content to a file as UTF-8 after normalising blank lines.
 *
 * Every run of three or more consecutive newlines is collapsed to exactly two,
 * so at most one blank line separates blocks of text. (The previous single-pass
 * replacement of "\n\n\n" left three newlines behind for a run of four.)
 *
 * @param {string} outputFilePath - Destination path of the file to write.
 * @param {string} content - Text to write.
 * @returns {Promise<void>} Resolves once the file is written; rejects with the
 *   file-system error otherwise.
 * @throws {TypeError} Synchronously, if `content` has no `replace` method.
 */
function writeResultsToFile(outputFilePath, content) {
    const normalizedContent = content.replace(/\n{3,}/g, '\n\n');
    return fs.promises.writeFile(outputFilePath, normalizedContent, 'utf8');
}
/** Extraction types that read a local file and therefore require --inputFile. */
const FILE_EXTRACTION_TYPES = new Set(['pdf', 'text', 'csv', 'epub', 'json', 'jsonl', 'srt', 'md', 'docx']);
/** Extraction types that need either --url or --inputFile and always go through lsExtractor. */
const URL_EXTRACTION_TYPES = new Set(['cheerio', 'playwright']);
/** Extraction types handed to a single extractor with no input-specific validation. */
const DIRECT_EXTRACTION_TYPES = new Set(['diffbot', 'other']);

/**
 * Checks the extraction type and the options it requires.
 *
 * @param {string} type - Requested extraction type.
 * @param {object} options - CLI options passed to `extract`.
 * @returns {string|null} The error message to print, or null when everything is valid.
 */
function findExtractOptionsError(type, options) {
    const isFileType = FILE_EXTRACTION_TYPES.has(type);
    const isUrlType = URL_EXTRACTION_TYPES.has(type);
    if (!isFileType && !isUrlType && !DIRECT_EXTRACTION_TYPES.has(type)) {
        return `Invalid extraction type '${type}'. Please refer to the documentation.`;
    }
    if (isFileType && !options.inputFile) {
        return 'Missing required parameter --inputFile';
    }
    if (isUrlType && !options.url && !options.inputFile) {
        return 'Missing required parameter --url';
    }
    // Every type writes its result to options.outputFile, so it is required
    // up front instead of failing with a TypeError after the extraction ran.
    if (!options.outputFile) {
        return 'Missing required parameter --outputFile';
    }
    return null;
}

/**
 * Runs the extractor that belongs to an already validated extraction type.
 *
 * @param {string} type - Validated extraction type.
 * @param {object} options - CLI options, forwarded to the extractor.
 * @returns {Promise<*>} Whatever the selected extractor produced (falsy when nothing was extracted).
 */
async function runExtractor(type, options) {
    if (type === 'diffbot') {
        return (0, diffbotExtractor_1.diffbotExtractor)(options);
    }
    if (type === 'other') {
        return (await (0, unstructuredExtractor_1.unstructuredExtractor)(options)) || '';
    }
    if (URL_EXTRACTION_TYPES.has(type)) {
        return (0, lsExtractor_1.lsExtractor)(type, options);
    }
    // File types: try unstructuredExtractor first unless --forceLocal is set,
    // and fall back to lsExtractor when it yields nothing.
    if (!options.forceLocal) {
        const apiContent = await (0, unstructuredExtractor_1.unstructuredExtractor)(options);
        if (apiContent) {
            return apiContent;
        }
        console.warn(`Error when attempting to extract text from ${options.inputFile} via API, falling back to local processing`);
    }
    return (0, lsExtractor_1.lsExtractor)(type, options);
}

/**
 * Extracts text for the given extraction type and writes it to a `.ctxt` file.
 *
 * The output starts with a "`version: 1`" header line and is written to
 * `options.outputFile`; the `.ctxt` extension is appended when it is missing.
 * Invalid types or missing required parameters are reported on stderr and end
 * the process with exit code 1.
 *
 * @param {string} type - Extraction type (e.g. 'pdf', 'diffbot', 'cheerio').
 * @param {object} options - CLI options. `additionalParameters` may be a JSON
 *   string; it is parsed in place before being handed to the extractors.
 * @returns {Promise<void>}
 * @throws {Error} When `type` is 'ctxt', or when an extractor or the file write fails.
 */
const extract = async (type, options) => {
    if (type === 'ctxt') {
        throw new Error('Can not extract from ctxt to ctxt!');
    }
    // Only raw strings need parsing; an already parsed value is passed through untouched.
    if (typeof options.additionalParameters === 'string' && options.additionalParameters !== '') {
        try {
            options.additionalParameters = JSON.parse(options.additionalParameters);
        }
        catch (err) {
            console.warn('Invalid JSON config passed. Using default parameters.');
            // Drop the unparsable string so the extractors do not receive it.
            options.additionalParameters = undefined;
        }
    }
    const optionsError = findExtractOptionsError(type, options);
    if (optionsError) {
        console.error(optionsError);
        process.exit(1);
        return;
    }
    const spinner = new cli_spinner_1.Spinner(`Extracting chunks into file... %s`);
    spinner.setSpinnerString('|/-\\');
    spinner.start();
    let content;
    try {
        content = await runExtractor(type, options);
    }
    finally {
        // Stop the spinner even when the extractor rejects.
        spinner.stop();
    }
    if (!content) {
        console.error("Content couldn't be extracted, no output file written.");
        return;
    }
    const outputFilePath = options.outputFile.endsWith('.ctxt')
        ? options.outputFile
        : `${options.outputFile}.ctxt`;
    await writeResultsToFile(outputFilePath, '`version: 1`\n\n' + content);
    if (options.verbose) {
        console.log('Paragraphs written to file: ', outputFilePath);
    }
};
exports.extract = extract;