1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 18x 659412x 659412x 659412x 659412x 659412x 659412x 659412x 25x 1x 19x 4x 15x 1x 146536x 1x 1x 1x 1x 36635x 36634x 1x 18x 659412x 341939x 341934x 317473x 170937x 146536x 146536x 146536x 146536x | // @flow import nodeCsv from 'csv'; import moment from 'moment'; import max from 'lodash.max'; import fill from 'lodash.fill'; import validator from 'validator'; import type { Readable } from 'stream'; const DATE = 'DATE'; const INTEGER = 'INTEGER'; const VARCHAR = 'VARCHAR'; const DOUBLE = 'DOUBLE PRECISION'; const EMPTY = 'EMPTY'; const precedence = { [VARCHAR]: 0, [DATE]: 1, [DOUBLE]: 2, [INTEGER]: 3, [EMPTY]: 4 }; // Parses a csv and returns the data and SQL types export default async function(csv: Readable, offset: number, limit: number): Promise<Object> { Eif (isNaN(offset)) { offset = 0; } Eif (isNaN(limit)) { limit = Infinity; } const { columns, rows } = await parseCsv(csv, offset, limit); return { columns, colTypes: inferTypes(rows, columns), data: rows }; } // Infer the types of the csv function inferTypes(csv: Array<Array<any>>, columns: Array<string>): Array<string> { const colTypes = fill(Array(csv[0].length), EMPTY); for (let col = 0; col < csv[0].length; ++col) { for (let row = 0; row < csv.length; ++row) { const value = csv[row][col]; // Handle empty strings Iif (!value || value.match(/^$|^\s+$/)) { csv[row][col] = null; continue; } else Iif (value === null) { continue; } // Remove unnecessary symbols Eif (!validator.isEmail(value)) { csv[row][col] = value.replace(/[^\w\s\.\-\/]/g, ''); } // Determine the type of the value, and assign it if it has higher precedence const colType = determineType(csv[row][col]); if (precedence[colType] < precedence[colTypes[col]]) { colTypes[col] = colType; } } } // Remove empty columns and coltypes for (let i = colTypes.length; i > -1; --i) { if (colTypes[i] === VARCHAR) { colTypes[i] = `VARCHAR(${colMax(csv, i)})`; } else Iif (colTypes[i] === EMPTY) { columns.splice(i, 1); colTypes.splice(i, 1); removeColumn(csv, i); } } return colTypes; } function removeColumn(matrix: Array<Array<any>>, col: number) { for (let row = 0; row < matrix.length; ++row) { matrix[row].splice(col, 1); } } function colMax(matrix: Array<Array<any>>, col: number): number { return max(matrix.map(row => row[col] ? row[col].length : -1)); } // Transform a CSV stream into JS arrays and objects function parseCsv(csvStream: Readable, offset: number, limit: number): Promise<Object> { return new Promise(resolve => { let columns; let numRows = 0; const rows = []; // Stream the rows of the CSV into an array csvStream .pipe(nodeCsv.parse()) .on('data', row => { if (numRows++ === offset) columns = row; else Eif (numRows > offset && numRows <= limit + 1 + offset) rows.push(row); }) .on('finish', () => resolve({ columns: columns.map(sanitize), rows })); }); } // Make table and column names database friendly function sanitize(str: string): string { return str .toLowerCase() // make all characters lowercase .replace(/[\s\\\/\-\.:]+/g, '_') // replace certain characters with underscores .replace(/[^\w]/g, '') // remove the rest of the non alphanumeric characters .trim() // remove trailing and leading whitespace .replace(/_+$|^_+/g, '') // replace leading and trailing underscores .replace(/_+/g, '_'); // replace instances of multiple underscores with a single } // Determine the type of a string function determineType(elem: string): string { if (validator.isInt(elem)) { if (parseInt(elem, 10) > 2147483647) return DOUBLE; return INTEGER; } else if (validator.isFloat(elem)) { return DOUBLE; } else Iif (isDate(elem)) { return DATE; } return VARCHAR; } // Check if a string is a date function isDate(input: string): boolean { const formats = ['M/D/YYYY', 'DD/MM/YYYY', 'MM/DD/YYYY', 'YYYY-MM-DD']; return moment(input, formats, true).isValid(); } |