All files index.js

84.75% Statements 50/59
73.53% Branches 25/34
91.67% Functions 11/12
83.64% Lines 46/55
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141                  1x 1x 1x 1x 1x   1x                   1x 1x 1x 1x     1x   1x                 1x   1x 18x 659412x     659412x     659412x         659412x 659412x       659412x 659412x 25x           1x 19x 4x 15x             1x                   146536x         1x   1x 1x     1x     36635x 36634x   1x           18x                     659412x 341939x 341934x 317473x 170937x 146536x     146536x         146536x 146536x    
// @flow
 
import nodeCsv from 'csv';
import moment from 'moment';
import max from 'lodash.max';
import fill from 'lodash.fill';
import validator from 'validator';
import type { Readable } from 'stream';
 
// SQL type labels a column can be assigned, declared from the most general
// (VARCHAR) to the most specific (INTEGER). EMPTY marks a column in which no
// non-blank value has been seen.
const VARCHAR = 'VARCHAR';
const DATE = 'DATE';
const DOUBLE = 'DOUBLE PRECISION';
const INTEGER = 'INTEGER';
const EMPTY = 'EMPTY';

// Rank of each type label. A column's type is replaced whenever a value's
// type has a strictly lower rank than the column's current type, so a column
// ends up with the lowest-ranked type found among its values.
const precedence = {
  [VARCHAR]: 0,
  [DATE]: 1,
  [DOUBLE]: 2,
  [INTEGER]: 3,
  [EMPTY]: 4
};
 
// Parses a csv and returns the data and SQL types
export default async function(csv: Readable, offset: number, limit: number): Promise<Object> {
  Eif (isNaN(offset)) {
    offset = 0;
  } Eif (isNaN(limit)) {
    limit = Infinity;
  }
 
  const { columns, rows } = await parseCsv(csv, offset, limit);
 
  return {
    columns,
    colTypes: inferTypes(rows, columns),
    data: rows
  };
}
 
// Infer the types of the csv
function inferTypes(csv: Array<Array<any>>, columns: Array<string>): Array<string> {
  const colTypes = fill(Array(csv[0].length), EMPTY);
 
  for (let col = 0; col < csv[0].length; ++col) {
    for (let row = 0; row < csv.length; ++row) {
      const value = csv[row][col];
 
      // Handle empty strings
      Iif (!value || value.match(/^$|^\s+$/)) {
        csv[row][col] = null;
        continue;
      } else Iif (value === null) {
        continue;
      }
 
      // Remove unnecessary symbols
      Eif (!validator.isEmail(value)) {
        csv[row][col] = value.replace(/[^\w\s\.\-\/]/g, '');
      }
 
      // Determine the type of the value, and assign it if it has higher precedence
      const colType = determineType(csv[row][col]);
      if (precedence[colType] < precedence[colTypes[col]]) {
        colTypes[col] = colType;
      }
    }
  }
 
  // Remove empty columns and coltypes
  for (let i = colTypes.length; i > -1; --i) {
    if (colTypes[i] === VARCHAR) {
      colTypes[i] = `VARCHAR(${colMax(csv, i)})`;
    } else Iif (colTypes[i] === EMPTY) {
      columns.splice(i, 1);
      colTypes.splice(i, 1);
      removeColumn(csv, i);
    }
  }
 
  return colTypes;
}
 
function removeColumn(matrix: Array<Array<any>>, col: number) {
  for (let row = 0; row < matrix.length; ++row) {
    matrix[row].splice(col, 1);
  }
}
 
function colMax(matrix: Array<Array<any>>, col: number): number {
  return max(matrix.map(row => row[col] ? row[col].length : -1));
}
 
// Transform a CSV stream into JS arrays and objects
function parseCsv(csvStream: Readable, offset: number, limit: number): Promise<Object> {
  return new Promise(resolve => {
    let columns;
    let numRows = 0;
    const rows = [];
 
    // Stream the rows of the CSV into an array
    csvStream
      .pipe(nodeCsv.parse())
      .on('data', row => {
        if (numRows++ === offset) columns = row;
        else Eif (numRows > offset && numRows <= limit + 1 + offset) rows.push(row);
      })
      .on('finish', () => resolve({ columns: columns.map(sanitize), rows }));
  });
}
 
// Make table and column names database friendly
function sanitize(str: string): string {
  return str
    .toLowerCase() // make all characters lowercase
    .replace(/[\s\\\/\-\.:]+/g, '_') // replace certain characters with underscores
    .replace(/[^\w]/g, '') // remove the rest of the non alphanumeric characters
    .trim() // remove trailing and leading whitespace
    .replace(/_+$|^_+/g, '') // replace leading and trailing underscores
    .replace(/_+/g, '_'); // replace instances of multiple underscores with a single
}
 
// Determine the type of a string
function determineType(elem: string): string {
  if (validator.isInt(elem)) {
    if (parseInt(elem, 10) > 2147483647) return DOUBLE;
    return INTEGER;
  } else if (validator.isFloat(elem)) {
    return DOUBLE;
  } else Iif (isDate(elem)) {
    return DATE;
  }
  return VARCHAR;
}
 
// Check if a string is a date
function isDate(input: string): boolean {
  const formats = ['M/D/YYYY', 'DD/MM/YYYY', 'MM/DD/YYYY', 'YYYY-MM-DD'];
  return moment(input, formats, true).isValid();
}