Source: datamodel.js

/* eslint-disable default-case */

import { FieldType } from './enums';
import {
    persistDerivation,
    getRootGroupByModel,
    propagateToAllDataModels,
    getRootDataModel,
    propagateImmutableActions
} from './helper';
import { DM_DERIVATIVES, PROPAGATION } from './constants';
import {
    dataBuilder,
    rowDiffsetIterator,
    groupBy
} from './operator';
import { createBinnedFieldData } from './operator/bucket-creator';
import Relation from './relation';
import reducerStore from './utils/reducer-store';
import createFields from './field-creator';

/**
 * DataModel is an in-browser representation of tabular data. It supports
 * {@link https://en.wikipedia.org/wiki/Relational_algebra | relational algebra} operators as well as generic data
 * processing opearators.
 * DataModel extends {@link /muze/docs/api-Relation | Relation} class which defines all the relational algebra
 * opreators. DataModel gives definition of generic data processing operators which are not relational algebra complient
 * but needed for ease of use.
 *
 * @public
 * @class
 * @extends Relation
 * @module DataModel
 */
class DataModel extends Relation {
    /**
     * Creates a new DataModel instance by providing data and schema. Data could be in the form of
     * - Flat JSON
     * - DSV String
     * - 2D Array
     *
     * By default DataModel finds suitable adapter to serialize the data. DataModel also expects a
     * {@link /muze/docs/api-schema | schema} for identifying the variables present in data.
     *
     * @constructor
     * @example
     *  const DataModel = muze.DataModel; // Retrieves reference to DataModel from muze namespace
     *  const data = [
     *      { Name:'chevrolet chevelle malibu', Miles_per_Gallon:18, Cylinders:8, Horsepower:130, Year:'1970' },
     *      { Name:'ford fiesta', Miles_per_Gallon:36.1, Cylinders:4, Horsepower:66, Year:'1978' },
     *      { Name:'bmw 320i', Miles_per_Gallon:21.5, Cylinders:4, Horsepower:110, Year:'1977' },
     *      { Name:'chevrolet chevelle malibu', Miles_per_Gallon:18, Cylinders:8, Horsepower:130, Year:'1970' },
     *      { Name:'ford fiesta', Miles_per_Gallon:36.1, Cylinders:4, Horsepower:66, Year:'1978' },
     *      { Name:'bmw 320i', Miles_per_Gallon:21.5, Cylinders:4, Horsepower:110, Year:'1977' }
     *  ];
     *  const schema = [
     *      { name: 'Name', type: 'dimension' },
     *      { name: 'Miles_per_Gallon', type: 'measure', unit : 'gallon', numberformat: val => `${val}G`},
     *      { name: 'Cylinders', type: 'dimension' },
     *      { name: 'Horsepower', type: 'measure' },
     *      { name: 'Year', type: 'dimension', subtype: 'datetime', format: '%Y' }
     * ];
     * const dm = new DataModel(data, schema, { name: 'Cars' });
     * printDM(dm); // internal function to print datamodel, available only in this interface
     *
     * @public
     *
     * @param {Array.<Object> | string | Array.<Array>} data Input data in any of the mentioned formats. Checkout
     *      {@link /muze/docs/introduction-to-datamodel#populating-datamodel-from-different-formats-of-data | this}
     *      example for practical example on how feed different data format.
     * @param {Array.<Schema>} schema Defination of the variables. Order of the variables in data and order of the
     *      variables in schema has to be same.
     * @param {object} [options] Optional arguments to specify more settings regarding the creation part
     * @param {string} [options.name] Name of the datamodel instance. If no name is given an auto generated name is
     *      assigned to the instance.
     * @param {string} [options.fieldSeparator=','] specify field separator type if the data is of type dsv string.
     */
    constructor (...args) {
        super(...args);

        this._onPropagation = [];
        this._sortingDetails = [];
    }

    /**
     * Reducers are simple functions which reduces an array of numbers to a representative number of the set.
     * Like an array of numbers `[10, 20, 5, 15]` can be reduced to `12.5` if average / mean reducer function is
     * applied. All the measure fields in datamodel (variables in data) needs a reducer to handle aggregation.
     *
     * @public
     * @static
     *
     * @return {ReducerStore} Singleton instance of {@link /muze/docs/api-reducerstore | ReducerStore}.
     */
    static get Reducers () {
        return reducerStore;
    }

    /**
     * Retrieve the data attached to an instance in JSON format.
     *
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *      const data = params[0];
     *      const schema = params[1];
     *      const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *  const serializedData = dm.getData({
     *      order: 'column',
     *      formatter: {
     *          origin: (val) => val === 'European Union' ? 'EU' : val
     *      }
     *  });
     *  console.log(serializedData);
     *  //@preamble_start
     *  });
     *  //@preamble_end
     *
     * @public
     *
     * @param {Object} [options] Options to control how the raw data is to be returned.
     * @param {string} [options.order='row'] Defines if data is retieved in row order or column order. Possible values
     *      are `'rows'` and `'columns'`
     * @param {Function} [options.formatter=null] Formats the output data. This expects an object, where the keys are
     *      the name of the variable needs to be formatted. The formatter function is called for each row passing the
     *      value of the cell for a particular row as arguments. The formatter is a function in the form of
     *      ```
     *      function (value, rowId, schema) => { ... }
     *      ```
     *      Know more about {@link Fomatter}.
     *
     * @return {Array} Returns a multidimensional array of the data with schema. The return format looks like
     *      ```
     *      {
     *           data,
     *           schema
     *      }
     *      ```
     */
    getData (options) {
        const defOptions = {
            order: 'row',
            formatter: null,
            withUid: false,
            getAllFields: false,
            sort: []
        };
        options = Object.assign({}, defOptions, options);
        const fields = this.getPartialFieldspace().fields;

        const dataGenerated = dataBuilder.call(
            this,
            this.getPartialFieldspace().fields,
            this._rowDiffset,
            options.getAllFields ? fields.map(d => d.name).join() : this._colIdentifier,
            options.sort,
            {
                columnWise: options.order === 'column',
                addUid: !!options.withUid
            }
        );

        if (!options.formatter) {
            return dataGenerated;
        }

        const { formatter } = options;
        const { data, schema, uids } = dataGenerated;
        const fieldNames = schema.map((e => e.name));
        const fmtFieldNames = Object.keys(formatter);
        const fmtFieldIdx = fmtFieldNames.reduce((acc, next) => {
            const idx = fieldNames.indexOf(next);
            if (idx !== -1) {
                acc.push([idx, formatter[next]]);
            }
            return acc;
        }, []);

        if (options.order === 'column') {
            fmtFieldIdx.forEach((elem) => {
                const fIdx = elem[0];
                const fmtFn = elem[1];

                data[fIdx].forEach((datum, datumIdx) => {
                    data[fIdx][datumIdx] = fmtFn.call(
                        undefined,
                        datum,
                        uids[datumIdx],
                        schema[fIdx]
                    );
                });
            });
        } else {
            data.forEach((datum, datumIdx) => {
                fmtFieldIdx.forEach((elem) => {
                    const fIdx = elem[0];
                    const fmtFn = elem[1];

                    datum[fIdx] = fmtFn.call(
                        undefined,
                        datum[fIdx],
                        uids[datumIdx],
                        schema[fIdx]
                    );
                });
            });
        }

        return dataGenerated;
    }

    /**
     * Groups the data using particular dimensions by reducing measures. It expects a list of dimensions using which
     * it projects the datamodel and perform aggregations to reduce the duplicate tuples. Refer this
     * {@link /muze/docs/datamodel-operators#groupby | document} to know the intuition behind groupBy.
     *
     * DataModel by default provides definition of few {@link /muze/docs/api-reducer | Reducers} for reducing a measure
     * when aggregation is required for `groupBy`.
     * {@link ReducerStore | User defined reducers} can also be registered.
     *
     * This is the chained implementation of `groupBy`.
     * `groupBy` also supports {@link /muze/api/datamodel#compose-groupby| composability}.
     *
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *      const data = params[0];
     *      const schema = params[1];
     *      const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *  const outputDM = dm.groupBy(['Year'], { horsepower: 'max' } );
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     * @text
     * During `groupBy`, only the dimensions passed as the first parameter gets projected. However all the measures
     * gets projected automatically whether its mentioned as second parameter or not.
     *
     * @public
     *
     * @param {Array.<string>} fieldsArr Array containing the name of dimensions using which groupBy should happen.
     * @param {Object} [reducers={}] A simple key value pair whose key is the variable name and value is the name of the
     *      reducer. If its not passed, or any variable is ommitted from the object, default aggregation function is
     *      used from the schema of the variable.
     *
     * @return {DataModel} Returns a new DataModel instance after performing the groupby.
     */
    groupBy (fieldsArr, reducers = {}, config = { saveChild: true }) {
        const groupByString = `${fieldsArr.join()}`;
        let params = [this, fieldsArr, reducers];
        const newDataModel = groupBy(...params);

        if (config.saveChild) {
            this._children.push(newDataModel);
            persistDerivation(
                newDataModel,
                DM_DERIVATIVES.GROUPBY,
                { fieldsArr, groupByString, defaultReducer: reducerStore.defaultReducer() },
                reducers
            );
        }

        newDataModel._parent = this;
        return newDataModel;
    }

    /**
     * Performs sorting according to the specified sorting details.Like every other operator it doesn't mutate the
     * current DataModel instance on which it was called, instead returns a new DataModel instance containing the sorted
     * data.
     *
     * DataModel support multi level sorting by listing the variables using which sorting needs to be performed and
     * the type of sorting `ASC` or `DESC`.
     *
     * In the following example, data is sorted by `Origin` field in `DESC` order in first level followed by another
     * level of sorting by `Acceleration` in `ASC` order.
     *
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *  const data = params[0];
     *  const schema = params[1];
     *  const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *  let outputDM = dm.sort([
     *      ['Origin', 'DESC'],
     *      ['Acceleration'] // Default value is ASC
     *  ]);
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * DataModel also provides another sorting mechanism out of the box where order is applied to a variable by
     * comparing values of another variable.
     * Assume an instance of DataModel created from {@link /static/cars.json | this} data. Now, the data in this
     * model can be sorted by *Origin* field according to the average value of all *Acceleration* for a
     * particular *Origin* value. We would expect an output where *Origin* with lowest average *Acceleration* would come
     * first, then the next lower average, all the way to Origin with the highest average *Acceleration* is the last
     * entry of the array.
     *
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *  const data = params[0];
     *  const schema = params[1];
     *  const DataModel = muze.DataModel;
     *  const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm. DataModel is extracted
     *  // from muze namespace and assigned to DataModel variable.
     *  const avg = DataModel.Stats.avg;
     *  const outputDM = dm.sort([
     *      ['Origin', ['Acceleration', (a, b) => avg(a.Acceleration) - avg(b.Acceleration)]]
     *  ]);
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * If `groupBy` is applied post sorting, then sorting order is destroyed.
     *
     * @public
     *
     * @param {Array.<Array>} sortingDetails - Sorting details based on which the sorting will be performed.
     * @return {DataModel} Returns a new instance of DataModel with sorted data.
     */
    sort (sortingDetails) {
        const rawData = this.getData({
            order: 'row',
            sort: sortingDetails
        });
        const header = rawData.schema.map(field => field.name);
        const dataInCSVArr = [header].concat(rawData.data);

        const sortedDm = new this.constructor(dataInCSVArr, rawData.schema, { dataFormat: 'DSVArr' });
        sortedDm._sortingDetails = sortingDetails;
        return sortedDm;
    }

    addField (field) {
        const fieldName = field.fieldName();
        this._colIdentifier += `,${fieldName}`;
        const partialFieldspace = this._partialFieldspace;

        if (!partialFieldspace.fieldsObj()[field.fieldName()]) {
            partialFieldspace.fields.push(field);
        } else {
            const fieldIndex = partialFieldspace.fields.findIndex(fieldinst => fieldinst.name === fieldName);
            fieldIndex >= 0 && (partialFieldspace.fields[fieldIndex] = field);
        }

        this.__calculateFieldspace().calculateFieldsConfig();
        return this;
    }

     /**
     * Creates a new variable calculated from existing variable. This method expects definition of the newly created
     * variable and a function which resolves value of the new variable from existing variables.
     *
     * Creates a new measure based on existing variables
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *      const data = params[0];
     *      const schema = params[1];
     *      const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *  const outputDM = dm.calculateVariable({
     *      name: 'powerToWeight',
     *      type: 'measure' // Schema of variable
     *  }, ['Horsepower', 'Weight_in_lbs', (hp, weight) => hp / weight ]);
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * Creates a new dimension based on existing variables
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *      const data = params[0];
     *      const schema = params[1];
     *      const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *  const outputDM= dm.calculateVariable(
     *     {
     *       name: 'Efficiency',
     *       type: 'dimension'
     *     }, ['Horsepower', (hp) => {
     *      if (hp < 80) { return 'low'; }
     *      else if (hp < 120) { return 'moderate'; }
     *      else { return 'high' }
     *  }]);
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @public
     *
     * @param {Schema} schema Schema of newly defined variable
     * @param {VariableResolver} resolver {@link /muze/docs/api-variableresolver | Resolver} format to resolve the
     *      current variable
     *
     * @return {DataModel} Instance of DataModel with the new field
     */
    calculateVariable (schema, dependency, config = { saveChild: true, replaceVar: false }) {
        const fieldsConfig = this.getFieldsConfig();
        const depVars = dependency.slice(0, dependency.length - 1);
        const retrieveFn = dependency[dependency.length - 1];

        if (fieldsConfig[schema.name] && !config.replaceVar) {
            throw new Error(`${schema.name} field already exists in model.`);
        }
        const depFieldIndices = depVars.map((field) => {
            const fieldSpec = fieldsConfig[field];
            if (!fieldSpec) {
                // @todo dont throw error here, use warning in production mode
                throw new Error(`${field} is not a valid column name.`);
            }
            return fieldSpec.index;
        });

        let clone = this.clone();

        const fs = clone.getFieldspace().fields;
        const suppliedFields = depFieldIndices.map(idx => fs[idx]);

        const computedValues = [];
        rowDiffsetIterator(clone._rowDiffset, (i) => {
            const fieldsData = suppliedFields.map(field => field.data[i]);
            computedValues[i] = retrieveFn(...fieldsData, i, fs);
        });
        const [field] = createFields([computedValues], [schema], [schema.name]);
        clone.addField(field);

        if (config.saveChild) {
            persistDerivation(clone, DM_DERIVATIVES.CAL_VAR, { config: schema, fields: depVars }, retrieveFn);
        }

        return clone;
    }

    /**
     * Propagates changes across all the connected DataModel instances.
     *
     * @param {Array} identifiers - A list of identifiers that were interacted with.
     * @param {Object} payload - The interaction specific details.
     *
     * @return {DataModel} DataModel instance.
     */
    propagate (identifiers, payload, config = {}) {
        const isMutableAction = config.isMutableAction;
        const propagationSourceId = config.sourceId;
        const rootModel = getRootDataModel(this);
        const propagationNameSpace = rootModel._propagationNameSpace;
        const rootGroupByModel = getRootGroupByModel(this);
        const rootModels = {
            groupByModel: rootGroupByModel,
            model: rootModel
        };

        propagateToAllDataModels(identifiers, rootModels, {
            propagationNameSpace,
            payload,
            propagationSourceId
        });

        if (isMutableAction) {
            propagateImmutableActions(propagationNameSpace, rootModels, propagationSourceId);
        }
        return this;
    }

    addToPropNamespace (sourceId, config = {}) {
        let sourceNamespace;
        const actionName = config.actionName;
        const payload = config.payload;
        const isMutableAction = config.isMutableAction;
        const rootModel = getRootDataModel(this);
        const propagationNameSpace = rootModel._propagationNameSpace;
        const criteria = config.criteria;

        if (isMutableAction) {
            !propagationNameSpace.mutableActions[sourceId] && (propagationNameSpace.mutableActions[sourceId] = {});
            sourceNamespace = propagationNameSpace.mutableActions[sourceId];
        } else {
            !propagationNameSpace.immutableActions[sourceId] && (propagationNameSpace.immutableActions[sourceId] = {});
            sourceNamespace = propagationNameSpace.immutableActions[sourceId];
        }

        if (criteria === null) {
            delete sourceNamespace[actionName];
        } else {
            sourceNamespace[actionName] = {
                criteria,
                payload
            };
        }

        return this;
    }

    /**
     * Associates a callback with an event name.
     *
     * @param {string} eventName - The name of the event.
     * @param {Function} callback - The callback to invoke.
     * @return {DataModel} Returns this current DataModel instance itself.
     */
    on (eventName, callback) {
        switch (eventName) {
        case PROPAGATION:
            this._onPropagation.push(callback);
            break;
        }
        return this;
    }

    /**
     * Unsubscribes the callbacks for the provided event name.
     *
     * @param {string} eventName - The name of the event to unsubscribe.
     * @return {DataModel} Returns the current DataModel instance itself.
     */
    unsubscribe (eventName) {
        switch (eventName) {
        case PROPAGATION:
            this._onPropagation = [];
            break;

        }
        return this;
    }

    /**
     * This method is used to invoke the method associated with propagation.
     *
     * @param {Object} payload The interaction payload.
     * @param {DataModel} identifiers The propagated DataModel.
     * @memberof DataModel
     */
    handlePropagation (payload) {
        let propListeners = this._onPropagation;
        propListeners.forEach(fn => fn.call(this, payload));
    }

    /**
     * Perfoms binning on a measure field based on a binning configuration. This method does not aggregate the number of
     * rows present in DataModel instance after binning, it just adds a new field with the binned value. Refer binning
     * {@link example_of_binning | example} to have a intuition of what binning is and the use case.
     *
     * Binning can be configured by
     * - providing custom bin configuration with non uniform buckets
     * - providing bin count
     * - providing each bin size
     *
     * When custom buckets are provided as part of binning configuration
     * @example
     *  // DataModel already prepared and assigned to dm vairable
     *  const buckets = {
     *      start: 30
     *      stops: [80, 100, 110]
     *  };
     *  const config = { buckets, name: 'binnedHP' }
     *  const binDM = dataModel.bin('horsepower', config);\
     *
     * @text
     * When `binCount` is defined as part of binning configuration
     * @example
     *  // DataModel already prepared and assigned to dm vairable
     *  const config = { binCount: 5, name: 'binnedHP' }
     *  const binDM = dataModel.bin('horsepower', config);
     *
     * @text
     * When `binSize` is defined as part of binning configuration
     * @example
     *  // DataModel already prepared and assigned to dm vairable
     *  const config = { binSize: 200, name: 'binnedHorsepower' }
     *  const binDM = dataModel.bin('horsepower', config);
     *
     * @todo Fix interaction of binning and then make it public
     * @private
     *
     * @param {String} name Name of measure which will be used to create bin
     * @param {Object} config Config required for bin creation
     * @param {Array.<Number>} config.bucketObj.stops Defination of bucket ranges. Two subsequent number from arrays
     *      are picked and a range is created. The first number from range is inclusive and the second number from range
     *      is exclusive.
     * @param {Number} [config.bucketObj.startAt] Force the start of the bin from a particular number.
     *      If not mentioned, the start of the bin or the lower domain of the data if stops is not mentioned, else its
     *      the first value of the stop.
     * @param {Number} config.binSize Bucket size for each bin
     * @param {Number} config.binCount Number of bins which will be created
     * @param {String} config.name Name of the new binned field to be created
     *
     * @returns {DataModel} Instance of new DataModel with the newly created bin.
     */
    bin (measureName, config = { }) {
        const clone = this.clone();
        const binFieldName = config.name || `${measureName}_binned`;
        if (this.getFieldsConfig()[binFieldName] || !this.getFieldsConfig()[measureName]) {
            throw new Error(`Field ${measureName} already exists.`);
        }
        const field = this._partialFieldspace.fields.find(currfield => currfield.name === measureName);
        const dataSet = createBinnedFieldData(field, this._rowDiffset, config);
        const binField = createFields([dataSet.data], [
            {
                name: binFieldName,
                type: FieldType.MEASURE,
                subtype: 'discrete', // @todo : DimensionSubtype
                bins: {
                    range: dataSet.range,
                    mid: dataSet.mid
                }
            }], [binFieldName])[0];
        clone.addField(binField);
        persistDerivation(clone, DM_DERIVATIVES.BIN, { measureName, config, binFieldName }, null);
        return clone;
    }
}

export default DataModel;