Source: relation.js

import { FilteringMode } from './enums';
import { getUniqueId } from './utils';
import { persistDerivation, updateFields, cloneWithSelect, cloneWithProject, updateData } from './helper';
import { crossProduct, difference, naturalJoinFilter, union } from './operator';
import { DM_DERIVATIVES } from './constants';

/**
 * Relation provides the definitions of basic operators of relational algebra like *selection*, *projection*, *union*,
 * *difference* etc.
 *
 * It is extended by {@link DataModel} to inherit the functionalities of relational algebra concept. It's not
 * recommended to instantiate this class directly and use it.
 *
 * @class
 * @public
 * @module Relation
 * @segment DataModel
 */
class Relation {

    /**
     * Creates a new Relation instance by providing underlying data and schema.
     *
     * When the only argument is an existing Relation, the new instance shares that relation's row diffset, column
     * identifier and partial fieldspace, records it as its parent and gets a freshly generated field store name.
     * Otherwise all the arguments are forwarded to `updateData` and the instance is set up as a root with its own
     * propagation namespace.
     *
     * @private
     *
     * @param {Object | string | Relation} data - The input tabular data in dsv or json format or
     * an existing Relation instance object.
     * @param {Array} schema - An array of data schema.
     * @param {Object} [options] - The optional options.
     */
    constructor (...params) {
        const [source] = params;

        this._parent = null;
        this._derivation = [];
        this._children = [];

        if (params.length === 1 && source instanceof Relation) {
            // parent datamodel was passed as part of source
            this._colIdentifier = source._colIdentifier;
            this._rowDiffset = source._rowDiffset;
            this._parent = source;
            this._partialFieldspace = this._parent._partialFieldspace;
            this._fieldStoreName = getUniqueId();
            this.__calculateFieldspace().calculateFieldsConfig();
        } else {
            updateData(this, ...params);
            this._fieldStoreName = this._partialFieldspace.name;
            this.__calculateFieldspace().calculateFieldsConfig();
            this._propagationNameSpace = {
                mutableActions: {},
                immutableActions: {}
            };
        }
    }

    /**
     * Retrieves the {@link Schema | schema} details for every {@link Field | field} in an array format.
     *
     * @public
     *
     * @return {Schema} Array of fields schema.
     *      ```
     *      [
     *          { name: 'Name', type: 'dimension' },
     *          { name: 'Miles_per_Gallon', type: 'measure', numberFormat: (val) => `${val} miles / gallon` },
     *          { name: 'Cylinder', type: 'dimension' },
     *          { name: 'Displacement', type: 'measure', defAggFn: 'max' },
     *          { name: 'HorsePower', type: 'measure', defAggFn: 'max' },
     *          { name: 'Weight_in_lbs', type: 'measure', defAggFn: 'avg',  },
     *          { name: 'Acceleration', type: 'measure', defAggFn: 'avg' },
     *          { name: 'Year', type: 'dimension', subtype: 'datetime', format: '%Y' },
     *          { name: 'Origin' }
     *      ]
     *      ```
     */
    getSchema () {
        return this.getFieldspace().fields.map(field => field.schema);
    }

    /**
     * Returns the name of the {@link DataModel} instance. If no name was specified during {@link DataModel}
     * initialization, then it returns an auto-generated name.
     *
     * @public
     *
     * @return {string} Name of the DataModel instance.
     */
    getName () {
        return this._fieldStoreName;
    }

    /**
     * Returns the fieldspace last computed by `__calculateFieldspace` for this instance.
     *
     * @return {Object} The fieldspace of the instance.
     */
    getFieldspace () {
        return this._fieldspace;
    }

    /**
     * Recomputes the fieldspace of this instance from its row diffset, column identifier, partial fieldspace and
     * field store name, and stores the result on the instance.
     *
     * @private
     *
     * @return {Relation} The current instance, to allow chaining.
     */
    __calculateFieldspace () {
        this._fieldspace = updateFields([this._rowDiffset, this._colIdentifier],
             this.getPartialFieldspace(), this._fieldStoreName);
        return this;
    }

    /**
     * Returns the partial fieldspace backing this instance. A relation cloned from another relation shares its
     * parent's partial fieldspace.
     *
     * @return {Object} The partial fieldspace of the instance.
     */
    getPartialFieldspace () {
        return this._partialFieldspace;
    }

    /**
     * Performs {@link https://en.wikipedia.org/wiki/Cartesian_product | cross-product} between two {@link DataModel}
     * instances with an optional predicate which determines which tuples should be included and returns a new
     * {@link DataModel} instance containing the results. This operation is also called theta join.
     *
     * Cross product takes two sets and creates one set where each value of one set is paired with each value of
     * another set.
     *
     * This method takes an optional predicate which filters the generated result rows. The predicate is called for
     * every tuple. If the predicate returns true the combined row is included in the resultant table.
     *
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *      const data = params[0];
     *      const schema = params[1];
     *      const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *
     *  // Creates two small DataModel instance from the original DataModel instance, which will be joined.
     *  let makerDM = dm.groupBy(['Origin', 'Maker']).project(['Origin', 'Maker']);
     *  let nameDM = dm.project(['Name','Miles_per_Gallon']);
     *
     *  let outputDM = makerDM.join(nameDM,
     *      (makerDM, nameDM) => makerDM.Maker.value === nameDM.Name.value.split(/\s/)[0]);
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * This is the most generic version of joining. There are few variations of join which are exposed as
     * {@link muze/api/datamodel#functional-operators | functional operator}.
     *
     * @public
     *
     * @param {DataModel} joinWith - The DataModel to be joined with the current instance DataModel.
     * @param {SelectionPredicate} [filterFn] - The predicate function that will filter the result of the
     *      crossProduct.
     *
     * @return {DataModel} New DataModel instance created after joining.
     */
    join (joinWith, filterFn) {
        return crossProduct(this, joinWith, filterFn);
    }

    /**
     * {@link https://www.geeksforgeeks.org/extended-operators-in-relational-algebra | Natural join} is a special kind
     * of joining where filtering of rows is performed internally by resolving the fields common to both tables, and
     * the rows with common values are included.
     *
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *      const data = params[0];
     *      const schema = params[1];
     *      const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *
     *  // Creates two small DataModel instance from the original DataModel instance, which will be joined.
     *  let makerDM = dm.groupBy(['Origin', 'Maker']).project(['Origin', 'Maker']);
     *  let nameDM = dm.project(['Name','Miles_per_Gallon'])
     *
     *  let outputDM = makerDM.naturalJoin(nameDM);
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * This is chained version of `naturalJoin` operator. `naturalJoin` can also be used as
     * {@link /muze/api/datamodel/functional-operator | functional operator}.
     *
     * @public
     *
     * @param {DataModel} joinWith - The DataModel with which the current instance of DataModel on which the method is
     *      called will be joined.
     * @return {DataModel} New DataModel instance created after joining.
     */
    naturalJoin (joinWith) {
        return crossProduct(this, joinWith, naturalJoinFilter(this, joinWith), true);
    }

    /**
     * Union operation can be termed as vertical stacking of all rows from both the DataModel instances, provided that
     * both of the {@link DataModel} instances should have same column names.
     *
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *      const data = params[0];
     *      const schema = params[1];
     *      const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *
     *  // Creates two small DataModel instance from the original DataModel instance, one only for european cars,
     *  // another for cars from USA.
     *  usaMakerDM = dm.select(fields => fields.Origin.value === 'USA');
     *  euroMakerDM = dm.select(fields => fields.Origin.value === 'Europe');
     *
     *  outputDM = usaMakerDM.union(euroMakerDM);
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * This is chained version of `union` operator. `union` can also be used as
     * {@link /muze/api/datamodel/functional-operator | functional operator}.
     *
     * @public
     *
     * @param {DataModel} unionWith - DataModel instance for which union has to be applied with the instance on which
     *      the method is called
     *
     * @return {DataModel} New DataModel instance with the result of the operation
     */
    union (unionWith) {
        return union(this, unionWith);
    }

    /**
     * Difference operation only includes rows which are present in the datamodel on which it was called but not on
     * the one passed as argument.
     *
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *      const data = params[0];
     *      const schema = params[1];
     *      const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *
     *  // Creates a DataModel instance only including USA
     *  usaMakerDM = dm.select(fields => fields.Origin.value === 'USA');
     *
     *  outputDM = dm.difference(usaMakerDM);
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * This is chained version of `difference` operator. `difference` can also be used as
     * {@link /muze/api/datamodel/functional-operator | functional operator}.
     *
     * @public
     *
     * @param {DataModel} differenceWith - DataModel instance for which difference has to be applied with the instance
     *      on which the method is called
     * @return {DataModel} New DataModel instance with the result of the operation
     */
    difference (differenceWith) {
        return difference(this, differenceWith);
    }

    /**
     * Selection is a row filtering operation. It expects a predicate and an optional mode which control which all
     * rows should be included in the resultant DataModel instance.
     *
     * {@link SelectionPredicate} is a function which returns a boolean value for each tuple present in the DataModel.
     * For selection operation the predicate function is called for each row of DataModel instance with the current
     * row passed as argument.
     *
     * After executing {@link SelectionPredicate} the rows are labeled as either a member of selection set or a member
     * of rejection set.
     *
     * {@link FilteringMode} operates on the selection and rejection set to determine which one would reflect in the
     * resultant datamodel.
     *
     * @warn
     * Note
     * Selection and rejection set is only a logical idea for concept explanation purpose.
     *
     * Selection with default mode `FilteringMode.NORMAL`
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *  const data = params[0];
     *  const schema = params[1];
     *  const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *
     *  let outputDM = dm.select(fields => fields.Origin.value === 'USA')
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * Selection with mode `FilteringMode.INVERSE`
     *
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *  const data = params[0];
     *  const schema = params[1];
     *  const DataModel = muze.DataModel;
     *  const dm = new DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm. DataModel is extracted
     *  // from muze namespace and assigned to the variable DataModel
     *
     *  const outputDM = dm.select(fields => fields.Origin.value === "USA", { mode: DataModel.FilteringMode.INVERSE })
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * with `FilteringMode.ALL` both selection and rejection set is returned.
     * ```
     * const [selDM, rejDM] = dm.select(fields => fields.Origin.value === "USA", { mode: DataModel.FilteringMode.ALL })
     * ```
     * This is chained version of `select` operator. `select` can also be used as
     * {@link /muze/api/datamodel/functional-operator | functional operator}.
     *
     * @public
     *
     * @param {SelectionPredicate} selectFn - Predicate function which is called for each row with the current row
     *      ```
     *          function (row, i)  { ... }
     *      ```
     * @param {Object} [config] - The configuration object to control the inclusion exclusion of a row in resultant
     *      DataModel instance
     * @param {FilteringMode} [config.mode=FilteringMode.NORMAL] - The mode of the selection
     * @param {boolean} [config.saveChild=true] - Forwarded to `cloneWithSelect` as the clone configuration.
     *
     * @return {DataModel | Array.<DataModel>} Returns the new DataModel instance after operation, or a
     *      `[selection, rejection]` pair when the mode is `FilteringMode.ALL`.
     */
    select (selectFn, config) {
        const selectConfig = Object.assign({
            mode: FilteringMode.NORMAL,
            saveChild: true
        }, config);
        const cloneConfig = { saveChild: selectConfig.saveChild };

        if (selectConfig.mode === FilteringMode.ALL) {
            // Selection set first, rejection set second; only the mode is passed on as select config here.
            return [FilteringMode.NORMAL, FilteringMode.INVERSE].map(
                mode => cloneWithSelect(this, selectFn, { mode }, cloneConfig)
            );
        }

        return cloneWithSelect(this, selectFn, selectConfig, cloneConfig);
    }

    /**
     * Retrieves a boolean value indicating whether the current {@link DataModel} instance has no data, i.e. either
     * its row diffset or its column identifier is empty.
     *
     * @example
     * const schema = [
     *    { name: 'CarName', type: 'dimension' },
     *    { name: 'HorsePower', type: 'measure' },
     *    { name: "Origin", type: 'dimension' }
     * ];
     * const data = [];
     *
     * const dt = new DataModel(data, schema);
     * console.log(dt.isEmpty());
     *
     * @private
     *
     * @return {Boolean} True if the datamodel has no data, otherwise false.
     */
    isEmpty () {
        return !this._rowDiffset.length || !this._colIdentifier.length;
    }

    /**
     * Creates a clone from the current DataModel instance with child parent relationship.
     *
     * @private
     * @param {boolean} [saveChild=true] - Whether the cloned instance would be recorded in the parent instance.
     * @param {boolean} [linkParent=true] - When `false`, the clone is built from the exported data and schema of
     *      this instance instead of being constructed with this instance as its source. Note that `saveChild` is
     *      honoured independently of this flag.
     * @return {DataModel} - Returns the newly cloned DataModel instance.
     */
    clone (saveChild = true, linkParent = true) {
        let retDataModel;

        if (linkParent === false) {
            // `getData` is not defined on Relation itself; it is expected to be provided by the extending class.
            const { data, schema } = this.getData({
                getAllFields: true
            });
            // Convert every row array into an object keyed by field name.
            const jsonData = data.map((row) => {
                const rowObj = {};
                schema.forEach((field, i) => {
                    rowObj[field.name] = row[i];
                });
                return rowObj;
            });
            retDataModel = new this.constructor(jsonData, schema);
        } else {
            retDataModel = new this.constructor(this);
        }

        if (saveChild) {
            this._children.push(retDataModel);
        }
        return retDataModel;
    }

    /**
     * Projection is column (field) filtering operation. It expects list of fields' name and either include those or
     * exclude those based on {@link FilteringMode} on the resultant DataModel instance.
     *
     * Projection expects array of fields name based on which it creates the selection and rejection set. All the field
     * whose name is present in array goes in selection set and rest of the fields goes in rejection set. Entries
     * which do not match any field of the instance are ignored.
     *
     * {@link FilteringMode} operates on the selection and rejection set to determine which one would reflect in the
     * resultant datamodel.
     *
     * @warning
     * Note
     * Selection and rejection set is only a logical idea for concept explanation purpose.
     *
     * Projection with default mode `FilteringMode.NORMAL`
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *  const data = params[0];
     *  const schema = params[1];
     *  const dm = new muze.DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm.
     *
     *  let outputDM = dm.project(["Name", "HorsePower"]);
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * Projection with mode `FilteringMode.INVERSE`
     * @example
     *  //@preamble_start
     *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
     *  const data = params[0];
     *  const schema = params[1];
     *  const DataModel = muze.DataModel;
     *  const dm = new DataModel(data, schema);
     *  //@preamble_end
     *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
     *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm. DataModel is extracted
     *  // from muze namespace and assigned to the variable DataModel
     *
     *  const outputDM = dm.project(["Name", "HorsePower"], { mode: DataModel.FilteringMode.INVERSE });
     *  //@preamble_start
     *  printDM(outputDM);
     *  });
     *  //@preamble_end
     *
     * @text
     * With `FilteringMode.ALL` both selection and rejection set is returned.
     * ```
     * const [selDM, rejDM] = dm.project(["Name", "HorsePower"], { mode: DataModel.FilteringMode.ALL})
     * ```
     * This is chained version of `project` operator. `project` can also be used as
     * {@link /muze/api/datamodel/functional-operator | functional operator}.
     *
     * @public
     *
     * @param {Array.<string | Regexp>} projField - An array of column names in string or regular expression.
     * @param {Object} [config] - An optional config to control the creation of new DataModel
     * @param {FilteringMode} [config.mode=FilteringMode.NORMAL] - Mode of the projection
     * @param {boolean} [config.saveChild=true] - Forwarded to `cloneWithProject` as part of the config.
     *
     * @return {DataModel | Array.<DataModel>} Returns the new DataModel instance after operation, or a
     *      `[projection, rejection]` pair when the mode is `FilteringMode.ALL`.
     */
    project (projField, config) {
        const projectConfig = Object.assign({
            mode: FilteringMode.NORMAL,
            saveChild: true
        }, config);
        const fieldConfig = this.getFieldsConfig();
        const allFields = Object.keys(fieldConfig);
        const { mode } = projectConfig;

        // Tag check instead of `field.constructor.name` so that null / undefined entries do not throw.
        const isRegExp = value => Object.prototype.toString.call(value) === '[object RegExp]';
        // Own-property check so that inherited keys such as 'toString' are not mistaken for fields.
        const isKnownField = name => Object.prototype.hasOwnProperty.call(fieldConfig, name);

        const matchedFields = projField.reduce((acc, field) => {
            if (isRegExp(field)) {
                acc.push(...allFields.filter(fieldName => fieldName.search(field) !== -1));
            } else if (isKnownField(field)) {
                acc.push(field);
            }
            return acc;
        }, []);

        // De-duplicate first, then trim, preserving first-seen order.
        const normalizedProjField = Array.from(new Set(matchedFields)).map(field => field.trim());

        if (mode === FilteringMode.ALL) {
            // Projection set first, rejection set second.
            return [FilteringMode.NORMAL, FilteringMode.INVERSE].map(
                cloneMode => cloneWithProject(this, normalizedProjField, {
                    mode: cloneMode,
                    saveChild: projectConfig.saveChild
                }, allFields)
            );
        }

        return cloneWithProject(this, normalizedProjField, projectConfig, allFields);
    }

    /**
     * Returns the fields config last computed by `calculateFieldsConfig`.
     *
     * @return {Object} Map of field name to `{ index, def }`.
     */
    getFieldsConfig () {
        return this._fieldConfig;
    }

    /**
     * Rebuilds the fields config from the current fieldspace. Every field of the fieldspace gets an entry keyed by
     * its name which holds the position of the field in the fieldspace and a small definition
     * (`name`, `type`, `subtype`) read from the referenced field.
     *
     * @return {Relation} The current instance, to allow chaining.
     */
    calculateFieldsConfig () {
        this._fieldConfig = this._fieldspace.fields.reduce((acc, fieldDef, i) => {
            const ref = fieldDef._ref;
            acc[fieldDef.name] = {
                index: i,
                def: { name: ref.name, type: ref.fieldType, subtype: ref.subType() }
            };
            return acc;
        }, {});
        return this;
    }


    /**
     * Detaches the current DataModel instance from its parent in the DAG: the instance is removed from the parent's
     * child list and its parent reference is cleared.
     *
     * Calling this on an instance which has no parent (e.g. a root instance or an already disposed one) is a no-op.
     *
     * @public
     */
    dispose () {
        // A root instance has `_parent === null`; guard so that disposing it does not throw.
        if (this._parent) {
            this._parent.removeChild(this);
        }
        this._parent = null;
    }

    /**
     * Removes the specified child {@link DataModel} from the child list of the current {@link DataModel} instance.
     * Nothing happens if the given instance is not a child of the current one.
     *
     * @example
     * const schema = [
     *    { name: 'Name', type: 'dimension' },
     *    { name: 'HorsePower', type: 'measure' },
     *    { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *    { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *    { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *    { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *    { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     *
     * const dt2 = dt.select(fields => fields.Origin.value === "USA")
     * dt.removeChild(dt2);
     *
     * @private
     *
     * @param {DataModel} child - Delegates the parent to remove this child.
     */
    removeChild (child) {
        const idx = this._children.indexOf(child);
        if (idx !== -1) {
            this._children.splice(idx, 1);
        }
    }

    /**
     * Adds the specified {@link DataModel} as a parent for the current {@link DataModel} instance and registers the
     * current instance in the parent's child list.
     *
     * The optional criteriaQueue is an array containing the history of transaction performed on parent
     *  {@link DataModel} to get the current one. It is persisted on the current instance as a compose derivation.
     *
     * @param {DataModel} parent - The datamodel instance which will act as parent.
     * @param {Array} [criteriaQueue=[]] - Queue contains in-between operation meta-data.
     */
    addParent (parent, criteriaQueue = []) {
        persistDerivation(this, DM_DERIVATIVES.COMPOSE, null, criteriaQueue);
        this._parent = parent;
        parent._children.push(this);
    }
}

export default Relation;