All files / datamodel/src/operator compose.js

100% Statements 26/26
100% Branches 7/7
100% Functions 11/11
100% Lines 18/18

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305                                                                                                                                                6x                                                                                                         3x                                                                                                                       3x                                                                                     1x                                                                                                           1x 8x 9x   9x 9x   9x 15x 15x 15x 9x       9x 9x 5x     9x    
 
/**
 * DataModel's operators are exposed as composable functional operators as well as chainable operators. Chainable
 * operators are called on the instances of {@link Datamodel} class.
 *
 * Those same operators can be used as composable operators from `DataModel.Operators` namespace.
 *
 * The procedure of invoking all these operators is consistent. Each of these operators, when called with its
 * arguments, returns a function which expects a DataModel instance.
 *
 * @public
 * @module Operators
 * @namespace DataModel
 */
 
/**
 * This is functional version of selection operator. Selection is a row filtering operation. It takes
 * {@link SelectionPredicate | predicate} for filtering criteria and returns a function. The returned function is called
 * with the DataModel instance on which the action needs to be performed.
 *
 * {@link SelectionPredicate} is a function which returns a boolean value. For selection operation the selection
 * function is called for each row of DataModel instance with the current row passed as argument.
 *
 * After executing {@link SelectionPredicate} the rows are labeled as either an entry of selection set or an entry
 * of rejection set.
 *
 * {@link FilteringMode} operates on the selection and rejection set to determine which one would reflect in the
 * resultant datamodel.
 *
 * @warning
 * Note
 * [Warn] Selection and rejection set is only a logical idea for concept explanation purpose.
 *
 * @error
 * Not all modes are valid
 * `FilteringMode.ALL` is not a valid working mode for functional version of `select`. It's only available on the
 * chained version.
 *
 * @example
 *  //@preamble_start
 *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
 *  const data = params[0];
 *  const schema = params[1];
 *  const DataModel = muze.DataModel;
 *  const dm = new DataModel(data, schema);
 *  //@preamble_end
 *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
 *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm. DataModel is extracted from
 *  // muze namespace and assigned to the DataModel variable.
 *  const select = DataModel.Operators.select;
 *  usaCarsFn = select(fields => fields.Origin.value === 'USA');
 *  outputDM = usaCarsFn(dm);
 *  //@preamble_start
 *  printDM(outputDM);
 *  });
 *  //@preamble_end
 *
 * @public
 * @namespace DataModel
 * @segment Operators
 *
 * @param {SelectionPredicate} selectFn - Predicate function which is called for each row with the current row
 *      ```
 *          function (row, i)  { ... }
 *      ```
 * @param {Object} [config] - The configuration object to control the inclusion exclusion of a row in resultant
 *      DataModel instance
 * @param {FilteringMode} [config.mode=FilteringMode.NORMAL] - The mode of the selection
 *
 * @return {PreparatorFunction} Function which expects an instance of DataModel on which the operator needs to be
 *      applied.
 */
export const select = (...selectArgs) => (dataModel) => {
    // Forward the captured arguments to the chainable `select` of the supplied instance.
    return dataModel.select(...selectArgs);
};
 
/**
 * This is functional version of projection operator. Projection is a column (field) filtering operation. It expects
 * list of fields name and either include those or exclude those based on {@link FilteringMode} on the resultant
 * dataModel. It returns a function which is called with the DataModel instance on which the action needs to be
 * performed.
 *
 * Projection expects array of fields name based on which it creates the selection and rejection set. All the field
 * whose name is present in array goes in selection set and rest of the fields goes in rejection set.
 *
 * {@link FilteringMode} operates on the selection and rejection set to determine which one would reflect in the
 * resultant datamodel.
 *
 * @warning
 * Note
 * Selection and rejection set is only a logical idea for concept explanation purpose.
 *
 * @error
 * Not all modes are valid
 * `FilteringMode.ALL` is not a valid working mode for functional version of `project`. It's only available on the
 * chained version.
 *
 * @example
 *  //@preamble_start
 *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
 *  const data = params[0];
 *  const schema = params[1];
 *  const DataModel = muze.DataModel;
 *  const dm = new DataModel(data, schema);
 *  //@preamble_end
 *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
 *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm. DataModel is extracted from
 *  // muze namespace and assigned to the DataModel variable.
 *  const project = DataModel.Operators.project;
 *  usaCarsFn = project(['Name'], { mode: DataModel.FilteringMode.INVERSE });
 *  outputDM = usaCarsFn(dm);
 *  //@preamble_start
 *  printDM(outputDM);
 *  });
 *  //@preamble_end
 *
 * @public
 * @namespace DataModel
 * @segment Operators
 *
 * @param {Array.<string | Regexp>} projField - An array of column names in string or regular expression.
 * @param {Object} [config] - An optional config to control the creation of new DataModel
 * @param {FilteringMode} [config.mode=FilteringMode.NORMAL] - Mode of the projection
 *
 * @return {PreparatorFunction} Function which expects an instance of DataModel on which the operator needs to be
 *      applied.
 */
export const project = (...projectArgs) => (dataModel) => {
    // Forward the captured arguments to the chainable `project` of the supplied instance.
    return dataModel.project(...projectArgs);
};
 
/**
 * This is functional version of binning operator. Binning happens on a measure field based on a binning configuration.
 * Binning in DataModel does not aggregate the number of rows present in DataModel instance after binning, it just adds
 * a new field with the binned value. Refer binning {@link example_of_binning | example} to have an intuition of what
 * binning is and the use case.
 *
 * Binning can be configured by
 * - providing custom bin configuration with non uniform buckets
 * - providing bin count
 * - providing each bin size
 *
 * When custom buckets are provided as part of binning configuration
 * @example
 *  // DataModel already prepared and assigned to dm variable
 *  const buckets = {
 *      start: 30,
 *      stops: [80, 100, 110]
 *  };
 *  const config = { buckets, name: 'binnedHP' }
 *  const binFn = bin('horsepower', config);
 *  const binnedDm = binFn(dm);
 *
 * @text
 * When `binCount` is defined as part of binning configuration
 * @example
 *  // DataModel already prepared and assigned to dm variable
 *  const config = { binCount: 5, name: 'binnedHP' }
 *  const binFn = bin('horsepower', config);
 *  const binnedDm = binFn(dm);
 *
 * @text
 * When `binSize` is defined as part of binning configuration
 * @example
 *  // DataModel already prepared and assigned to dm variable
 *  const config = { binSize: 200, name: 'binnedHorsepower' }
 *  const binFn = bin('horsepower', config);
 *  const binnedDm = binFn(dm);
 *
 * @todo Fix interaction of binning and then make it public
 * @private
 * @namespace DataModel
 * @module Operators
 *
 * @param {String} name Name of measure which will be used to create bin
 * @param {Object} config Config required for bin creation
 * @param {Array.<Number>} config.bucketObj.stops Definition of bucket ranges. Two consecutive numbers from the array
 *      are picked and a range is created. The first number of the range is inclusive and the second number of the
 *      range is exclusive.
 * @param {Number} [config.bucketObj.startAt] Force the start of the bin from a particular number.
 *      If not mentioned, the bin starts at the lower domain of the data when stops is not mentioned, else at
 *      the first value of the stops.
 * @param {Number} config.binSize Bucket size for each bin
 * @param {Number} config.binCount Number of bins which will be created
 * @param {String} config.name Name of the new binned field to be created
 *
 * @return {PreparatorFunction} Function which expects an instance of DataModel on which the operator needs to be
 *      applied.
 */
export const bin = (...binArgs) => (dataModel) => {
    // Forward the captured arguments to the chainable `bin` of the supplied instance.
    return dataModel.bin(...binArgs);
};
 
/**
 * This is functional version of `groupBy` operator. This operator groups the data using particular dimensions and by
 * reducing measures. It expects a list of dimensions using which it projects the datamodel and perform aggregations to
 * reduce the duplicate tuples. Refer {@link /muze/docs/datamodel-operators#groupby | this document} to know the
 * intuition behind groupBy.
 *
 * DataModel by default provides definition of few {@link reducer | Reducers}.
 * {@link ReducerStore | User defined reducers} can also be registered.
 *
 * @example
 *  //@preamble_start
 *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
 *  const data = params[0];
 *  const schema = params[1];
 *  const DataModel = muze.DataModel;
 *  const dm = new DataModel(data, schema);
 *  //@preamble_end
 *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
 *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm. DataModel is extracted from
 *  // muze namespace and assigned to the DataModel variable.
 *
 *  const groupBy = DataModel.Operators.groupBy;
 *  const groupByFn = groupBy(['Year'], { horsepower: 'max' } );
 *  const outputDM = groupByFn(dm);
 *  //@preamble_start
 *  printDM(outputDM);
 *  });
 *  //@preamble_end
 *
 * @public
 * @namespace DataModel
 * @segment Operators
 *
 * @param {Array.<string>} fieldsArr Array containing the name of dimensions
 * @param {Object} [reducers={}] A map whose key is the variable name and value is the name of the reducer. If it's
 *      not passed, or any variable is omitted from the object, default aggregation function is used from the
 *      schema of the variable.
 *
 * @return {PreparatorFunction} Function which expects an instance of DataModel on which the operator needs to be
 *      applied.
 */
export const groupBy = (...groupByArgs) => (dataModel) => {
    // Forward the captured arguments to the chainable `groupBy` of the supplied instance.
    return dataModel.groupBy(...groupByArgs);
};
 
/**
 * It enables you to create new operator by composing existing operators. The newly created operator is used like any
 * other operator. The operations provided will be executed in a serial manner ie. result of one operation will be the
 * input for the next operations (like pipe operator in unix).
 *
 * Compose has added benefits which chaining does not provide. For example, if a group of operators is involved in
 * transforming data, chaining would create so many intermediate DataModel instances. If `compose` is used no
 * intermediate DataModels are created.
 *
 * Supported operators in compose are
 * - `select`
 * - `project`
 * - `groupBy`
 * - `bin`
 * - Any operator created using `compose`
 *
 * @example
 *  //@preamble_start
 *  Promise.all([loadData('/static/cars.json'), loadData('/static/cars-schema.json')]).then(function (params) {
 *  const data = params[0];
 *  const schema = params[1];
 *  const DataModel = muze.DataModel;
 *  const dm = new DataModel(data, schema);
 *  //@preamble_end
 *  // DataModel instance is created from https://www.charts.com/static/cars.json data,
 *  // https://www.charts.com/static/cars-schema.json schema and assigned to variable dm. DataModel is extracted from
 *  // muze namespace and assigned to the DataModel variable.
 *  const compose = DataModel.Operators.compose;
 *  const select = DataModel.Operators.select;
 *  const project = DataModel.Operators.project;
 *
 *  let lowCylCarsFromUSADM= compose(
 *      select(fields => fields.Origin.value === 'USA' && fields.Cylinders.value === '4' ),
 *      project(['Origin', 'Cylinders'], { mode: DataModel.FilteringMode.INVERSE })
 *  );
 *
 *  const outputDM = lowCylCarsFromUSADM(dm);
 *  //@preamble_start
 *  printDM(outputDM);
 *  });
 *  //@preamble_end
 *
 * @public
 * @namespace DataModel
 * @module Operators
 *
 * @param {...Operators} operations - The operations which will be applied, in the given order, on the
 *      DataModel.
 *
 * @return {PreparatorFunction} Function which expects an instance of DataModel on which the operator needs to be
 *      applied.
 */
export const compose = (...operations) =>
    // The returned operator takes the source DataModel `dm` and an optional `config`. When
    // `config.saveChild` is truthy (the default config sets it to true) the final result is
    // attached to `dm` through `addParent`, together with the collected derivations.
    (dm, config = { saveChild: true }) => {
        const { saveChild } = config;
        const derivations = [];
        let currentDM = dm;
        let firstChild;

        // Pipe the model through every operation in order, accumulating the `_derivation`
        // entries reported by each intermediate result.
        operations.forEach((operation) => {
            currentDM = operation(currentDM);
            derivations.push(...currentDM._derivation);
            if (!firstChild) {
                firstChild = currentDM;
            }
        });

        // When no operation ran (or every operation handed the input back) the result is
        // `dm` itself; linking it would register the model as its own parent.
        if (saveChild && currentDM !== dm) {
            currentDM.addParent(dm, derivations);
        }

        // The first intermediate result is only scaffolding once later results exist. If it is
        // also the final result (a single operation that produced several derivations, such as
        // a nested compose) it must be kept, otherwise the returned model would be disposed.
        if (derivations.length > 1 && firstChild !== currentDM) {
            firstChild.dispose();
        }

        return currentDM;
    };