/**
* Implementation of a dataset backed by Crossfilter, ie. fully client side filtering without the need for a server or database.
 * Due to limitations of crossfilter with arrays (or data that has no natural ordering), this will not work as expected:
* * dimension: `function (d) {return [d.x, d.y, d.z]}`
* * group: `function (d) {return [d.x / 10 , d.y / 10, d.z / 10]}`
*
 * Therefore, we perform grouping already in the dimension itself, and join the array to a string.
* Strings have a natural ordering and thus can be used as dimension value.
* * dimension: `function (d) -> "d.x/10|d.y/10|d.z/10"`
* * group: `function (d) {return d;}`
* @module client/dataset-client
*/
var moment = require('moment-timezone');
var Dataset = require('./dataset');
var utildx = require('../util-crossfilter');
var misval = require('../misval');
var grpIdxToName = {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e'};
var aggIdxToName = {0: 'aa', 1: 'bb', 2: 'cc', 3: 'dd', 4: 'ee'};
/**
* Crossfilter instance, see [here](http://square.github.io/crossfilter/)
*/
var crossfilter = require('crossfilter2')([]);
/**
* setMinMax sets the range of a continuous or time facet
* @param {Dataset} dataset
* @param {Facet} facet
*/
function setMinMax (dataset, facet) {
  var fn = utildx.valueFn(facet);
  var takeMin;
  var takeMax;
  if (facet.displayContinuous) {
    // plain numeric comparison; misval marks "no value seen yet"
    takeMin = function (a, b) {
      return (b === misval || a < b) ? a : b;
    };
    takeMax = function (a, b) {
      return (b === misval || a > b) ? a : b;
    };
  } else if (facet.displayDatetime) {
    // datetime values compare via moment's isBefore
    takeMin = function (a, b) {
      return (b === misval || a.isBefore(b)) ? a : b;
    };
    takeMax = function (a, b) {
      return (b === misval || b.isBefore(a)) ? a : b;
    };
  } else {
    // facet type has no natural ordering; nothing to scan
    // (previously this fell through and crashed inside the reduction)
    return;
  }
  var group = dataset.crossfilter.groupAll();
  group.reduce(
    function (p, d) { // add
      var v = fn(d);
      if (v !== misval) {
        p.min = takeMin(v, p.min);
        p.max = takeMax(v, p.max);
      }
      return p;
    },
    function (p, v) { // subtract: min/max cannot be maintained incrementally on removal
      return p;
    },
    function () { // initialize
      return {
        min: misval,
        max: misval
      };
    }
  );
  var value = group.value();
  // detach the reduction; otherwise it is re-run on every filter change (leak)
  group.dispose();
  if (value.min === misval || value.max === misval) {
    // every record was missing: there is no range to report
    return;
  }
  if (facet.displayDatetime) {
    facet.minvalAsText = value.min.format();
    facet.maxvalAsText = value.max.format();
  } else {
    facet.minvalAsText = value.min.toString();
    facet.maxvalAsText = value.max.toString();
  }
}
/**
* sampleDataset returns an array containing N random datums from the dataset
* @param {Dataset} dataset
 * @param {integer} N Number of elements to pick
* @returns {Object[]} Array N data Objects
*/
function sampleDataset (dataset, N) {
  var size = dataset.crossfilter.size();
  var wantedElements = [];
  var i;
  for (i = 0; i < N; i++) {
    // Math.floor keeps the index within [0, size - 1]; Math.round could
    // produce `size`, an index no record has, leaving a hole in the sample.
    // NOTE(review): duplicate draws are possible and also leave holes — TODO dedupe
    wantedElements[i] = Math.floor(Math.random() * size);
  }
  // single pass over all records, picking out the wanted row numbers
  var group = dataset.crossfilter.groupAll();
  group.reduce(
    function (p, d) { // add: p.element is the current row number
      var i = wantedElements.indexOf(p.element);
      if (i > -1) {
        p.data[i] = d;
      }
      p.element++;
      return p;
    },
    function (p, v) { // subtract
      return p;
    },
    function () { // initialize
      return {
        element: 0,
        data: []
      };
    }
  );
  var data = group.value().data;
  // detach the reduction; otherwise it is re-run on every filter change (leak)
  group.dispose();
  return data;
}
/**
 * setCategories finds all values on an ordinal (categorial) axis
* Updates the categorialTransform of the facet
*
* @param {Dataset} dataset
* @param {Facet} facet
*/
function setCategories (dataset, facet) {
  var fn = utildx.baseValueFn(facet);
  var group = dataset.crossfilter.groupAll();
  group.reduce(
    function (p, v) { // add: count occurrences of each value
      var vals = fn(v);
      if (!(vals instanceof Array)) {
        vals = [vals];
      }
      vals.forEach(function (val) {
        if (p.hasOwnProperty(val)) {
          p[val]++;
        } else {
          p[val] = 1;
        }
      });
      return p;
    },
    function (p, v) { // subtract
      return p;
    },
    function () { // initialize
      return {};
    }
  );
  var data = group.value();
  // detach the reduction; otherwise it is re-run on every filter change (leak)
  group.dispose();
  facet.categorialTransform.reset();
  Object.keys(data).forEach(function (key) {
    // TODO: missing data should be mapped to a misval from misvalAsText
    // Object.keys already yields strings; map the group 1:1 onto the expression
    facet.categorialTransform.add({expression: key, count: data[key], group: key});
  });
}
/**
* Calculate 100 percentiles (ie. 1,2,3,4 etc.), and initialize the `facet.continuousTransform`
* to an approximate percentile mapping.
* Use the recommended method from [NIST](http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm)
* See also the discussion on [Wikipedia](https://en.wikipedia.org/wiki/Percentile)
* @param {Dataset} dataset
* @param {Facet} facet
*/
function setPercentiles (dataset, facet) {
  var basevalueFn = utildx.baseValueFn(facet);
  var dimension = dataset.crossfilter.dimension(basevalueFn);
  // all records, sorted ascending by base value (missing values sort first)
  var data = dimension.bottom(Infinity);
  dimension.dispose();
  // drop missing values, which should be sorted at the start of the array;
  // bound the scan so an all-missing dataset cannot run past the end
  var i = 0;
  while (i < data.length && basevalueFn(data[i]) === misval) i++;
  data.splice(0, i);
  // start clean
  facet.continuousTransform.reset();
  if (data.length === 0) {
    // no valid data: leave the transform empty
    return;
  }
  // add minimum value as p0
  facet.continuousTransform.add({x: basevalueFn(data[0]), fx: 0});
  var p, x, value;
  for (p = 1; p < 100; p++) {
    // NIST estimation: rank x = (p / 100) * (n + 1), shifted to zero-based indexing
    x = (p * 0.01) * (data.length + 1) - 1;
    i = Math.trunc(x);
    if (x < 0) {
      // rank below the first data point: clamp to the minimum
      value = basevalueFn(data[0]);
    } else if (i >= data.length - 1) {
      // rank at or beyond the last data point: clamp to the maximum
      // (previously this indexed past the end of the array for small datasets)
      value = basevalueFn(data[data.length - 1]);
    } else {
      // linear interpolation between the two neighbouring data points
      value = (1 - x + i) * basevalueFn(data[i]) + (x - i) * basevalueFn(data[i + 1]);
    }
    facet.continuousTransform.add({x: value, fx: p});
  }
  // add maximum value as p100
  facet.continuousTransform.add({x: basevalueFn(data[data.length - 1]), fx: 100});
  facet.transformType = 'percentiles';
}
/**
* Calculate value where exceedance probability is one in 10,20,30,40,50,
* and the same for subceedance (?), ie the exceedance of the dataset where each point is replaced by its negative.
* Approximate from data: 1 in 10 is larger than value at index trunc(0.1 * len(data))
* Set the `facet.continuousTransform` to the approximate mapping.
* @param {Dataset} dataset
* @param {Facet} facet
*/
function setExceedances (dataset, facet) {
  var basevalueFn = utildx.baseValueFn(facet);
  var dimension = dataset.crossfilter.dimension(basevalueFn);
  // all records, sorted ascending by base value (missing values sort first)
  var data = dimension.bottom(Infinity);
  dimension.dispose();
  var exceedances = [];
  var i, oom, mult, n, value, valuep, valuem;
  // drop missing values, which should be sorted at the start of the array
  // NOTE(review): assumes at least one non-missing value — an all-missing
  // dataset would scan past the end of `data` here; confirm with callers
  i = 0;
  while (basevalueFn(data[i]) === misval) i++;
  data.splice(0, i);
  // exceedance:
  // '1 in n' value, or what is the value x such that the probability of drawing a value y with y > x is 1 / n
  // start with the median (the symmetric center point), stored as fx = 0
  if (data.length % 2 === 0) {
    // even number of points: average the two middle values
    valuem = basevalueFn(data[(data.length / 2) - 1]);
    valuep = basevalueFn(data[(data.length / 2)]);
    value = 0.5 * (valuem + valuep);
  } else {
    value = basevalueFn(data[(Math.trunc(data.length / 2))]);
  }
  exceedances = [{x: value, fx: 0}];
  // n runs through 3,4,...,9, 10,20,...,90, 100,200,... while n < data.length:
  // oom is the order of magnitude, mult the multiplier within it
  oom = 1;
  mult = 3;
  while (mult * oom < data.length) {
    n = oom * mult;
    // exceedance: value at the index with roughly 1/n of the data above it
    i = data.length - Math.trunc(data.length / n) - 1;
    value = basevalueFn(data[i]);
    exceedances.push({x: value, fx: n});
    // subceedance (?): mirror image with 1/n of the data below it, stored as fx = -n
    i = data.length - i - 1;
    value = basevalueFn(data[i]);
    exceedances.unshift({x: value, fx: -n});
    mult++;
    if (mult === 10) {
      oom = oom * 10;
      mult = 1;
    }
  }
  // add minimum and maximum values as the '1 in data.length' extremes
  exceedances.unshift({x: basevalueFn(data[0]), fx: -data.length});
  exceedances.push({x: basevalueFn(data[data.length - 1]), fx: data.length});
  // start clean
  facet.continuousTransform.reset();
  // generate rules
  exceedances.forEach(function (ex) {
    facet.continuousTransform.add(ex);
  });
  facet.transformType = 'exceedances';
}
/**
* Autoconfigure a dataset:
* 1. pick 10 random elements
* 2. create facets for their properties
* 3. add facets' values over the sample to the facet.description
*
* @param {Dataset} dataset
*/
function scanData (dataset) {
  // true if a facet with this accessor (or its array variant '[]') already exists
  function facetExists (dataset, path) {
    var exists = false;
    dataset.facets.forEach(function (f) {
      if (f.accessor === path || f.accessor === path + '[]') {
        exists = true;
      }
    });
    return exists;
  }
  // collect unique sampled values; missing values are replaced by `missing`
  function addValue (values, v, missing) {
    if (v === misval) {
      v = missing;
    }
    if (values.indexOf(v) === -1) {
      values.push(v);
    }
  }
  // majority vote over the sampled values: 'continuous', 'timeorduration', or 'categorial'
  function guessType (values) {
    var categorial = 0;
    var continuous = 0;
    var timeorduration = 0;
    var max;
    values.forEach(function (value) {
      if (moment(value, moment.ISO_8601).isValid()) {
        timeorduration++;
      } else if (value == +value) { // eslint-disable-line eqeqeq
        // loose comparison deliberately matches numeric strings too
        continuous++;
      } else {
        categorial++;
      }
    });
    max = Math.max(categorial, continuous, timeorduration);
    if (max === continuous) { // prefer continuous over time
      return 'continuous';
    } else if (max === timeorduration) {
      return 'timeorduration';
    } else {
      return 'categorial';
    }
  }
  // create a facet for `path` (if it does not exist yet) and configure it from the sample
  function tryFacet (dataset, path, value) {
    // Check for existence
    if (facetExists(dataset, path)) {
      return;
    }
    // Create a new facet; the type is refined below once values are sampled
    var facet = dataset.facets.add({
      name: path,
      accessor: path,
      type: 'categorial',
      misvalAsText: '"null"'
    });
    // Sample values
    var baseValueFn = utildx.baseValueFn(facet);
    var values = [];
    var isArray = false;
    data.forEach(function (d) {
      var value = baseValueFn(d);
      if (value instanceof Array) {
        isArray = true;
        value.forEach(function (v) {
          addValue(values, v, facet.misval[0]);
        });
      } else {
        addValue(values, value, facet.misval[0]);
      }
    });
    // Reconfigure facet: array-valued accessors get a trailing '[]' marker
    facet.accessor = isArray ? facet.accessor + '[]' : facet.accessor;
    facet.type = guessType(values);
    facet.description = values.join(', ');
  }
  // walk the (possibly nested) datum, creating a facet per leaf property
  function recurse (dataset, path, tree) {
    var props = Object.getOwnPropertyNames(tree);
    props.forEach(function (name) {
      var subpath;
      if (path) subpath = path + '.' + name; else subpath = name;
      if (tree[name] instanceof Array) {
        // add the array itself as a facet, ie. labelset, to prevent adding each element as separate facet
        // also add the array length as facet
        tryFacet(dataset, subpath, tree[name]);
        tryFacet(dataset, subpath + '.length', tree[name].length);
      } else if (tree[name] instanceof Object) {
        // recurse into objects
        recurse(dataset, subpath, tree[name]);
      } else {
        // add strings and numbers as facets
        tryFacet(dataset, subpath, tree[name]);
      }
    });
  }
  // Add facets based on a small random sample of the data
  var data = sampleDataset(dataset, 10);
  data.forEach(function (d) {
    recurse(dataset, '', d);
  });
}
/**
* Initialize the data filter, and construct the getData callback function on the filter.
* @param {Dataset} dataset
* @param {Filter} filter
*/
function initDataFilter (dataset, filter) {
  // use the partitions as groups: one group function per partition, ordered by rank
  var groupFns = [];
  filter.partitions.forEach(function (partition) {
    var facet = dataset.facets.get(partition.facetId);
    var valueFn = utildx.valueFn(facet);
    var groupFn = utildx.groupFn(partition);
    var rank = partition.rank;
    groupFns[rank - 1] = function (d) {
      return groupFn(valueFn(d));
    };
  });
  // combine the group values of all partitions into compound '|'-joined keys;
  // an array-valued group value multiplies the key set (one key per combination)
  var groupsKeys = function (d) {
    var keys = [];
    groupFns.forEach(function (groupFn) {
      var result = groupFn(d);
      var newKeys = [];
      if (keys.length === 0) {
        newKeys = (result instanceof Array) ? result : [result];
      } else if (result instanceof Array) {
        keys.forEach(function (oldKey) {
          result.forEach(function (key) {
            newKeys.push(oldKey + '|' + key);
          });
        });
      } else {
        keys.forEach(function (oldKey) {
          newKeys.push(oldKey + '|' + result);
        });
      }
      keys = newKeys;
    });
    return keys;
  };
  // set up the facet valueFns to aggregate over
  // and the reduction functions for them
  var aggregateFns = [];
  var reduceFns = [];
  if (filter.aggregates.length === 0) {
    // fall back to just counting items
    aggregateFns[0] = function (d) { return 1; };
    reduceFns[0] = function (d) { return d.count; };
  } else {
    filter.aggregates.forEach(function (aggregate) {
      var facet = dataset.facets.get(aggregate.facetId);
      aggregateFns.push(utildx.valueFn(facet));
      reduceFns.push(utildx.reduceFn(aggregate));
    });
  }
  // setup the crossfilter dimension (multi-valued: groupsKeys returns an array) and group
  filter.dimension = dataset.crossfilter.dimension(function (d) {
    return groupsKeys(d);
  }, true);
  var group = filter.dimension.group(function (d) { return d; });
  group.reduce(
    function (p, d) { // add
      aggregateFns.forEach(function (aggregateFn, i) {
        var value = aggregateFn(d);
        p[i] = p[i] || {count: 0, sum: 0};
        // BUGFIX: count the records and sum the values separately; previously
        // the facet value was added to both fields, so count always equalled sum
        p[i].count += 1;
        p[i].sum += value;
      });
      return p;
    },
    function (p, d) { // subtract
      aggregateFns.forEach(function (aggregateFn, i) {
        var value = aggregateFn(d);
        p[i] = p[i] || {count: 0, sum: 0};
        p[i].count -= 1;
        p[i].sum -= value;
      });
      return p;
    },
    function () { // initialize
      return [];
    }
  );
  // getData assembles filter.data from the crossfilter group and fires 'newData'
  filter.getData = function () {
    filter.data = [];
    // Get data from crossfilter:
    // { key: "group1|group2|...",
    //   value: [ {count: c1, sum: s1},
    //            {count: c2, sum: s2},
    //            ... ]}
    var groups = group.all();
    groups.forEach(function (group) {
      var item = {};
      // turn the compound key back into individual group values
      var groupsKeys = group.key.split('|');
      // add partitioning data to the item under keys a, b, c, ...
      groupsKeys.forEach(function (subkey, i) {
        item[grpIdxToName[i]] = subkey;
      });
      // add aggregated data to the item under keys aa, bb, cc, ...
      reduceFns.forEach(function (reduceFn, i) {
        item[aggIdxToName[i]] = reduceFn(group.value[i]);
      });
      filter.data.push(item);
    });
    filter.trigger('newData');
  };
}
/**
 * The opposite of initDataFilter: it should remove the filter and deallocate other configuration
* related to the filter.
* @param {Dataset} dataset
* @param {Filter} filter
*/
function releaseDataFilter (dataset, filter) {
  // nothing to release when the filter was never initialized
  if (!filter.dimension) {
    return;
  }
  // clear any active selection before disposing the crossfilter dimension
  filter.dimension.filterAll();
  filter.dimension.dispose();
  delete filter.dimension;
  delete filter.getData;
}
/**
* Change the filter parameters for an initialized filter
* @param {Dataset} dataset
* @param {Filter} filter
*/
function updateDataFilter (dataset, filter) {
  var dimension = filter.dimension;
  // only initialized filters carry a crossfilter dimension
  if (dimension) {
    // re-apply the filter's current predicate to the dimension
    dimension.filterFunction(filter.filterFunction());
  }
}
/**
 * The client dataset: a Dataset whose virtual methods are implemented
 * against the module-level crossfilter instance.
 */
module.exports = Dataset.extend({
  props: {
    // distinguishes this client-side implementation from other dataset types
    datasetType: {
      type: 'string',
      setOnce: true,
      default: 'client'
    }
  },
  /*
   * Implementation of virtual methods
   */
  scanData: function () {
    scanData(this);
  },
  setMinMax: setMinMax,
  setCategories: setCategories,
  setPercentiles: setPercentiles,
  setExceedances: setExceedances,
  initDataFilter: initDataFilter,
  releaseDataFilter: releaseDataFilter,
  updateDataFilter: updateDataFilter,
  /*
   * Crossfilter Object, for generating dimensions
   * NOTE(review): this instance is created once at module load and is shared by
   * every dataset built from this module — confirm only one is active at a time
   */
  crossfilter: crossfilter
});