// src/supergroup.js
'use strict';
import _ from 'lodash';
import assert from 'assert';
/**
* @Author: [Sigfried Gold](http://sigfried.org)
* @License: [MIT](http://sigfried.mit-license.org/)
* @Version: 2.0.0
*/
; // jshint -W053
/**
* ### [Tutorial and demo](http://sigfried.github.io/supergroup/)
* ### [Article](http://www.toptal.com/javascript/ultimate-in-memory-data-collection-manipulation-with-supergroup-js)
*
* usage examples at [http://sigfried.github.io/blog/supergroup](http://sigfried.github.io/blog/supergroup)
*
* Available as _.supergroup, Underscore mixin
* ### Class of grouped records masquerading as an array
* A `Supergroup` object is an array of `Value` objects made by grouping
* an array of json objects by some set of properties or functions performed
* on those objects. Each `Value` represents a single group. Think of it as
* a SQL group by:
*
* SELECT state, zipcode, count(*)
* FROM addresses
* GROUP BY state, zipcode
*
* In Supergroup parlance: 'state' and 'zipcode' are _dimensions_; states
* ('Alabama', 'Alaska') and zipcodes (50032, 20002) are _values_, or,
* rather, value _keys_; and `count(*)` is an aggregation performed on the
* group. In regular SQL the underlying records represented in a group are
* not available, with Supergroup they are. So a `Value` has a `key` which
* is the text or number or any javascript object used to form the group.
* In a group of states, the _key_ of each value would be a `string`, for
* zipcodes it could be a `number`. (In previous versions of Supergroup,
* these were `String` and `Number` objects, but now they are `string`
* literals or anything else returnable by a grouping function.)
*
* `Value` objects have a `key`, and `valueobj.valueOf()` will return that
* key, and `valueobj.toString()` will return the results of the default
* toString method on that key. `valueobj.records` is an array of the original
* javascript objects included in the group represented by the key. And
* `valueobj.indexes` is an array of the positions of those records in the
* original array.
*
* - #### Supergroup extends `Array`
* - `Array` values are `Values`
* - properties:
* - groupsmap: keys are the keys used to group Values, values are Values
* - recsmap: keys are index into original records array, values are orig records
* - methods:
* - rawValues: returns keys from groupsmap
*
* - Values
* - depth: same as the depth of its parentList (supergroup)
* - children: array of child Values collected in a supergroup (whose
* depth is one greater than the depth of this Value)
*
*/
export class Supergroup extends Array {
  /**
   * Groups records by the first dim and recursively builds a child
   * Supergroup under every resulting Value for the remaining dims.
   *
   * @param {Object} [spec]
   * @param {Value} [spec.parent=null] Value this list hangs under. When
   *        omitted a placeholder root of depth -1 is created from `recs`,
   *        so the first level of Values gets depth 0.
   * @param {Object[]} [spec.recs=[]] raw records. Only used when `parent`
   *        is omitted or carries no record map of its own.
   * @param {Array<string|function>|string|function} [spec.dims=[]] property
   *        names and/or functions on raw records that return the grouping
   *        key. A single name or function may be given instead of an array.
   * @param {string[]} [spec.dimNames=[]] display names matching `dims`.
   *        A property-name dim is its own default name; a function dim
   *        defaults to its source text.
   * @param {Object} [spec.opts={}] legacy options bag, forwarded to child
   *        Supergroups (minus dimName/dimNames, which are consumed level by
   *        level).
   * @param {string} [spec.opts.dimName] name for a single dim; wins over
   *        `opts.dimNames` and `dimNames`.
   * @param {string[]} [spec.opts.dimNames] wins over `dimNames`.
   * @param {Array|Map|Set} [spec.opts.excludeValues] keys for which no group
   *        is created; records with those keys are skipped.
   * @throws {Error} for opts.multiValuedGroup(s), opts.preListRecsHook and
   *         opts.truncateBranchOnEmptyVal, which are not re-implemented.
   * @throws {TypeError} when no dim is supplied.
   */
  constructor({ parent = null,
                recs = [],
                dims = [], dimNames = [], opts = {} // get rid of opts
              } = {}) {
    super();
    const options = opts || {};
    if (options.multiValuedGroup || options.multiValuedGroups) {
      throw new Error("multiValuedGroup not implemented in es6 version yet");
    }
    if (options.preListRecsHook) {
      throw new Error("preListRecsHook not re-implemented yet");
    }
    if (options.truncateBranchOnEmptyVal) { // can't remember when this is used
      throw new Error("truncateBranchOnEmptyVal not re-implemented yet");
    }

    // Work on copies so the caller's dims/dimNames arrays are never mutated.
    const dimList = Array.isArray(dims) ? dims.slice() : [dims];
    const nameSource = (options.dimName && [options.dimName]) ||
                       options.dimNames || dimNames;
    let nameList;
    if (nameSource === undefined || nameSource === null) {
      nameList = [];
    } else {
      nameList = Array.isArray(nameSource) ? nameSource.slice() : [nameSource];
    }
    if (dimList.length === 0 || dimList[0] === undefined || dimList[0] === null) {
      throw new TypeError("Supergroup needs at least one dim (property name or function) to group by");
    }

    this.root = parent || Supergroup.makeRoot('root', -1, recs);
    this.parent = this.root;
    // Values in this list sit one level below the parent Value.
    this.depth = (typeof this.parent.depth === 'number' ? this.parent.depth : -1) + 1;
    this.dims = dimList;
    this.dimNames = nameList;
    this.dim = dimList[0];
    this.dimName = nameList[0] || this.dim.toString();

    // `recsmap` is accepted as a legacy spelling of `recsMap`.
    this.recsMap = this.parent.recsMap || this.parent.recsmap ||
                   Supergroup.indexRecords(recs);
    // Plain-array view for the methods that still read `.records`.
    this.records = Array.from(this.recsMap.values());

    const dim = this.dim;
    this.dimFunc = typeof dim === 'function' ? dim : (d) => d[dim];

    const excluded = options.excludeValues;
    const isExcluded = (key) => {
      if (!excluded) return false;
      if (Array.isArray(excluded)) return excluded.indexOf(key) !== -1;
      if (excluded instanceof Map || excluded instanceof Set) return excluded.has(key);
      return false;
    };

    this.groupsMap = new Map();
    this.recsMap.forEach((rec, i) => {
      const key = this.dimFunc(rec); // this is the key for grouping!
      if (isExcluded(key)) return;
      let val = this.groupsMap.get(key);
      if (!val) {
        val = new Value(key);
        val.dim = this.dimName;
        val.recsMap = new Map();
        val.records = [];
        val.depth = this.depth;
        val.parentList = this;
        // The placeholder root is deliberately not linked in, so paths built
        // by walking `.parent` only contain an explicitly supplied parent.
        if (parent) val.parent = parent;
        this.groupsMap.set(key, val); // save the val in the keyed map
        this.push(val);               // also save it as an array entry
      }
      val.recsMap.set(i, rec); // index is the record's position in the original array
      val.records.push(rec);
    });

    const restDims = dimList.slice(1);
    if (restDims.length) {
      const restNames = nameList.slice(1);
      // dimName/dimNames in opts describe THIS level onwards; strip them so
      // children use the already-shifted `restNames` instead of starting over.
      const childOpts = Object.assign({}, options);
      delete childOpts.dimName;
      delete childOpts.dimNames;
      this.forEach((val) => {
        val.children = new Supergroup({
          parent: val, dims: restDims, dimNames: restNames, opts: childOpts,
        });
      });
    }
  }

  /**
   * Native Array methods that build a new array (map, filter, slice, ...)
   * construct it through the species constructor with a numeric length.
   * Returning Array keeps them from re-entering the grouping constructor;
   * their results are plain arrays.
   */
  static get [Symbol.species]() {
    return Array;
  }

  /**
   * Build the index -> record Map used throughout the tree.
   * @param {Object[]} recs
   * @return {Map<number, Object>}
   */
  static indexRecords(recs) {
    const map = new Map();
    Array.from(recs || []).forEach((rec, i) => {
      map.set(i, rec);
    });
    return map;
  }

  /** There are times when you want to give your supergroup tree an explicit
   *  root, like when creating hierarchies in D3. In that case call supergroup
   *  like:
   *
   *      let root = Supergroup.makeRoot('Tree Top', 0, recs);
   *      let sg = new Supergroup({parent: root, dims: ['state','zipcode']});
   *
   *  Otherwise Supergroup will make its own fake root with depth -1 instead
   *  of depth 0.
   *
   *  @param {string} [name="root"] key of the root Value
   *  @param {number} depth depth assigned to the root
   *  @param {Object[]} recs records indexed into root.recsMap
   *  @param {string} [dimName=name] value for root.dim
   *  @return {Value}
   */
  static makeRoot(name, depth, recs, dimName) {
    name = name || "root";
    dimName = dimName || name;
    const root = new Value(name);
    root.dim = dimName;
    root.depth = depth;
    root.recsMap = Supergroup.indexRecords(recs);
    root.recsmap = root.recsMap; // legacy spelling, same Map
    root.records = Array.from(root.recsMap.values());
    return root;
  }

  /** @return {State} a new State wrapping this list */
  state() {
    return new State(this);
  }

  /**
   * Sometimes a root value is needed as the top of a hierarchy. Creates a
   * depth-0 Value whose children are this list, re-parents this list's
   * Values to it and increments the depth of all its descendants.
   * @param {string} [name="Root"]
   * @param {string} [dimName="root"]
   * @return {Value}
   */
  asRootVal(name, dimName) {
    const val = new Value(name || 'Root');
    val.dim = dimName || 'root';
    val.depth = 0;
    val.records = this.records;
    val.children = this;
    this.forEach((d) => { d.parent = val; });
    (val.descendants() || []).forEach((d) => { d.depth = d.depth + 1; });
    return val;
  }

  /**
   * Leaf Values below this list, as a plain array. A Value whose own
   * leafNodes() yields nothing is itself treated as a leaf.
   * @param {*} [level] unused
   * @return {Value[]}
   */
  leafNodes(level) {
    const leaves = [];
    this.forEach((val) => {
      const below = val.leafNodes();
      if (below && below.length) {
        below.forEach((leaf) => leaves.push(leaf));
      } else {
        leaves.push(val);
      }
    });
    return leaves;
  }

  /** @return {Array} the grouping keys of this list, in insertion order */
  rawValues() {
    return Array.from(this.groupsMap.keys());
  }

  /** lookup a value in a list, or, if query is an array
   *  it is interpreted as a path down the group hierarchy.
   *  @return {Value|undefined} undefined as soon as any step is not found */
  lookup(query) {
    if (!Array.isArray(query)) {
      return this.singleLookup(query);
    }
    let list = this;
    let found;
    for (const step of query) {
      if (!list || typeof list.singleLookup !== 'function') return undefined;
      found = list.singleLookup(step);
      if (!found) return undefined;
      list = found.children;
    }
    return found;
  }

  /**
   * Lazily build (and cache on `this.lookupMap`) a dictionary from each
   * Value's string form to the Value.
   */
  getLookupMap() {
    if (!('lookupMap' in this)) {
      // No prototype, so keys such as "constructor" cannot collide.
      const map = Object.create(null);
      this.forEach((d) => {
        const key = String(d);
        if (key in map) {
          console.warn('multiple occurrence of ' + d +
                       ' in list. Lookup will only get the last');
        }
        map[key] = d;
      });
      this.lookupMap = map;
    }
    return this.lookupMap;
  }

  /** @return {Value|undefined} the Value whose string form equals `query` */
  singleLookup(query) {
    return this.getLookupMap()[query];
  }

  /** lookup more than one thing at a time; misses are dropped.
   *  @return {Value[]} plain array */
  lookupMany(query) {
    return Array.from(query || [])
      .map((q) => this.singleLookup(q))
      .filter(Boolean);
  }

  /** @return {Value[]} every Value in this list, each followed by its descendants */
  flattenTree() {
    const flat = [];
    this.forEach((d) => {
      if (!d) return; // expunge nulls
      flat.push(d);
      const desc = typeof d.descendants === 'function' ? d.descendants() : undefined;
      if (desc) {
        desc.forEach((x) => { if (x) flat.push(x); });
      }
    });
    return flat;
  }

  /** @return {Array} namePath(opts) of every Value in this list */
  namePaths(opts) {
    return Array.from(this, (d) => d.namePath(opts));
  }

  /**
   * Apply a function to the records of each group.
   * @param {function} func aggregation function, handed to Value#aggregate
   * @param {string|function} field handed to Value#aggregate
   * @param {string} [ret] pass 'dict' to get an object keyed by each Value's
   *        string form instead of an array
   */
  aggregates(func, field, ret) {
    const results = Array.from(this, (val) => val.aggregate(func, field));
    if (ret === 'dict') {
      const dict = {};
      this.forEach((val, i) => { dict[val] = results[i]; });
      return dict;
    }
    return results;
  }

  /** @return {Object[]} nested `{key, values}` entries; leaves carry their records */
  d3NestEntries() {
    return Array.from(this, (val) => {
      if ('children' in val) {
        return { key: val.toString(), values: val.children.d3NestEntries() };
      }
      return { key: val.toString(), values: val.records };
    });
  }

  /** @return {Object} nested objects keyed by value; leaves map to their records */
  d3NestMap() {
    const map = {};
    this.forEach((val) => {
      map[val + ''] = val.children ? val.children.d3NestMap() : val.records;
    });
    return map;
  }

  /**
   * Top-most list of the tree this list belongs to.
   * @param {*} [func] unused
   */
  rootList(func) {
    // `parentVal` is the legacy link; the constructor records the owning
    // Value as `parent`. A root Value has no parentList, which ends the walk.
    const owner = this.parentVal || this.parent;
    if (owner && owner.parentList) {
      return owner.parentList.rootList();
    }
    return this;
  }

  /**
   * True when every key of `groups` is numeric or one of "null", "." or
   * "undefined" (case-insensitive). In that case the non-numeric keys are
   * DELETED from `groups` as a side effect.
   * @param {Object} groups
   * @return {boolean}
   */
  static wholeListNumeric(groups) {
    const keys = Object.keys(groups || {});
    const isNumeric = keys.every((k) =>
      !Number.isNaN(Number(k)) ||
      ["null", ".", "undefined"].indexOf(k.toLowerCase()) > -1
    );
    if (isNumeric) {
      keys.forEach((k) => {
        if (Number.isNaN(Number(k))) {
          delete groups[k]; // getting rid of NULL values in dim list!!
        }
      });
    }
    return isNumeric;
  }
}
/** Summarize a collection of numbers (or of records holding a number).
 *
 * @param {Array|Object} list numbers, or records when `numericDim` is given.
 *        Arrays, array-likes and iterables are read in order; any other
 *        object contributes its own enumerable property values.
 * @param {string} [numericDim] property to read from each record
 * @return {{sum: number, cnt: number, max: number, avg: (number|undefined)}}
 *         `avg` is only present when the collection is non-empty; `max` is
 *         -Infinity for an empty collection.
 *
 * @memberof supergroup
 */
const aggregate = function(list, numericDim) {
  let values;
  if (list === null || list === undefined) {
    values = [];
  } else if (typeof list.length === 'number' ||
             typeof list[Symbol.iterator] === 'function') {
    values = Array.from(list);
  } else {
    values = Object.keys(list).map((k) => list[k]);
  }
  if (numericDim) {
    values = values.map((rec) =>
      (rec === null || rec === undefined) ? undefined : rec[numericDim]);
  }
  return values.reduce((memo, num) => {
    memo.sum += num;
    memo.cnt++;
    memo.avg = memo.sum / memo.cnt;
    memo.max = Math.max(memo.max, num);
    return memo;
  }, { sum: 0, cnt: 0, max: -Infinity });
};
/** Compare groups across two similar root nodes.
 *
 * Groups the records of `from` and of `to` by `dim` and merges the two
 * resulting lists with `compare`.
 *
 * @param {Object} from node whose `records` form the "from" side
 * @param {Object} to node whose `records` form the "to" side
 * @param {string|function|Array} dim dimension(s) to group both sides by
 * @param {Object} [opts] forwarded to Supergroup; `opts.dimName`, when set,
 *        becomes the `dim` of the returned list
 * @return {Array} the list produced by `compare`, with `dim` set
 *
 * used by treelike and some earlier code
 *
 * @memberof supergroup
 */
const diffList = function(from, to, dim, opts) {
  // The constructor takes a single options object; each side gets its own
  // copy of the dims so neither call can disturb the other's.
  const fromList = new Supergroup({ recs: from.records, dims: [].concat(dim), opts: opts });
  const toList = new Supergroup({ recs: to.records, dims: [].concat(dim), opts: opts });
  const list = compare(fromList, toList, dim);
  list.dim = (opts && opts.dimName) ? opts.dimName : dim;
  return list;
};
/** Compare two groups by a dimension.
 *
 * Every distinct name (string form of an entry) found in `A` or `B` yields
 * one Value carrying:
 *   - `name`, `dim`
 *   - `in`: 'from' (only in A), 'to' (only in B) or 'both'
 *   - `from` / `fromIdx` and/or `to` / `toIdx`: the source entries and
 *     their positions
 *   - `records`: the `from` entry's records followed by the `to` entry's
 *
 * @param {Array} A "from" list; entries need a string form and `records`
 * @param {Array} B "to" list
 * @param {*} dim stored on every resulting Value
 * @return {Value[]} plain array
 *
 * @memberof supergroup
 */
const compare = function(A, B, dim) {
  // A Map keeps insertion order for every key (including numeric-looking
  // ones) and cannot collide with Object.prototype members.
  const comp = new Map();
  Array.from(A || []).forEach((d, i) => {
    const name = d + '';
    comp.set(name, {
      name: name,
      'in': 'from',
      from: d,
      fromIdx: i,
      dim: dim
    });
  });
  Array.from(B || []).forEach((d, i) => {
    const name = d + '';
    const seen = comp.get(name);
    if (seen) {
      seen['in'] = "both";
      seen.to = d;
      seen.toIdx = i;
    } else {
      comp.set(name, {
        name: name,
        'in': 'to',
        to: d,
        toIdx: i,
        dim: dim
      });
    }
  });
  // NOTE(review): this comparator yields NaN when an index is missing on
  // either side, so it is not a total order; kept unchanged because the
  // intended interleaving of 'from'-only and 'to'-only entries is unclear.
  const list = Array.from(comp.values()).sort(function(a, b) {
    return (a.fromIdx - b.fromIdx) || (a.toIdx - b.toIdx);
  }).map(function(d) {
    const val = new Value(d.name);
    Object.assign(val, d);
    val.records = [];
    if ('from' in d)
      val.records = val.records.concat(d.from.records);
    if ('to' in d)
      val.records = val.records.concat(d.to.records);
    return val;
  });
  list.forEach(function(d) {
    d.parentList = list;     // NOT TESTED, NOT USED, PROBABLY WRONG
    d.records.parentVal = d; // NOT TESTED, NOT USED, PROBABLY WRONG
  });
  return list;
};
/** Merge two Values of the same dimension into one "from ... to ..." Value.
 *
 * The result is named `<from> to <to>`, has depth 0, is flagged as being
 * in "both", keeps references to the two inputs and carries their records
 * concatenated (from first).
 *
 * @param {Value} from
 * @param {Value} to
 * @return {Value}
 * @throws {Error} when the two Values have different `dim`s
 *
 * @memberof supergroup
 */
var compareValue = function(from, to) { // any reason to keep this?
  var sharedDim = from.dim;
  if (sharedDim !== to.dim) {
    throw new Error("not sure what you're trying to do");
  }
  var merged = new Value(from + ' to ' + to);
  var combinedRecords = [].concat(from.records, to.records);
  Object.assign(merged, {
    from: from,
    to: to,
    depth: 0,
    'in': "both",
    records: combinedRecords
  });
  combinedRecords.parentVal = merged; // NOT TESTED, NOT USED, PROBABLY WRONG
  merged.dim = sharedDim;
  return merged;
};
//_.extend(StringValue.prototype, Value.prototype);
//_.extend(NumberValue.prototype, Value.prototype);
/** Turn a flat parent/child table into a tree -- NOT CURRENTLY IMPLEMENTED.
 *
 * Always throws; the previous implementation has not been ported yet.
 * Outline of that implementation, kept for whoever re-implements it:
 *   1. group `data` two levels deep by [parentProp, childProp], giving
 *      every parent/child pair;
 *   2. collect the leaf (child) nodes of that grouping;
 *   3. for each top-level parent that also appears among the children,
 *      move its children under that child node; parents that never
 *      appear as a child are the tops of the tree and are returned.
 * Known gaps of the old code: it did the wrong thing when a value had two
 * parents, and it did not fix up depth numbers.
 *
 * @param {Object[]} data rows of the table
 * @param {string} parentProp property holding the parent key
 * @param {string} childProp property holding the child key
 * @throws {Error} always
 *
 * @memberof supergroup
 */
const hierarchicalTableToTree = function(data, parentProp, childProp) {
  throw new Error("fix this after getting rid of childProp");
};
// Grouping by a field that holds an ARRAY of keys rather than a single key:
// the item is filed under every one of its keys. Only installable when the
// loaded lodash build exposes `_.createAggregator`; otherwise the exported
// function throws as soon as it is called.
var multiValuedGroupBy;
if (!_.createAggregator) {
  multiValuedGroupBy = function() { throw new Error("couldn't install multiValuedGroupBy") };
} else {
  multiValuedGroupBy = _.createAggregator(function(buckets, item, groupKeys) {
    _.each(groupKeys, function(groupKey) {
      var alreadyThere = Object.prototype.hasOwnProperty.call(buckets, groupKey);
      if (!alreadyThere) {
        buckets[groupKey] = [item];
        return;
      }
      buckets[groupKey].push(item);
    });
  });
}
/**
 * Class for managing filter/selection state while leaving Supergroups
 * immutable as much as possible.
 */
class State {
  /**
   * @param {Array} sglist the (root) list this state belongs to
   */
  constructor(sglist) {
    this.list = sglist;
    this.filters = new Map();
    // Values picked through selectByVal / selectByFilter.
    this.selectedVals = [];
  }

  /**
   * Placeholder -- not implemented yet; currently does nothing.
   * NOTE(review): presumably meant to store a Filter(type, key, filt, ids)
   * in `this.filters`; confirm the intended Map key before implementing.
   */
  addFilter(type, key, filt, ids) {
  }

  /**
   * Mark a single Value as selected.
   * @param {Value} val must belong to this state's list
   * @throws {AssertionError} when val.rootList() is not this state's list
   */
  selectByVal(val) {
    assert.equal(val.rootList(), this.list); // assume state only on root lists
    this.selectedVals.push(val);
  }

  /**
   * Mark every Value of the list that satisfies `filt` as selected.
   * NOTE(review): assumes `filt` is a predicate `(val, index) => boolean`
   * over the list's Values -- confirm against intended usage.
   * @param {function} filt
   */
  selectByFilter(filt) {
    const matches = Array.from(this.list || []).filter(filt);
    matches.forEach((val) => this.selectedVals.push(val));
  }

  /**
   * @return {Array} the records of all selected Values, concatenated in
   *         selection order
   */
  selectedRecs() {
    return this.selectedVals.reduce((recs, val) => recs.concat(val.records), []);
  }
}

/** Plain record describing one filter; the fields are stored as given. */
class Filter {
  constructor(type, key, filt, ids) {
    this.type = type;
    this.key = key;
    this.filt = filt;
    this.ids = ids;
  }
}
/**
 * @class Value
 * @description Supergroup lists are composed of Values. A Value wraps the
 * key used to form one group (`val`); `toString()` / `valueOf()` delegate
 * to that key so a Value can be compared and printed like the key itself.
 * Other properties read by the methods below (`dim`, `depth`, `parent`,
 * `parentList`, `children`, `records`) are attached by whoever builds the
 * tree.
 */
export class Value {
  /** @param {*} val the grouping key */
  constructor(val) {
    this.val = val;
  }

  /** @return {string} string form of the key; safe for null/undefined keys */
  toString() {
    return String(this.val);
  }

  /** @return {*} primitive form of the key; null/undefined are returned as is */
  valueOf() {
    if (this.val === null || this.val === undefined) return this.val;
    return this.val.valueOf();
  }

  /**
   * Normalise path options without touching the caller's object.
   * A string is shorthand for `{delim: string}`; `delim` defaults to '/'.
   * @param {Object|string} [opts]
   * @return {Object} fresh options object
   */
  static pathOpts(opts) {
    const normalized = typeof opts === 'string' ? { delim: opts }
                                                : Object.assign({}, opts);
    if (!('delim' in normalized)) normalized.delim = '/';
    return normalized;
  }

  //Value.prototype.extendGroupBy = // backward compatibility
  /**
   * Add another grouping level below every leaf of this Value (or below
   * this Value itself when it has no leaves).
   * @param {string|function|Array} dim dimension(s) for the new level
   * @param {Object} [opts] forwarded to Supergroup / diffList; not mutated
   */
  addLevel(dim, opts) {
    const baseOpts = opts || {};
    const leaves = this.leafNodes();
    const targets = (leaves && leaves.length) ? leaves : [this];
    // The constructor takes one options object; give every call its own
    // copy of the dims.
    const subgroup = (d) => new Supergroup({
      parent: d, recs: d.records, dims: [].concat(dim), opts: baseOpts,
    });
    targets.forEach((d) => {
      if (!('in' in d)) { // d.in means it's part of a diffList
        d.children = subgroup(d);
      } else { // allows adding levels to diffLists. haven't used for a long time
        if (d['in'] === "both") {
          d.children = diffList(d.from, d.to, dim, baseOpts);
        } else {
          d.children = subgroup(d);
          d.children.forEach((c) => {
            c['in'] = d['in'];
            c[d['in']] = d[d['in']];
          });
        }
      }
      d.children.parentVal = d; // NOT TESTED, NOT USED, PROBABLY WRONG!!!
    });
  }

  /**
   * Descendants of this Value that have no children of their own.
   * @param {*} [level] no longer supported (dropped after commit
   *        31278a35b91a8f4bd4ddc4376c840fb14d2723f9); ignored
   * @return {Value[]|undefined} undefined when this Value has no
   *         `children` property at all
   */
  leafNodes(level) {
    if (!('children' in this)) return;
    return this.descendants().filter(
      (d) => !d.children || d.children.length === 0);
  }

  /**
   * Make each leaf's records its children, giving every record `parent`
   * and `depth`.
   * @param {boolean} [truncateEmpty=true]
   * @return {Value} this
   */
  addRecordsAsChildrenToLeafNodes(truncateEmpty) {
    function fixLeaf(node) {
      node.children = node.records;
      (node.children || []).forEach((rec) => {
        rec.parent = node;
        rec.depth = node.depth + 1;
        // NOTE(review): methods declared in a class body are not
        // enumerable, so this loop currently copies nothing onto the
        // record; left as is until the intended set of methods is decided.
        for (var method in Value.prototype) {
          Object.defineProperty(rec, method, {
            value: Value.prototype[method]
          });
        }
      });
    }
    if (typeof truncateEmpty === "undefined")
      truncateEmpty = true;
    if (truncateEmpty) {
      var self = this;
      // NOTE(review): the condition inspects `self`, not `node` -- kept as
      // written; confirm which one was intended.
      self.descendants().forEach(function(node) {
        if (self.parent && self.parent.children.length === 1) {
          fixLeaf(node);
        }
      });
    } else {
      (this.leafNodes() || []).forEach((node) => {
        fixLeaf(node);
      });
    }
    return this;
  }

  /** Like namePath, but made of the `dim` of each Value on the path. */
  dimPath(opts) {
    const pathOpts = Value.pathOpts(opts);
    pathOpts.dimName = true;
    return this.namePath(pathOpts);
  }

  /**
   * Path from the top of the tree down to this Value.
   * @param {Object|string} [opts] see pedigree for notThis/noRoot/backwards
   * @param {string} [opts.delim='/'] separator for the joined path
   * @param {boolean} [opts.dimName] use each Value's `dim` instead of the Value
   * @param {boolean} [opts.asArray] return the array instead of joining it
   * @return {string|Array}
   */
  namePath(opts) {
    const pathOpts = Value.pathOpts(opts);
    let path = this.pedigree(pathOpts);
    if (pathOpts.dimName) path = path.map((v) => v.dim);
    if (pathOpts.asArray) return path;
    return path.join(pathOpts.delim);
  }

  /**
   * Ancestors of this Value, found by following `.parent`, top first.
   * @param {Object} [opts]
   * @param {boolean} [opts.notThis] leave this Value out of the result
   * @param {boolean} [opts.noRoot] drop the top-most entry
   * @param {boolean} [opts.backwards] reverse the result (also triggered
   *        by `this.backwards`)
   * @return {Value[]}
   */
  pedigree(opts) {
    opts = opts || {};
    const path = [];
    if (!opts.notThis) path.push(this);
    let ptr = this;
    while ((ptr = ptr.parent)) {
      path.unshift(ptr);
    }
    if (opts.noRoot) path.shift();
    if (opts.backwards || this.backwards) path.reverse(); //kludgy?
    return path;
  }

  /** Alias of pedigree. */
  path(opts) {
    return this.pedigree(opts);
  }

  /**
   * Every Value below this one, each child followed by its own
   * descendants. Never mutates `this.children`.
   * @return {Array} empty when there are no children
   */
  descendants(opts) {
    const kids = this.children;
    if (!kids || !kids.length) return [];
    if (typeof kids.flattenTree === 'function') return kids.flattenTree();
    // children is a plain array (e.g. after slicing/filtering a list)
    const flat = [];
    Array.from(kids).forEach((kid) => {
      if (!kid) return;
      flat.push(kid);
      if (typeof kid.descendants === 'function') {
        flat.push(...kid.descendants(opts));
      }
    });
    return flat;
  }

  /**
   * Find this Value or one of its descendants.
   * @param {string|Array} query a key, or a path of keys starting at this Value
   * @return {Value|undefined}
   * @throws {Error} for any other query type, or when the search has to
   *         continue below a Value without children
   */
  lookup(query) {
    if (Array.isArray(query)) {
      // loose equality on purpose: allow string/num comparison to succeed
      if (this.valueOf() == query[0]) {
        query = query.slice(1);
        if (query.length === 0)
          return this;
      }
    } else if (typeof query === 'string' || query instanceof String) {
      if (this.valueOf() == query) {
        return this;
      }
    } else {
      throw new Error("invalid param: " + query);
    }
    if (!this.children)
      throw new Error("can only call lookup on Values with kids");
    return this.children.lookup(query);
  }

  /** @return {number} this group's share of its parent list's records */
  pct() {
    return this.records.length / this.parentList.records.length;
  }

  /** @return {Value|undefined} the Value just before this one in its parent list */
  previous() {
    if (this.parentList) {
      // could store pos on each value, but not doing that now
      const pos = this.parentList.indexOf(this);
      if (pos > 0) {
        return this.parentList[pos - 1];
      }
    }
  }

  /**
   * Run `func` over one field of this group's records.
   * @param {function} func receives the array of field values
   * @param {string|function} field property name, or a function mapping a
   *        record to the value to aggregate
   */
  aggregate(func, field) {
    const recs = this.records || [];
    if (typeof field === 'function')
      return func(recs.map((rec, i) => field(rec, i, recs)));
    return func(recs.map((rec) => rec[field]));
  }

  /** @return the root list of the list this Value belongs to */
  rootList() {
    return this.parentList.rootList();
  }
}
// Register the supergroup entry points and a few numeric helpers on lodash.
_.mixin({
  // Arguments are handed straight to the Supergroup constructor.
  supergroup: ((...args) => new Supergroup(...args)),
  multiValuedGroupBy: multiValuedGroupBy,
  sgDiffList: diffList,
  sgCompare: compare,
  sgCompareValue: compareValue,
  sgAggregate: aggregate,
  hierarchicalTableToTree: hierarchicalTableToTree,
  stateClass: State,

  // FROM https://gist.github.com/AndreasBriese/1670507
  // Return the sum of the elements.
  // If an iterator function is given, it is applied to each element first.
  // An empty collection without an iterator sums to 0.
  sum : function(obj, iterator, context) {
    if (!iterator && _.isEmpty(obj)) return 0;
    var result = 0;
    if (!iterator && _.isArray(obj)) {
      for (var i = obj.length - 1; i > -1; i -= 1) {
        result += obj[i];
      }
      return result;
    }
    _.each(obj, function(value, index, list) {
      var computed = iterator ? iterator.call(context, value, index, list) : value;
      result += computed;
    });
    return result;
  },

  // Return the arithmetic mean of the elements of an array.
  // If an iterator function is given, it is applied before summing.
  // Returns Infinity for an empty collection without an iterator, and
  // undefined for anything that is not an array.
  mean : function(obj, iterator, context) {
    if (!iterator && _.isEmpty(obj)) return Infinity;
    if (!iterator && _.isArray(obj)) return _.sum(obj)/obj.length;
    if (_.isArray(obj) && !_.isEmpty(obj)) return _.sum(obj, iterator, context)/obj.length;
  },

  // Return median of the elements
  // if the object element number is odd the median is the
  // object in the "middle" of a sorted array
  // in case of an even number, the arithmetic mean of the two elements
  // in the middle (in case of characters or strings: obj[n/2-1] ) is returned.
  // if an iterator function is provided, it is applied before
  // Returns Infinity for an empty collection.
  median : function(obj, iterator, context) {
    if (_.isEmpty(obj)) return Infinity;
    var tmpObj = [];
    if (!iterator && _.isArray(obj)) {
      tmpObj = _.clone(obj);
    } else if (_.isArray(obj)) {
      _.each(obj, function(value, index, list) {
        tmpObj.push(iterator ? iterator.call(context, value, index, list) : value);
      });
    }
    // Sort once, with a comparator that orders numbers numerically and
    // strings alphabetically (the default sort compares string forms).
    tmpObj.sort(function(f, s) { return f < s ? -1 : (f > s ? 1 : 0); });
    var n = tmpObj.length;
    if (n % 2) return tmpObj[Math.floor(n / 2)];
    var lower = tmpObj[n / 2 - 1];
    var upper = tmpObj[n / 2];
    return (_.isNumber(lower) && _.isNumber(upper)) ? (lower + upper) / 2 : lower;
  },
});
export default _;
//export default function() { console.log('hi')};