/**
* Define standard data adapters used to retrieve data (usually from REST APIs)
*
* ## Adapters are responsible for retrieving data
* In LocusZoom, the act of fetching data (from API, JSON file, or Tabix) is separate from the act of rendering data.
* Adapters are used to handle retrieving from different sources, and can provide various advanced functionality such
* as caching, data harmonization, and annotating API responses with calculated fields. They can also be used to join
* two data sources, such as annotating association summary statistics with LD information.
*
* Most of LocusZoom's builtin layouts and adapters are written for the field names and data formats of the
* UMich [PortalDev API](https://portaldev.sph.umich.edu/docs/api/v1/#introduction):
* if your data is in a different format, an adapter can be used to coerce or rename fields.
* Although it is possible to change every part of a rendering layout to expect different fields, this is often much
* more work than providing data in the expected format.
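 *
 * For example, an adapter can rename fields from a custom API so that standard layouts work unmodified. A minimal
 * sketch is shown below, assuming that the adapter registry is exposed as `LocusZoom.Adapters` and that the incoming
 * field names (`bp`, `pval`) are hypothetical:
 * ```
 * class MyAssocFormat extends LocusZoom.Adapters.get('AssociationLZ') {
 *     annotateData(records, chain) {
 *         // Copy each record and add the field names that standard layouts expect
 *         return records.map((item) => Object.assign({}, item, {
 *             position: item.bp,
 *             log_pvalue: -Math.log10(item.pval),
 *         }));
 *     }
 * }
 * LocusZoom.Adapters.add('MyAssocFormat', MyAssocFormat);
 * ```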
*
* ## Creating data adapters
* The documentation in this section describes the available data types and adapters. Real LocusZoom usage almost never
* creates these classes directly: rather, they are defined from configuration objects that ask for a source by name.
*
 * The example below creates an object responsible for fetching two different GWAS summary statistics datasets (in this case,
 * two analyses served by the same API endpoint), for any data layer that asks for fields from `trait1:fieldname` or `trait2:fieldname`.
*
* ```
* const data_sources = new LocusZoom.DataSources();
* data_sources.add("trait1", ["AssociationLZ", {url: "http://server.com/api/single/", params: {source: 1}}]);
* data_sources.add("trait2", ["AssociationLZ", {url: "http://server.com/api/single/", params: {source: 2}}]);
* ```
*
* These data sources are then passed to the plot when data is to be rendered:
* `const plot = LocusZoom.populate("#lz-plot", data_sources, layout);`
*
* @module LocusZoom_Adapters
*/
function validateBuildSource(class_name, build, source) {
// Build OR Source, not both
if ((build && source) || !(build || source)) {
throw new Error(`${class_name} must provide a parameter specifying either "build" or "source". It should not specify both.`);
}
// If the build isn't recognized, our APIs can't transparently select a source to match
if (build && !['GRCh37', 'GRCh38'].includes(build)) {
throw new Error(`${class_name} must specify a valid genome build number`);
}
}
// NOTE: Custom adapters are annotated with `see` instead of `extend` throughout this file, to avoid clutter in the developer API docs.
// Most people using LZ data sources will never instantiate a class directly and certainly won't be calling internal
// methods, except when implementing a subclass. For most LZ users, it's usually enough to acknowledge that the
// private API methods exist in the base class.
/**
 * Base class for all LocusZoom data adapters. See also: BaseApiAdapter for adapters that retrieve data from a remote URL.
* @public
*/
class BaseAdapter {
/**
* @param {object} config Configuration options
*/
constructor(config) {
/**
* Whether to enable caching (true for most data adapters)
* @private
* @member {Boolean}
*/
this._enableCache = true;
this._cachedKey = null;
// Almost all LZ sources are "region based". Cache the region requested and use it to determine whether
// the cache would satisfy the request.
this._cache_pos_start = null;
this._cache_pos_end = null;
/**
* Whether this adapter type is dependent on previous requests- for example, the LD source cannot annotate
* association data if no data was found for that region.
* @private
* @member {boolean}
*/
this.__dependentSource = false;
// Parse configuration options
this.parseInit(config);
}
/**
* Parse configuration used to create the instance. Many custom sources will override this method to suit their
* needs (eg specific config options, or for sources that do not retrieve data from a URL)
* @protected
 * @param {String|Object} config Basic configuration: either a URL string, or a config object
 * @param {String} [config.url] The datasource URL
 * @param {Object} [config.params] Initial config params for the datasource
*/
parseInit(config) {
/**
* @private
* @member {Object}
*/
this.params = config.params || {};
}
/**
* A unique identifier that indicates whether cached data is valid for this request. For most sources using GET
* requests to a REST API, this is usually the region requested. Some sources will append additional params to define the request.
*
* This means that to change caching behavior, both the URL and the cache key may need to be updated. However,
* it allows most datasources to skip an extra network request when zooming in.
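 *
 * For example, a subclass whose requests depend on an extra option could append that option to the key, so that
 * changing it forces a new request (a minimal sketch; the option name `dataset_id` is hypothetical):
 * ```
 * getCacheKey(state, chain, fields) {
 *     const base = super.getCacheKey(state, chain, fields);
 *     return `${base}_${this.params.dataset_id}`;
 * }
 * ```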
* @protected
* @param {Object} state Information available in plot.state (chr, start, end). Sometimes used to inject globally
* available information that influences the request being made.
* @param {Object} chain The data chain from previous requests made in a sequence.
 * @param {String[]} fields Array of requested field names
* @returns {String}
*/
getCacheKey(state, chain, fields) {
// Most region sources, by default, will cache the largest region that satisfies the request: zooming in
// should be satisfied via the cache, but pan or region change operations will cause a network request
// Some adapters rely on values set in chain.header during the getURL call (eg, the LD source uses
// this to find the LD refvar). Calling this method is a backwards-compatible way of ensuring that value is set,
// even on a cache hit in which getURL otherwise wouldn't be called.
// Some of the adapters that rely on this behavior are user-defined, hence the compatibility hack.
this.getURL(state, chain, fields);
const cache_pos_chr = state.chr;
const {_cache_pos_start, _cache_pos_end} = this;
if (_cache_pos_start && state.start >= _cache_pos_start && _cache_pos_end && state.end <= _cache_pos_end ) {
return `${cache_pos_chr}_${_cache_pos_start}_${_cache_pos_end}`;
} else {
return `${state.chr}_${state.start}_${state.end}`;
}
}
/**
* Stub: build the URL for any requests made by this source.
* @protected
*/
getURL(state, chain, fields) {
return this.url;
}
/**
 * Perform a network request to fetch data for this source. This is usually the method to override when
 * customizing how data is retrieved.
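 *
 * A sketch of an override that issues a POST request instead of the default GET (the request payload shape is
 * illustrative, not a required format):
 * ```
 * fetchRequest(state, chain, fields) {
 *     const url = this.getURL(state, chain, fields);
 *     const body = JSON.stringify({ chrom: state.chr, start: state.start, end: state.end });
 *     const headers = { 'Content-Type': 'application/json' };
 *     return fetch(url, { method: 'POST', body, headers }).then((response) => response.text());
 * }
 * ```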
* @protected
* @param {Object} state The state of the parent plot
* @param chain
* @param fields
* @returns {Promise}
*/
fetchRequest(state, chain, fields) {
const url = this.getURL(state, chain, fields);
return fetch(url).then((response) => {
if (!response.ok) {
throw new Error(response.statusText);
}
return response.text();
});
}
/**
* Gets the data for just this source, typically via a network request (but using cache where possible)
*
* For most use cases, it is better to override `fetchRequest` instead, to avoid bypassing the cache mechanism
* by accident.
* @protected
* @return {Promise}
*/
getRequest(state, chain, fields) {
let req;
const cacheKey = this.getCacheKey(state, chain, fields);
if (this._enableCache && typeof(cacheKey) !== 'undefined' && cacheKey === this._cachedKey) {
req = Promise.resolve(this._cachedResponse); // Resolve to the value of the current promise
} else {
req = this.fetchRequest(state, chain, fields);
if (this._enableCache) {
this._cachedKey = cacheKey;
this._cache_pos_start = state.start;
this._cache_pos_end = state.end;
this._cachedResponse = req;
}
}
return req;
}
/**
* Ensure the server response is in a canonical form, an array of one object per record. [ {field: oneval} ].
* If the server response contains columns, reformats the response from {column1: [], column2: []} to the above.
*
* Does not apply namespacing, transformations, or field extraction.
*
* May be overridden by data adapters that inherently return more complex payloads, or that exist to annotate other
* sources (eg, if the payload provides extra data rather than a series of records).
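 *
 * For example, a column-oriented payload is reshaped into one object per record:
 * ```
 * // {id: ['a', 'b'], log_pvalue: [1.5, 7.8]}  -->  [{id: 'a', log_pvalue: 1.5}, {id: 'b', log_pvalue: 7.8}]
 * ```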
* @protected
* @param {Object[]|Object} data The original parsed server response
*/
normalizeResponse(data) {
if (Array.isArray(data)) {
// Already in the desired form
return data;
}
// Otherwise, assume the server response is an object representing columns of data.
// Each array should have the same length (verified below), and a given array index corresponds to a single row.
const keys = Object.keys(data);
const N = data[keys[0]].length;
const sameLength = keys.every(function (key) {
const item = data[key];
return item.length === N;
});
if (!sameLength) {
throw new Error(`${this.constructor.name} expects a response in which all arrays of data are the same length`);
}
// Go down the rows, and create an object for each record
const records = [];
const fields = Object.keys(data);
for (let i = 0; i < N; i++) {
const record = {};
for (let j = 0; j < fields.length; j++) {
record[fields[j]] = data[fields[j]][i];
}
records.push(record);
}
return records;
}
/**
* Hook to post-process the data returned by this source with new, additional behavior.
* (eg cleaning up API values or performing complex calculations on the returned data)
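 *
 * A sketch of an override that adds a calculated field to each record (the field names are illustrative):
 * ```
 * annotateData(records, chain) {
 *     records.forEach((item) => {
 *         item.log_pvalue = -Math.log10(item.pvalue);
 *     });
 *     return records;
 * }
 * ```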
*
* @protected
* @param {Object[]} records The parsed data from the source (eg standardized api response)
 * @param {Object} chain The data chain object. For example, chain.header may provide useful annotation metadata
* @returns {Object[]|Promise} The modified set of records
*/
annotateData(records, chain) {
// Default behavior: no transformations
return records;
}
/**
* Clean up the server records for use by datalayers: extract only certain fields, with the specified names.
* Apply per-field transformations as appropriate.
*
* This hook can be overridden, eg to create a source that always returns all records and ignores the "fields" array.
* This is particularly common for sources at the end of a chain- many "dependent" sources do not allow
* cherry-picking individual fields, in which case by **convention** the fields array specifies "last_source_name:all"
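 *
 * For example, given the record `{variant: '1:100_A/C', log_pvalue: 7.8, other: 99}`, calling this method with
 * `fields = ['log_pvalue']`, `outnames = ['assoc:log_pvalue']`, and `trans = [null]` yields `[{'assoc:log_pvalue': 7.8}]`.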
*
* @protected
* @param {Object[]} data One record object per element
* @param {String[]} fields The names of fields to extract (as named in the source data). Eg "afield"
* @param {String[]} outnames How to represent the source fields in the output. Eg "namespace:afield|atransform"
 * @param {function[]} trans An array of transformation functions (if any). One entry per requested field; null entries are passed through unmodified.
*/
extractFields (data, fields, outnames, trans) {
// Intended for an array of objects, eg [ {"id":1, "val":5}, {"id":2, "val":10}]
// Since a number of sources exist that do not obey this format, we will provide a convenient pass-through
if (!Array.isArray(data)) {
return data;
}
if (!data.length) {
// Sometimes there are regions that just don't have data- this should not trigger a missing field error message!
return data;
}
const fieldFound = [];
for (let k = 0; k < fields.length; k++) {
fieldFound[k] = 0;
}
const records = data.map(function (item) {
const output_record = {};
for (let j = 0; j < fields.length; j++) {
let val = item[fields[j]];
if (typeof val != 'undefined') {
fieldFound[j] = 1;
}
if (trans && trans[j]) {
val = trans[j](val);
}
output_record[outnames[j]] = val;
}
return output_record;
});
fieldFound.forEach(function(v, i) {
if (!v) {
throw new Error(`field ${fields[i]} not found in response for ${outnames[i]}`);
}
});
return records;
}
/**
* Combine records from this source with others in the chain to yield final chain body.
* Handles merging this data with other sources (if applicable).
*
* @protected
 * @param {Object[]} data The data that would be returned from this source alone
* @param {Object} chain The data chain built up during previous requests
* @param {String[]} fields
* @param {String[]} outnames
 * @param {Function[]} trans
* @return {Promise|Object[]} The new chain body
*/
combineChainBody(data, chain, fields, outnames, trans) {
return data;
}
/**
* Coordinates the work of parsing a response and returning records. This is broken into 4 steps, which may be
* overridden separately for fine-grained control. Each step can return either raw data or a promise.
*
* @see {module:LocusZoom_Adapters~BaseAdapter#normalizeResponse}
* @see {module:LocusZoom_Adapters~BaseAdapter#annotateData}
* @see {module:LocusZoom_Adapters~BaseAdapter#extractFields}
* @see {module:LocusZoom_Adapters~BaseAdapter#combineChainBody}
* @protected
*
* @param {String|Object} resp The raw data associated with the response
* @param {Object} chain The combined parsed response data from this and all other requests made in the chain
* @param {String[]} fields Array of requested field names (as they would appear in the response payload)
* @param {String[]} outnames Array of field names as they will be represented in the data returned by this source,
* including the namespace. This must be an array with the same length as `fields`
* @param {Function[]} trans The collection of transformation functions to be run on selected fields.
* This must be an array with the same length as `fields`
* @returns {Promise} A promise that resolves to an object containing
 * request metadata (`header: {}`), the consolidated data for plotting (`body: []`), and the individual responses that would be
* returned by each source in the chain in isolation (`discrete: {}`)
*/
parseResponse (resp, chain, fields, outnames, trans) {
const source_id = this.source_id || this.constructor.name;
if (!chain.discrete) {
chain.discrete = {};
}
const json = typeof resp == 'string' ? JSON.parse(resp) : resp;
// Perform the 4 steps of parsing the payload and return a combined chain object
return Promise.resolve(this.normalizeResponse(json.data || json))
.then((standardized) => {
// Perform calculations on the data from just this source
return Promise.resolve(this.annotateData(standardized, chain));
}).then((data) => {
return Promise.resolve(this.extractFields(data, fields, outnames, trans));
}).then((one_source_body) => {
// Store a copy of the data that would be returned by parsing this source in isolation (and taking the
// fields array into account). This is useful when we want to re-use the source output in many ways.
chain.discrete[source_id] = one_source_body;
return Promise.resolve(this.combineChainBody(one_source_body, chain, fields, outnames, trans));
}).then((new_body) => {
return { header: chain.header || {}, discrete: chain.discrete, body: new_body };
});
}
/**
* Fetch the data from the specified data adapter, and apply transformations requested by an external consumer.
 * This is the public-facing datasource method most often called by the plot, but custom data adapters will
 * almost never want to override this method directly; more specific hooks are provided to control individual pieces
 * of the request lifecycle.
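 *
 * For example, if a data layer asks for the fields `['assoc:variant', 'assoc:log_pvalue']`, the adapter registered
 * under the namespace `assoc` receives `fields = ['variant', 'log_pvalue']` and `outnames = ['assoc:variant', 'assoc:log_pvalue']`.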
*
* @private
* @param {Object} state The current "state" of the plot, such as chromosome and start/end positions
* @param {String[]} fields Array of field names that the plot has requested from this data adapter. (without the "namespace" prefix)
* @param {String[]} outnames Array describing how the output data should refer to this field. This represents the
* originally requested field name, including the namespace. This must be an array with the same length as `fields`
* @param {Function[]} trans The collection of transformation functions to be run on selected fields.
* This must be an array with the same length as `fields`
* @returns {function} A callable operation that can be used as part of the data chain
*/
getData(state, fields, outnames, trans) {
if (this.preGetData) { // TODO try to remove this method if at all possible
const pre = this.preGetData(state, fields, outnames, trans);
if (pre) {
state = pre.state || state;
fields = pre.fields || fields;
outnames = pre.outnames || outnames;
trans = pre.trans || trans;
}
}
return (chain) => {
if (this.__dependentSource && chain && chain.body && !chain.body.length) {
// A "dependent" source should not attempt to fire a request if there is no data for it to act on.
// Therefore, it should simply return the previous data chain.
return Promise.resolve(chain);
}
return this.getRequest(state, chain, fields).then((resp) => {
return this.parseResponse(resp, chain, fields, outnames, trans);
});
};
}
}
/**
* Base class for LocusZoom data adapters that receive their data over the web. Adds default config parameters
* (and potentially other behavior) that are relevant to URL-based requests.
* @extends module:LocusZoom_Adapters~BaseAdapter
* @param {string} config.url The URL for the remote dataset. By default, most adapters perform a GET request.
* @inheritDoc
*/
class BaseApiAdapter extends BaseAdapter {
parseInit(config) {
super.parseInit(config);
/**
* @private
* @member {String}
*/
this.url = config.url;
if (!this.url) {
throw new Error('Source not initialized with required URL');
}
}
}
/**
 * Retrieve Association Data from the LocusZoom/PortalDev API (or compatible). Defines how to make a request
* to a specific REST API.
* @public
* @see module:LocusZoom_Adapters~BaseApiAdapter
* @param {string} config.url The base URL for the remote data.
* @param {object} config.params
* @param [config.params.sort=false] Whether to sort the association data (by an assumed field named "position"). This
 * is primarily a site-specific workaround for a particular LZ usage; we encourage APIs to sort by position before returning
* data to the browser.
* @param [config.params.source] The ID of the GWAS dataset to use for this request, as matching the API backend
*/
class AssociationLZ extends BaseApiAdapter {
preGetData (state, fields, outnames, trans) {
// TODO: Modify internals to see if we can go without this method
const id_field = this.params.id_field || 'id';
[id_field, 'position'].forEach(function(x) {
if (!fields.includes(x)) {
fields.unshift(x);
outnames.unshift(x);
trans.unshift(null);
}
});
return {fields: fields, outnames:outnames, trans:trans};
}
/**
* Add query parameters to the URL to construct a query for the specified region
*/
getURL (state, chain, fields) {
const analysis = chain.header.analysis || this.params.source || this.params.analysis; // Old usages called this param "analysis"
if (typeof analysis == 'undefined') {
throw new Error('Association source must specify an analysis ID to plot');
}
return `${this.url}results/?filter=analysis in ${analysis} and chromosome in '${state.chr}' and position ge ${state.start} and position le ${state.end}`;
}
/**
* Some association sources do not sort their data in a predictable order, which makes it hard to reliably
* align with other sources (such as LD). For performance reasons, sorting is an opt-in argument.
* TODO: Consider more fine grained sorting control in the future. This was added as a very specific
* workaround for the original T2D portal.
* @protected
* @param data
* @return {Object}
*/
normalizeResponse (data) {
data = super.normalizeResponse(data);
if (this.params && this.params.sort && data.length && data[0]['position']) {
data.sort(function (a, b) {
return a['position'] - b['position'];
});
}
return data;
}
}
/**
* Fetch linkage disequilibrium information from a UMich LDServer-compatible API, relative to a reference variant.
* If no `plot.state.ldrefvar` is explicitly provided, this source will attempt to find the most significant GWAS
 * variant (smallest pvalue or largest neg_log_pvalue) and use that as the LD reference variant.
*
* This source is designed to connect its results to association data, and therefore depends on association data having
* been loaded by a previous request in the data chain. For custom association APIs, some additional options might
 * need to be specified in order to locate the most significant SNP. Variant IDs of the form `chrom:pos_ref/alt`
* are preferred, but this source will attempt to harmonize other common data formats into something that the LD
* server can understand.
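 *
 * A typical configuration pairs this adapter with an association source. (a minimal sketch: URLs are placeholders,
 * and the adapter names are assumed to match the class names registered in your LocusZoom build)
 * ```
 * data_sources.add("assoc", ["AssociationLZ", {url: "http://server.com/api/single/", params: {source: 1}}]);
 * data_sources.add("ld", ["LDServer", {url: "http://server.com/ld/", params: {source: "1000G", population: "ALL"}}]);
 * ```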
*
* @public
* @see module:LocusZoom_Adapters~BaseApiAdapter
*/
class LDServer extends BaseApiAdapter {
/**
* @param {string} config.url The base URL for the remote data.
* @param {object} config.params
* @param [config.params.build='GRCh37'] The genome build to use when calculating LD relative to a specified reference variant.
* May be overridden by a global parameter `plot.state.genome_build` so that all datasets can be fetched for the appropriate build in a consistent way.
* @param [config.params.source='1000G'] The name of the reference panel to use, as specified in the LD server instance.
* May be overridden by a global parameter `plot.state.ld_source` to implement widgets that alter LD display.
* @param [config.params.population='ALL'] The sample population used to calculate LD for a specified source;
 * population names vary depending on the reference panel and how the server was populated with data.
* May be overridden by a global parameter `plot.state.ld_pop` to implement widgets that alter LD display.
* @param [config.params.method='rsquare'] The metric used to calculate LD
* @param [config.params.id_field] The association data field that contains variant identifier information. The preferred
 * format for the LD server is `chrom:pos_ref/alt`, and this source will attempt to normalize other common formats.
* This source can auto-detect possible matches for field names containing "variant" or "id"
* @param [config.params.position_field] The association data field that contains variant position information.
* This source can auto-detect possible matches for field names containing "position" or "pos"
* @param [config.params.pvalue_field] The association data field that contains pvalue information.
* This source can auto-detect possible matches for field names containing "pvalue" or "log_pvalue".
* The suggested LD refvar will be the smallest pvalue, or the largest log_pvalue: this source will auto-detect
* the word "log" in order to determine the sign of the comparison.
*/
constructor(config) {
super(config);
this.__dependentSource = true;
}
preGetData(state, fields) {
if (fields.length > 1) {
if (fields.length !== 2 || !fields.includes('isrefvar')) {
throw new Error(`LD does not know how to get all fields: ${fields.join(', ')}`);
}
}
}
findMergeFields(chain) {
// Find the fields (as provided by a previous step in the chain, like an association source) that will be needed to
// combine LD data with existing information
// Since LD information may be shared across multiple assoc sources with different namespaces,
// we use regex to find columns to join on, rather than requiring exact matches
const exactMatch = function (arr) {
return function () {
const regexes = arguments;
for (let i = 0; i < regexes.length; i++) {
const regex = regexes[i];
const m = arr.filter(function (x) {
return x.match(regex);
});
if (m.length) {
return m[0];
}
}
return null;
};
};
let dataFields = {
id: this.params.id_field,
position: this.params.position_field,
pvalue: this.params.pvalue_field,
_names_: null,
};
if (chain && chain.body && chain.body.length > 0) {
const names = Object.keys(chain.body[0]);
const nameMatch = exactMatch(names);
// Internally, fields are generally prefixed with the name of the source they come from.
// If the user provides an id_field (like `variant`), it should work across data sources (`assoc1:variant`,
// assoc2:variant), but not match fragments of other field names (assoc1:variant_thing)
// Note: these lookups hard-code a couple of common fields that will work based on known APIs in the wild
const id_match = dataFields.id && nameMatch(new RegExp(`${dataFields.id}\\b`));
dataFields.id = id_match || nameMatch(/\bvariant\b/) || nameMatch(/\bid\b/);
dataFields.position = dataFields.position || nameMatch(/\bposition\b/i, /\bpos\b/i);
dataFields.pvalue = dataFields.pvalue || nameMatch(/\bpvalue\b/i, /\blog_pvalue\b/i);
dataFields._names_ = names;
}
return dataFields;
}
findRequestedFields (fields, outnames) {
// Assumption: all usages of this source will only ever ask for "isrefvar" or "state". This maps to output names.
let obj = {};
for (let i = 0; i < fields.length; i++) {
if (fields[i] === 'isrefvar') {
obj.isrefvarin = fields[i];
obj.isrefvarout = outnames && outnames[i];
} else {
obj.ldin = fields[i];
obj.ldout = outnames && outnames[i];
}
}
return obj;
}
/**
* The LD API payload does not obey standard format conventions; do not try to transform it.
*/
normalizeResponse (data) {
return data;
}
/**
* Get the LD reference variant, which by default will be the most significant hit in the assoc results
* This will be used in making the original query to the LD server for pairwise LD information.
*
* This is meant to join a single LD request to any number of association results, and to work with many kinds of API.
* To do this, the datasource looks for fields with special known names such as pvalue, log_pvalue, etc.
* If your API uses different nomenclature, an option must be specified.
*
* @protected
* @returns {String[]} Two strings: 1) the marker id (expected to be in `chr:pos_ref/alt` format) of the reference
* variant, and 2) the marker ID as it appears in the original dataset that we are joining to, so that the exact
 * refvar can be marked when plotting the data.
*/
getRefvar(state, chain, fields) {
let findExtremeValue = function(records, pval_field) {
// Finds the most significant hit (smallest pvalue, or largest -log10p). Will try to auto-detect the appropriate comparison.
pval_field = pval_field || 'log_pvalue'; // The official LZ API returns log_pvalue
const is_log = /log/.test(pval_field);
let cmp;
if (is_log) {
cmp = function(a, b) {
return a > b;
};
} else {
cmp = function(a, b) {
return a < b;
};
}
let extremeVal = records[0][pval_field], extremeIdx = 0;
for (let i = 1; i < records.length; i++) {
if (cmp(records[i][pval_field], extremeVal)) {
extremeVal = records[i][pval_field];
extremeIdx = i;
}
}
return extremeIdx;
};
let reqFields = this.findRequestedFields(fields);
let refVar = reqFields.ldin;
if (refVar === 'state') {
refVar = state.ldrefvar || chain.header.ldrefvar || 'best';
}
if (refVar === 'best') {
if (!chain.body) {
throw new Error('No association data found to find best pvalue');
}
let keys = this.findMergeFields(chain);
if (!keys.pvalue || !keys.id) {
let columns = '';
if (!keys.id) {
columns += `${columns.length ? ', ' : ''}id`;
}
if (!keys.pvalue) {
columns += `${columns.length ? ', ' : ''}pvalue`;
}
throw new Error(`Unable to find necessary column(s) for merge: ${columns} (available: ${keys._names_})`);
}
refVar = chain.body[findExtremeValue(chain.body, keys.pvalue)][keys.id];
}
// Some datasets, notably the Portal, use a different marker format.
// Coerce it into one that will work with the LDServer API. (CHROM:POS_REF/ALT)
const REGEX_MARKER = /^(?:chr)?([a-zA-Z0-9]+?)[_:-](\d+)[_:|-]?(\w+)?[/_:|-]?([^_]+)?_?(.*)?/;
const match = refVar && refVar.match(REGEX_MARKER);
if (!match) {
throw new Error('Could not request LD for a missing or incomplete marker format');
}
const [original, chrom, pos, ref, alt] = match;
// Currently, the LD server only accepts full variant specs; it won't return LD without ref+alt. Allowing
// a partial match in the regex mainly leaves room for potential future features.
let refVar_formatted = `${chrom}:${pos}`;
if (ref && alt) {
refVar_formatted += `_${ref}/${alt}`;
}
return [refVar_formatted, original];
}
/**
* Identify (or guess) the LD reference variant, then add query parameters to the URL to construct a query for the specified region
*/
getURL(state, chain, fields) {
// The LD source/pop can be overridden from plot.state for dynamic layouts
const build = state.genome_build || this.params.build || 'GRCh37'; // This isn't expected to change after the data is plotted.
let source = state.ld_source || this.params.source || '1000G';
const population = state.ld_pop || this.params.population || 'ALL'; // LDServer panels will always have an ALL
const method = this.params.method || 'rsquare';
if (source === '1000G' && build === 'GRCh38') {
// For build 38 (only), there is a newer/improved 1000G LD panel available that uses WGS data. Auto upgrade by default.
source = '1000G-FRZ09';
}
validateBuildSource(this.constructor.name, build, null); // LD doesn't need to validate `source` option
const [refVar_formatted, refVar_raw] = this.getRefvar(state, chain, fields);
// Preserve the user-provided variant spec for use when matching to assoc data
chain.header.ldrefvar = refVar_raw;
return [
this.url, 'genome_builds/', build, '/references/', source, '/populations/', population, '/variants',
'?correlation=', method,
'&variant=', encodeURIComponent(refVar_formatted),
'&chrom=', encodeURIComponent(state.chr),
'&start=', encodeURIComponent(state.start),
'&stop=', encodeURIComponent(state.end),
].join('');
}
/**
* The LD adapter caches based on region, reference panel, and population name
* @param state
* @param chain
* @param fields
* @return {string}
*/
getCacheKey(state, chain, fields) {
const base = super.getCacheKey(state, chain, fields);
let source = state.ld_source || this.params.source || '1000G';
const population = state.ld_pop || this.params.population || 'ALL'; // LDServer panels will always have an ALL
const [refVar, _] = this.getRefvar(state, chain, fields);
return `${base}_${refVar}_${source}_${population}`;
}
/**
* The LD adapter attempts to intelligently match retrieved LD information to a request for association data earlier in the data chain.
* Since each layer only asks for the data needed for that layer, one LD call is sufficient to annotate many separate association tracks.
*/
combineChainBody(data, chain, fields, outnames, trans) {
let keys = this.findMergeFields(chain);
let reqFields = this.findRequestedFields(fields, outnames);
if (!keys.position) {
throw new Error(`Unable to find position field for merge: ${keys._names_}`);
}
const leftJoin = function (left, right, lfield, rfield) {
let i = 0, j = 0;
while (i < left.length && j < right.position2.length) {
if (left[i][keys.position] === right.position2[j]) {
left[i][lfield] = right[rfield][j];
i++;
j++;
} else if (left[i][keys.position] < right.position2[j]) {
i++;
} else {
j++;
}
}
};
const tagRefVariant = function (data, refvar, idfield, outrefname, outldname) {
for (let i = 0; i < data.length; i++) {
if (data[i][idfield] && data[i][idfield] === refvar) {
data[i][outrefname] = 1;
data[i][outldname] = 1; // For label/filter purposes, implicitly mark the ref var as LD=1 to itself
} else {
data[i][outrefname] = 0;
}
}
};
// LD servers vary slightly. Some report the correlation as "rsquare", others as "correlation"
let corrField = data.rsquare ? 'rsquare' : 'correlation';
leftJoin(chain.body, data, reqFields.ldout, corrField);
if (reqFields.isrefvarin && chain.header.ldrefvar) {
tagRefVariant(chain.body, chain.header.ldrefvar, keys.id, reqFields.isrefvarout, reqFields.ldout);
}
return chain.body;
}
/**
* The LDServer API is paginated, but we need all of the data to render a plot. Depaginate and combine where appropriate.
*/
fetchRequest(state, chain, fields) {
let url = this.getURL(state, chain, fields);
let combined = { data: {} };
let chainRequests = function (url) {
return fetch(url).then((response) => {
if (!response.ok) {
throw new Error(response.statusText);
}
return response.text();
}).then(function(payload) {
payload = JSON.parse(payload);
Object.keys(payload.data).forEach(function (key) {
combined.data[key] = (combined.data[key] || []).concat(payload.data[key]);
});
if (payload.next) {
return chainRequests(payload.next);
}
return combined;
});
};
return chainRequests(url);
}
}
/**
* Fetch GWAS catalog data for a list of known variants, and align the data with previously fetched association data.
* There can be more than one claim per variant; this adapter is written to support a visualization in which each
 * association variant is labeled with the single most significant hit in the GWAS catalog (along with enough information to link out to the catalog entry).
*
* Sometimes the GWAS catalog uses rsIDs that could refer to more than one variant (eg multiple alt alleles are
* possible for the same rsID). To avoid missing possible hits due to ambiguous meaning, we connect the assoc
* and catalog data via the position field, not the full variant specifier. This source will auto-detect the matching
* field in association data by looking for the field name `position` or `pos`.
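 *
 * A minimal configuration sketch (the URL is a placeholder, and the adapter name is assumed to match the class name
 * registered in your LocusZoom build):
 * ```
 * data_sources.add("catalog", ["GwasCatalogLZ", {url: "http://server.com/api/gwas_catalog/", params: {build: "GRCh37"}}]);
 * ```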
*
* @public
* @see module:LocusZoom_Adapters~BaseApiAdapter
*/
class GwasCatalogLZ extends BaseApiAdapter {
/**
* @param {string} config.url The base URL for the remote data.
* @param {Object} config.params
 * @param [config.params.build] The genome build to use when fetching GWAS catalog data.
* May be overridden by a global parameter `plot.state.genome_build` so that all datasets can be fetched for the appropriate build in a consistent way.
* @param {Number} [config.params.source] The ID of the chosen catalog. Most usages should omit this parameter and
* let LocusZoom choose the newest available dataset to use based on the genome build: defaults to recent EBI GWAS catalog, GRCh37.
*/
constructor(config) {
super(config);
this.__dependentSource = true;
}
/**
* Add query parameters to the URL to construct a query for the specified region
*/
getURL(state, chain, fields) {
// This is intended to be aligned with another source- we will assume they are always ordered by position, asc
// (regardless of the actual match field)
const build = state.genome_build || this.params.build;
const source = this.params.source;
validateBuildSource(this.constructor.name, build, source);
// If a build name is provided, it takes precedence (the API will attempt to auto-select newest dataset based on the requested genome build).
// Build and source are mutually exclusive, because hard-coded source IDs tend to be out of date
const source_query = build ? `&build=${build}` : ` and id eq ${source}`;
return `${this.url }?format=objects&sort=pos&filter=chrom eq '${state.chr}' and pos ge ${state.start} and pos le ${state.end}${source_query}`;
}
findMergeFields(records) {
// Data from previous sources is already namespaced. Find the alignment field by matching.
const knownFields = Object.keys(records);
// Note: All API endpoints involved only give results for 1 chromosome at a time; match is implied
const posMatch = knownFields.find(function (item) {
return item.match(/\b(position|pos)\b/i);
});
if (!posMatch) {
throw new Error('Could not find data to align with GWAS catalog results');
}
return { 'pos': posMatch };
}
extractFields (data, fields, outnames, trans) {
// Skip the "individual field extraction" step; extraction will be handled when building chain body instead
return data;
}
/**
 * Intelligently combine the GWAS catalog data with the association data used for this data layer. See the class
 * description for a summary of how this works.
*/
combineChainBody(data, chain, fields, outnames, trans) {
if (!data.length) {
return chain.body;
}
// TODO: Better reuse options in the future. This source is very specifically tied to the UM PortalDev API, where
// the field name is always "log_pvalue". Relatively few sites will write their own gwas-catalog endpoint.
const decider = 'log_pvalue';
const decider_out = outnames[fields.indexOf(decider)];
function leftJoin(left, right, fields, outnames, trans) { // Add `fields` from `right` to `left`
// Add a synthetic, un-namespaced field to all matching records
const n_matches = left['n_catalog_matches'] || 0;
left['n_catalog_matches'] = n_matches + 1;
if (decider && left[decider_out] && left[decider_out] > right[decider]) {
// There may be more than one GWAS catalog entry for the same SNP. This source is intended for a 1:1
// annotation scenario, so for now it only joins the catalog entry that has the best -log10 pvalue
return;
}
for (let j = 0; j < fields.length; j++) {
const fn = fields[j];
const outn = outnames[j];
let val = right[fn];
if (trans && trans[j]) {
val = trans[j](val);
}
left[outn] = val;
}
}
const chainNames = this.findMergeFields(chain.body[0]);
const catNames = this.findMergeFields(data[0]);
let i = 0, j = 0;
while (i < chain.body.length && j < data.length) {
const left = chain.body[i];
const right = data[j];
if (left[chainNames.pos] === right[catNames.pos]) {
// There may be multiple catalog entries for each matching SNP; evaluate match one at a time
leftJoin(left, right, fields, outnames, trans);
j += 1;
} else if (left[chainNames.pos] < right[catNames.pos]) {
i += 1;
} else {
j += 1;
}
}
return chain.body;
}
}
/**
* Retrieve Gene Data, as fetched from the LocusZoom/Portaldev API server (or compatible format)
* @public
* @see module:LocusZoom_Adapters~BaseApiAdapter
* @param {string} config.url The base URL for the remote data
* @param {Object} config.params
 * @param [config.params.build] The genome build to use when fetching gene data.
* May be overridden by a global parameter `plot.state.genome_build` so that all datasets can be fetched for the appropriate build in a consistent way.
* @param {Number} [config.params.source] The ID of the chosen gene dataset. Most usages should omit this parameter and
* let LocusZoom choose the newest available dataset to use based on the genome build: defaults to recent GENCODE data, GRCh37.
*/
class GeneLZ extends BaseApiAdapter {
/**
* Add query parameters to the URL to construct a query for the specified region
*/
getURL(state, chain, fields) {
const build = state.genome_build || this.params.build;
let source = this.params.source;
validateBuildSource(this.constructor.name, build, source);
// If a build name is provided, it takes precedence (the API will attempt to auto-select newest dataset based on the requested genome build).
// Build and source are mutually exclusive, because hard-coded source IDs tend to be out of date
const source_query = build ? `&build=${build}` : ` and source in ${source}`;
return `${this.url}?filter=chrom eq '${state.chr}' and start le ${state.end} and end ge ${state.start}${source_query}`;
}
/**
* The UM genes API has a very complex internal data format. Bypass any record parsing, and provide the data layer with
* the exact information returned by the API. (ignoring the fields array in the layout)
* @param data
* @return {Object[]|Object}
*/
normalizeResponse(data) {
return data;
}
/**
* Does not attempt to namespace or modify the fields from the API payload; the payload format is very complex and
* quite coupled with the data rendering implementation.
* Typically, requests to this adapter specify a single dummy field sufficient to trigger the request: `fields:[ 'gene:all' ]`
*/
extractFields(data, fields, outnames, trans) {
return data;
}
}
/**
* Retrieve Gene Constraint Data, as fetched from the gnomAD server (or compatible graphQL api endpoint)
*
* This is intended to be the second request in a chain, with special logic that connects it to Genes data
* already fetched. It assumes that the genes data is returned from the UM API, and thus the logic involves
* matching on specific assumptions about `gene_name` format.
*
* @public
* @see module:LocusZoom_Adapters~BaseApiAdapter
*/
class GeneConstraintLZ extends BaseApiAdapter {
/**
* @param {string} config.url The base URL for the remote data
* @param {Object} config.params
 * @param [config.params.build] The genome build to use when querying for gene constraint data.
* May be overridden by a global parameter `plot.state.genome_build` so that all datasets can be fetched for the appropriate build in a consistent way.
*/
constructor(config) {
super(config);
this.__dependentSource = true;
}
/**
* GraphQL API: request details are encoded in the body, not the URL
*/
getURL() {
return this.url;
}
/**
* The gnomAD API has a very complex internal data format. Bypass any record parsing, and provide the data layer with
* the exact information returned by the API.
*/
normalizeResponse(data) {
return data;
}
fetchRequest(state, chain, fields) {
const build = state.genome_build || this.params.build;
if (!build) {
throw new Error(`Adapter ${this.constructor.name} must specify a 'genome_build' option`);
}
const unique_gene_names = chain.body.reduce(
// In rare cases, the same gene symbol may appear at multiple positions. (issue #179) We de-duplicate the
// gene names to avoid issuing a malformed GraphQL query.
function (acc, gene) {
acc[gene.gene_name] = null;
return acc;
},
{}
);
let query = Object.keys(unique_gene_names).map(function (gene_name) {
// GraphQL alias names must match a specific set of allowed characters: https://stackoverflow.com/a/45757065/1422268
const alias = `_${gene_name.replace(/[^A-Za-z0-9_]/g, '_')}`;
// Each gene symbol is a separate graphQL query, grouped into one request using aliases
return `${alias}: gene(gene_symbol: "${gene_name}", reference_genome: ${build}) { gnomad_constraint { exp_syn obs_syn syn_z oe_syn oe_syn_lower oe_syn_upper exp_mis obs_mis mis_z oe_mis oe_mis_lower oe_mis_upper exp_lof obs_lof pLI oe_lof oe_lof_lower oe_lof_upper } } `;
});
if (!query.length || query.length > 25 || build === 'GRCh38') {
// Skip the API request when it would make no sense:
// - Build 38 (gnomAD supports build GRCh37 only; don't hit server when invalid. This isn't future proof, but we try to be good neighbors.)
// - Too many genes (the gnomAD API appears to limit query cost to roughly 25 genes per request)
// - No genes in region (hence no constraint info)
return Promise.resolve({ data: null });
}
query = `{${query.join(' ')} }`; // GraphQL isn't quite JSON; items are separated by spaces but not commas
const url = this.getURL(state, chain, fields);
// See: https://graphql.org/learn/serving-over-http/
const body = JSON.stringify({ query: query });
const headers = { 'Content-Type': 'application/json' };
// Note: The gnomAD API sometimes fails randomly.
// If request blocked, return a fake "no data" signal so the genes track can still render w/o constraint info
return fetch(url, { method: 'POST', body, headers }).then((response) => {
if (!response.ok) {
return [];
}
return response.text();
}).catch((err) => []);
}
/**
* Annotate GENCODE data (from a previous request to the genes adapter) with additional gene constraint data from
* the gnomAD API. See class description for a summary of how this works.
*/
combineChainBody(data, chain, fields, outnames, trans) {
if (!data) {
return chain;
}
chain.body.forEach(function(gene) {
// Find payload keys that match gene names in this response
const alias = `_${gene.gene_name.replace(/[^A-Za-z0-9_]/g, '_')}`; // aliases are modified gene names
const constraint = data[alias] && data[alias]['gnomad_constraint']; // gnomad API has two ways of specifying missing data for a requested gene
if (constraint) {
// Add all fields from constraint data- do not override fields present in the gene source
Object.keys(constraint).forEach(function (key) {
let val = constraint[key];
if (typeof gene[key] === 'undefined') {
if (typeof val == 'number' && val.toString().includes('.')) {
val = parseFloat(val.toFixed(2));
}
gene[key] = val; // These two sources are both designed to bypass namespacing
}
});
}
});
return chain.body;
}
}
/**
* Retrieve Recombination Rate Data, as fetched from the LocusZoom API server (or compatible)
* @public
* @see module:LocusZoom_Adapters~BaseApiAdapter
* @param {string} config.url The base URL for the remote data
* @param {Object} config.params
 * @param [config.params.build] The genome build to use when fetching recombination rate data.
* May be overridden by a global parameter `plot.state.genome_build` so that all datasets can be fetched for the appropriate build in a consistent way.
* @param {Number} [config.params.source] The ID of the chosen dataset. Most usages should omit this parameter and
* let LocusZoom choose the newest available dataset to use based on the genome build: defaults to recent HAPMAP recombination rate, GRCh37.
*/
class RecombLZ extends BaseApiAdapter {
/**
* Add query parameters to the URL to construct a query for the specified region
*/
getURL(state, chain, fields) {
const build = state.genome_build || this.params.build;
let source = this.params.source;
validateBuildSource(this.constructor.SOURCE_NAME, build, source);
// If a build name is provided, it takes precedence (the API will attempt to auto-select newest dataset based on the requested genome build).
// Build and source are mutually exclusive, because hard-coded source IDs tend to be out of date
const source_query = build ? `&build=${build}` : ` and id in ${source}`;
return `${this.url}?filter=chromosome eq '${state.chr}' and position le ${state.end} and position ge ${state.start}${source_query}`;
}
}
/**
* Retrieve static blobs of data as raw JS objects. This does not perform additional parsing, which is required
* for some sources (eg it does not know how to join together LD and association data).
*
* Therefore it is the responsibility of the user to pass information in a format that can be read and
* understood by the chosen plot- a StaticJSON source is rarely a drop-in replacement for existing layouts.
*
* This source is largely here for legacy reasons. More often, a convenient way to serve static data is as separate
* JSON files to an existing source (with the JSON url in place of an API).
*
* Note: The name is a bit misleading. It receives JS objects, not strings serialized as "json".
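 *
 * Example usage, assuming this adapter is registered under the name "StaticJSON" (as it has been historically):
 * ```
 * data_sources.add("mydata", ["StaticJSON", [{ id: "1:100_A/C", x: 1 }, { id: "1:200_G/T", x: 2 }]]);
 * ```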
* @public
* @see module:LocusZoom_Adapters~BaseAdapter
* @param {object} data The data to be returned by this source (subject to namespacing rules)
*/
class StaticSource extends BaseAdapter {
parseInit(data) {
// Does not receive any config; the only argument is the raw data, embedded when source is created
this._data = data;
}
getRequest(state, chain, fields) {
return Promise.resolve(this._data);
}
}
/**
* Retrieve PheWAS data retrieved from a LocusZoom/PortalDev compatible API
* @public
* @see module:LocusZoom_Adapters~BaseApiAdapter
* @param {string} config.url The base URL for the remote data
* @param {Object} config.params
 * @param {String[]} config.params.build An array of one or more genome build names that will be used to provide
 * PheWAS results for this position. Note that positions may not translate between builds.
*/
class PheWASLZ extends BaseApiAdapter {
getURL(state, chain, fields) {
const build = (state.genome_build ? [state.genome_build] : null) || this.params.build;
if (!build || !Array.isArray(build) || !build.length) {
throw new Error(['Adapter', this.constructor.SOURCE_NAME, 'requires that you specify an array of one or more desired genome build names'].join(' '));
}
const url = [
this.url,
"?filter=variant eq '", encodeURIComponent(state.variant), "'&format=objects&",
build.map(function (item) {
return `build=${encodeURIComponent(item)}`;
}).join('&'),
];
return url.join('');
}
getCacheKey(state, chain, fields) {
// This is not a region-based source; it doesn't make sense to cache by region
return this.getURL(state, chain, fields);
}
}
/**
* Base class for "connectors"- this is a highly specialized kind of adapter that is rarely used in most LocusZoom
* deployments. This is meant to be subclassed, rather than used directly.
*
* A connector is a data adapter that makes no server requests and caches no data of its own. Instead, it decides how to
* combine data from other sources in the chain. Connectors are useful when we want to request (or calculate) some
 * useful piece of information once, but apply it to many different kinds of records.
*
* Typically, a subclass will implement the field merging logic in `combineChainBody`.
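 *
 * A minimal subclass sketch (the source names `gene_ns` and `aggregation_ns` are illustrative):
 * ```
 * class MyConnector extends ConnectorSource {
 *     _getRequiredSources() {
 *         return ['gene_ns', 'aggregation_ns'];
 *     }
 *     combineChainBody(records, chain) {
 *         const genes = chain.discrete[this._source_name_mapping['gene_ns']];
 *         // ...merge fields from the other chain source(s) into the gene records here...
 *         return genes;
 *     }
 * }
 * ```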
*
* @public
* @see module:LocusZoom_Adapters~BaseAdapter
*/
class ConnectorSource extends BaseAdapter {
/**
* @param {Object} config.params Additional parameters
* @param {Object} config.params.sources Specify how the hard-coded logic should find the data it relies on in the chain,
* as {internal_name: chain_source_id} pairs. This allows writing a reusable connector that does not need to make
 * assumptions about what namespaces a source is using.
*/
constructor(config) {
super(config);
if (!config || !config.sources) {
throw new Error('Connectors must specify the data they require as config.sources = {internal_name: chain_source_id} pairs');
}
/**
* Tells the connector how to find the data it relies on
*
* For example, a connector that applies burden test information to the genes layer might specify:
* {gene_ns: "gene", aggregation_ns: "aggregation"}
*
* @member {Object}
* @private
*/
this._source_name_mapping = config.sources;
// Validate that this source has been told how to find the required information
const specified_ids = Object.keys(config.sources);
/** @property {String[]} Specifies the sources that must be provided in the original config object */
this._getRequiredSources().forEach((k) => {
if (!specified_ids.includes(k)) {
// TODO: Fix constructor.name usage in minified bundles
throw new Error(`Configuration for ${this.constructor.name} must specify a source ID corresponding to ${k}`);
}
});
}
// Stub- connectors don't have their own url or data, so the defaults don't make sense
parseInit() {}
getRequest(state, chain, fields) {
// Connectors do not request their own data by definition, but they *do* depend on other sources having been loaded
// first. This method performs basic validation, and preserves the accumulated body from the chain so far.
Object.keys(this._source_name_mapping).forEach((ns) => {
const chain_source_id = this._source_name_mapping[ns];
if (chain.discrete && !chain.discrete[chain_source_id]) {
throw new Error(`${this.constructor.name} cannot be used before loading required data for: ${chain_source_id}`);
}
});
return Promise.resolve(chain.body || []);
}
parseResponse(data, chain, fields, outnames, trans) {
// A connector source does not update chain.discrete, but it may use it. It bypasses data formatting
// and field selection (both are assumed to have been done already, by the previous sources this draws from)
// Because of how the chain works, connectors are not very good at applying new transformations or namespacing.
// Typically connectors are called with `connector_name:all` in the fields array.
return Promise.resolve(this.combineChainBody(data, chain, fields, outnames, trans))
.then(function(new_body) {
return {header: chain.header || {}, discrete: chain.discrete || {}, body: new_body};
});
}
combineChainBody(records, chain) {
// Stub method: specifies how to combine the data
throw new Error('This method must be implemented in a subclass');
}
/**
* Helper method since ES6 doesn't support class fields
* @private
*/
_getRequiredSources() {
throw new Error('Must specify an array that identifies the kind of data required by this source');
}
}
export { BaseAdapter, BaseApiAdapter };
export {
AssociationLZ,
ConnectorSource,
GeneConstraintLZ,
GeneLZ,
GwasCatalogLZ,
LDServer,
PheWASLZ,
RecombLZ,
StaticSource,
};