lib/goog/uri/utils.js

1// Copyright 2008 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/**
16 * @fileoverview Simple utilities for dealing with URI strings.
17 *
18 * This is intended to be a lightweight alternative to constructing goog.Uri
19 * objects. Whereas goog.Uri adds several kilobytes to the binary regardless
20 * of how much of its functionality you use, this is designed to be a set of
21 * mostly-independent utilities so that the compiler includes only what is
22 * necessary for the task. Estimated savings of porting is 5k pre-gzip and
23 * 1.5k post-gzip. To ensure the savings remain, future developers should
24 * avoid adding new functionality to existing functions, but instead create
25 * new ones and factor out shared code.
26 *
27 * Many of these utilities have limited functionality, tailored to common
28 * cases. The query parameter utilities assume that the parameter keys are
29 * already encoded, since most keys are compile-time alphanumeric strings. The
30 * query parameter mutation utilities also do not tolerate fragment identifiers.
31 *
32 * By design, these functions can be slower than goog.Uri equivalents.
33 * Repeated calls to some of these functions may be quadratic in behavior for IE,
34 * although the effect is somewhat limited given the 2kb limit.
35 *
36 * One advantage of the limited functionality here is that this approach is
37 * less sensitive to differences in URI encodings than goog.Uri, since these
38 * functions operate on strings directly, rather than decoding them and
39 * then re-encoding.
40 *
41 * Uses features of RFC 3986 for parsing/formatting URIs:
42 * http://www.ietf.org/rfc/rfc3986.txt
43 *
44 * @author gboyer@google.com (Garrett Boyer) - The "lightened" design.
45 */
46
47goog.provide('goog.uri.utils');
48goog.provide('goog.uri.utils.ComponentIndex');
49goog.provide('goog.uri.utils.QueryArray');
50goog.provide('goog.uri.utils.QueryValue');
51goog.provide('goog.uri.utils.StandardQueryParam');
52
53goog.require('goog.asserts');
54goog.require('goog.string');
55
56
/**
 * Numeric character codes for the punctuation that delimits query parameters
 * and fragments. Kept as plain numbers so that results of charCodeAt() can be
 * compared directly, avoiding the allocations that come with working on
 * one-character strings.
 * @enum {number}
 * @private
 */
goog.uri.utils.CharCode_ = {
  AMPERSAND: 38,  // '&'
  EQUAL: 61,      // '='
  HASH: 35,       // '#'
  QUESTION: 63    // '?'
};
68
69
/**
 * Assembles a URI string out of parts that are already URI-encoded.
 *
 * Nothing is encoded here. Every component is optional: a component that is
 * null, undefined, or otherwise falsy (for instance the empty string) is left
 * out together with its punctuation. The user info and port are emitted only
 * when a domain is supplied, since they belong to the authority section.
 *
 * @param {?string=} opt_scheme The scheme such as 'http'.
 * @param {?string=} opt_userInfo The user name before the '@'.
 * @param {?string=} opt_domain The domain such as 'www.google.com', already
 *     URI-encoded.
 * @param {(string|number|null)=} opt_port The port number.
 * @param {?string=} opt_path The path, already URI-encoded.  If it is not
 *     empty, it must begin with a slash.
 * @param {?string=} opt_queryData The URI-encoded query data.
 * @param {?string=} opt_fragment The URI-encoded fragment identifier.
 * @return {string} The fully combined URI.
 */
goog.uri.utils.buildFromEncodedParts = function(opt_scheme, opt_userInfo,
    opt_domain, opt_port, opt_path, opt_queryData, opt_fragment) {
  // The authority ('//user@domain:port') exists only when there is a domain.
  var authority = '';
  if (opt_domain) {
    authority = '//' +
        (opt_userInfo ? opt_userInfo + '@' : '') +
        opt_domain +
        (opt_port ? ':' + opt_port : '');
  }

  return (opt_scheme ? opt_scheme + ':' : '') +
      authority +
      (opt_path || '') +
      (opt_queryData ? '?' + opt_queryData : '') +
      (opt_fragment ? '#' + opt_fragment : '');
};
123
124
/**
 * A regular expression for breaking a URI into its component parts.
 *
 * It is derived from the expression given in Appendix B of RFC 3986
 * ({@link http://www.ietf.org/rfc/rfc3986.txt}):
 * <pre>
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *    12            3  4          5       6  7        8 9
 * </pre>
 * where the numbers mark the capture groups. For
 * <pre>
 *   http://www.ics.uci.edu/pub/ietf/uri/#Related
 * </pre>
 * the RFC expression yields
 * <pre>
 *   $1 = http:
 *   $2 = http
 *   $3 = //www.ics.uci.edu
 *   $4 = www.ics.uci.edu
 *   $5 = /pub/ietf/uri/
 *   $6 = <undefined>
 *   $7 = <undefined>
 *   $8 = #Related
 *   $9 = Related
 * </pre>
 * so that scheme = $2, authority = $4, path = $5, query = $7 and
 * fragment = $9.
 *
 * The expression used here is modified to expose the userInfo, domain, and
 * port separately from the authority, and to capture only the useful groups.
 * For the same example it yields
 * <pre>
 *   $1 = http              scheme
 *   $2 = <undefined>       userInfo -\
 *   $3 = www.ics.uci.edu   domain     | authority
 *   $4 = <undefined>       port     -/
 *   $5 = /pub/ietf/uri/    path
 *   $6 = <undefined>       query without ?
 *   $7 = Related           fragment without #
 * </pre>
 *
 * The fragment group uses [\s\S]* rather than .* because '.' does not match
 * line terminators; with .* a string containing a newline after the '#'
 * would fail to match at all and String.prototype.match would return null.
 *
 * @type {!RegExp}
 * @private
 */
goog.uri.utils.splitRe_ = new RegExp(
    '^' +
    '(?:' +
        '([^:/?#.]+)' +                  // scheme - ignore special characters
                                         // used by other URL parts such as :,
                                         // ?, /, #, and .
    ':)?' +
    '(?://' +
        '(?:([^/?#]*)@)?' +              // userInfo
        '([^/#?]*?)' +                   // domain
        '(?::([0-9]+))?' +               // port
        '(?=[/#?]|$)' +                  // authority-terminating character
    ')?' +
    '([^?#]+)?' +                        // path
    '(?:\\?([^#]*))?' +                  // query
    '(?:#([\\s\\S]*))?' +                // fragment, line terminators included
    '$');
204
205
/**
 * Positions of the individual URI components within the array returned by
 * goog.uri.utils.split. The values start at 1 and follow the order in which
 * the components appear in a URI.
 * @enum {number}
 */
goog.uri.utils.ComponentIndex = {
  SCHEME: 1,      // e.g. 'http'
  USER_INFO: 2,   // text before the '@'
  DOMAIN: 3,
  PORT: 4,
  PATH: 5,
  QUERY_DATA: 6,  // without the leading '?'
  FRAGMENT: 7     // without the leading '#'
};
219
220
/**
 * Breaks a URI string into its components by matching it against
 * goog.uri.utils.splitRe_.
 *
 * Individual components are read with the component indices; for example:
 * <pre>
 * goog.uri.utils.split(someStr)[goog.uri.utils.ComponentIndex.QUERY_DATA];
 * </pre>
 *
 * @param {string} uri The URI string to examine.
 * @return {!Array<string|undefined>} Each component still URI-encoded.
 *     Components that are present hold their encoded value; components that
 *     are absent are undefined or empty, depending on the browser's regular
 *     expression implementation. The result is cast to non-null on the
 *     expectation that arbitrary strings still look like path names and so
 *     always match.
 */
goog.uri.utils.split = function(uri) {
  var components = uri.match(goog.uri.utils.splitRe_);
  // See the @return comment: the match is expected to succeed.
  return /** @type {!Array<string|undefined>} */ (components);
};
241
242
/**
 * URI-decodes a string, passing falsy inputs (null, empty string) through
 * unchanged.
 *
 * @param {?string} uri A possibly null string.
 * @param {boolean=} opt_preserveReserved If true, decodeURI is used so that
 *     percent-encoded RFC-3986 reserved characters are left encoded;
 *     otherwise decodeURIComponent is used.
 * @return {?string} The string URI-decoded, or null if uri is null.
 * @private
 */
goog.uri.utils.decodeIfPossible_ = function(uri, opt_preserveReserved) {
  if (!uri) {
    // Nothing to decode; hand back exactly what was given.
    return uri;
  }

  if (opt_preserveReserved) {
    return decodeURI(uri);
  }
  return decodeURIComponent(uri);
};
257
258
/**
 * Reads a single URI component by its index in the split result.
 *
 * The named getters that follow (getPathEncoded() and friends) are preferred
 * in calling code because they read better.
 *
 * @param {goog.uri.utils.ComponentIndex} componentIndex The component index.
 * @param {string} uri The URI to examine.
 * @return {?string} The still-encoded component, or null if the component
 *     is not present.
 * @private
 */
goog.uri.utils.getComponentByIndex_ = function(componentIndex, uri) {
  var component = goog.uri.utils.split(uri)[componentIndex];
  // Normalize undefined, null, and the empty string to null.
  return component ? component : null;
};
275
276
/**
 * Extracts the scheme of a URI.
 * @param {string} uri The URI to examine.
 * @return {?string} The protocol or scheme, or null if none.  Does not
 *     include trailing colons or slashes.
 */
goog.uri.utils.getScheme = function(uri) {
  var schemeIndex = goog.uri.utils.ComponentIndex.SCHEME;
  return goog.uri.utils.getComponentByIndex_(schemeIndex, uri);
};
286
287
/**
 * Gets the effective scheme for the URL.  If the URL carries no scheme of its
 * own (it is relative), the scheme is derived from the page's location when
 * one is available.
 * @param {string} uri The URI to examine.
 * @return {string} The protocol or scheme, always lower case. Empty string if
 *     the URI has no scheme and no location is available.
 */
goog.uri.utils.getEffectiveScheme = function(uri) {
  var scheme = goog.uri.utils.getScheme(uri);
  // NOTE: When called from a web worker in Firefox 3.5, location may be null.
  // All other browsers with web workers support self.location from the worker.
  if (!scheme && goog.global.self && goog.global.self.location) {
    var protocol = goog.global.self.location.protocol;
    // Drop the final character of location.protocol (its trailing ':').
    // slice is used in place of the deprecated String.prototype.substr.
    scheme = protocol.slice(0, protocol.length - 1);
  }
  return scheme ? scheme.toLowerCase() : '';
};
304
305
/**
 * Extracts the user info portion of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The user name still encoded, or null if none.
 */
goog.uri.utils.getUserInfoEncoded = function(uri) {
  var userInfoIndex = goog.uri.utils.ComponentIndex.USER_INFO;
  return goog.uri.utils.getComponentByIndex_(userInfoIndex, uri);
};
314
315
/**
 * Extracts the user info portion of a URI and fully URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded user info, or null if none.
 */
goog.uri.utils.getUserInfo = function(uri) {
  var encodedUserInfo = goog.uri.utils.getUserInfoEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedUserInfo);
};
324
325
/**
 * Extracts the domain of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The domain name still encoded, or null if none.
 */
goog.uri.utils.getDomainEncoded = function(uri) {
  var domainIndex = goog.uri.utils.ComponentIndex.DOMAIN;
  return goog.uri.utils.getComponentByIndex_(domainIndex, uri);
};
334
335
/**
 * Extracts the domain of a URI and decodes it, leaving percent-encoded
 * reserved characters encoded.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded domain, or null if none.
 */
goog.uri.utils.getDomain = function(uri) {
  var encodedDomain = goog.uri.utils.getDomainEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(
      encodedDomain, true /* opt_preserveReserved */);
};
344
345
/**
 * Extracts the port of a URI as a number.
 * @param {string} uri The URI to examine.
 * @return {?number} The port number, or null if none.
 */
goog.uri.utils.getPort = function(uri) {
  var portText = goog.uri.utils.getComponentByIndex_(
      goog.uri.utils.ComponentIndex.PORT, uri);
  // Number(null) is 0 and Number of non-numeric text is NaN; both are falsy,
  // so every "no usable port" case collapses to null. A literal port of zero
  // is reported as null too, which is acceptable since zero isn't a relevant
  // port number.
  var port = Number(portText);
  return port ? port : null;
};
358
359
/**
 * Extracts the path of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The path still encoded, or null if none. Includes the
 *     leading slash, if any.
 */
goog.uri.utils.getPathEncoded = function(uri) {
  var pathIndex = goog.uri.utils.ComponentIndex.PATH;
  return goog.uri.utils.getComponentByIndex_(pathIndex, uri);
};
369
370
/**
 * Extracts the path of a URI and decodes it, leaving percent-encoded reserved
 * characters encoded.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded path, or null if none.  Includes the leading
 *     slash, if any.
 */
goog.uri.utils.getPath = function(uri) {
  var encodedPath = goog.uri.utils.getPathEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(
      encodedPath, true /* opt_preserveReserved */);
};
380
381
/**
 * Extracts the query string of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The query data still encoded, or null if none.  Does not
 *     include the question mark itself.
 */
goog.uri.utils.getQueryData = function(uri) {
  var queryIndex = goog.uri.utils.ComponentIndex.QUERY_DATA;
  return goog.uri.utils.getComponentByIndex_(queryIndex, uri);
};
391
392
/**
 * Extracts the fragment identifier of a URI without decoding it, by locating
 * the first hash mark rather than running the full split expression.
 * @param {string} uri The URI to examine.
 * @return {?string} The fragment identifier, or null if the URI contains no
 *     hash mark. Does not include the hash mark itself; a URI that ends in
 *     '#' yields the empty string.
 */
goog.uri.utils.getFragmentEncoded = function(uri) {
  // The hash mark may not appear in any other part of the URL.
  var hashIndex = uri.indexOf('#');
  if (hashIndex < 0) {
    return null;
  }
  // slice is used in place of the deprecated String.prototype.substr.
  return uri.slice(hashIndex + 1);
};
403
404
/**
 * Replaces the fragment identifier of a URI.
 * @param {string} uri The URI to examine.
 * @param {?string} fragment The encoded fragment identifier, or null if none.
 *     Does not include the hash mark itself. A null or empty fragment simply
 *     removes any existing fragment.
 * @return {string} The URI with the fragment set.
 */
goog.uri.utils.setFragmentEncoded = function(uri, fragment) {
  var withoutFragment = goog.uri.utils.removeFragment(uri);
  if (fragment) {
    return withoutFragment + ('#' + fragment);
  }
  return withoutFragment + '';
};
414
415
/**
 * Extracts the fragment identifier of a URI and fully URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded fragment identifier, or null if none.  Does
 *     not include the hash mark.
 */
goog.uri.utils.getFragment = function(uri) {
  var encodedFragment = goog.uri.utils.getFragmentEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedFragment);
};
425
426
/**
 * Extracts everything up to the port of the URI: the scheme, user info,
 * domain and port, reassembled into a string.
 * @param {string} uri The URI string.
 * @return {string} Everything up to and including the port.
 */
goog.uri.utils.getHost = function(uri) {
  var Index = goog.uri.utils.ComponentIndex;
  var parts = goog.uri.utils.split(uri);
  var scheme = parts[Index.SCHEME];
  var userInfo = parts[Index.USER_INFO];
  var domain = parts[Index.DOMAIN];
  var port = parts[Index.PORT];
  return goog.uri.utils.buildFromEncodedParts(scheme, userInfo, domain, port);
};
440
441
/**
 * Extracts the path of the URL and everything after it.
 * @param {string} uri The URI string.
 * @return {string} The URI, starting at the path and including the query
 *     parameters and fragment identifier.
 */
goog.uri.utils.getPathAndAfter = function(uri) {
  var Index = goog.uri.utils.ComponentIndex;
  var parts = goog.uri.utils.split(uri);
  var path = parts[Index.PATH];
  var queryData = parts[Index.QUERY_DATA];
  var fragment = parts[Index.FRAGMENT];
  // Scheme, user info, domain and port are deliberately left out.
  return goog.uri.utils.buildFromEncodedParts(
      null, null, null, null, path, queryData, fragment);
};
455
456
/**
 * Gets the URI with the fragment identifier removed.
 * @param {string} uri The URI to examine.
 * @return {string} Everything preceding the first hash mark, or the URI
 *     unchanged if it contains no hash mark.
 */
goog.uri.utils.removeFragment = function(uri) {
  // The hash mark may not appear in any other part of the URL.
  var hashIndex = uri.indexOf('#');
  if (hashIndex < 0) {
    return uri;
  }
  // slice is used in place of the deprecated String.prototype.substr.
  return uri.slice(0, hashIndex);
};
467
468
/**
 * Ensures that two URI's have the exact same domain, scheme, and port.
 *
 * Unlike the version in goog.Uri, this checks protocol, and therefore is
 * suitable for checking against the browser's same-origin policy.
 *
 * @param {string} uri1 The first URI.
 * @param {string} uri2 The second URI.
 * @return {boolean} Whether they have the same scheme, domain and port.
 */
goog.uri.utils.haveSameDomain = function(uri1, uri2) {
  var first = goog.uri.utils.split(uri1);
  var second = goog.uri.utils.split(uri2);
  // Compared in this order: domain, then scheme, then port.
  var indicesToCompare = [
    goog.uri.utils.ComponentIndex.DOMAIN,
    goog.uri.utils.ComponentIndex.SCHEME,
    goog.uri.utils.ComponentIndex.PORT
  ];
  for (var i = 0; i < indicesToCompare.length; i++) {
    var componentIndex = indicesToCompare[i];
    if (first[componentIndex] != second[componentIndex]) {
      return false;
    }
  }
  return true;
};
489
490
/**
 * Throws if the URI contains a '#' or a '?'. The check runs only when
 * goog.DEBUG is set, i.e. in uncompiled mode.
 * @param {string} uri The URI to examine.
 * @private
 */
goog.uri.utils.assertNoFragmentsOrQueries_ = function(uri) {
  // NOTE: would use goog.asserts here, but jscompiler doesn't know that
  // indexOf has no side effects.
  if (!goog.DEBUG) {
    return;
  }
  if (uri.indexOf('#') < 0 && uri.indexOf('?') < 0) {
    return;
  }
  throw Error('goog.uri.utils: Fragment or query identifiers are not ' +
              'supported: [' + uri + ']');
};
505
506
/**
 * The kinds of value accepted by the parameter serializing utilities.
 *
 * A null or undefined value causes its key-value pair to be skipped, which is
 * a convenient way to omit parameters conditionally.  Any other non-array
 * value is converted to a string and URI-encoded.  An array is expanded into
 * one &key=value pair per element, each element stringized and URI-encoded.
 *
 * @typedef {*}
 */
goog.uri.utils.QueryValue;
518
519
/**
 * A flat array describing a set of query parameters as alternating keys and
 * values.
 *
 * Keys sit at the even indices and are assumed to be URI encoded already.
 * Values sit at the odd indices; see goog.uri.utils.QueryValue for how they
 * are encoded.
 *
 * Example:
 * <pre>
 * var data = [
 *   // Simple param: ?name=BobBarker
 *   'name', 'BobBarker',
 *   // Conditional param -- may be omitted entirely.
 *   'specialDietaryNeeds', hasDietaryNeeds() ? getDietaryNeeds() : null,
 *   // Multi-valued param: &house=LosAngeles&house=NewYork&house=null
 *   'house', ['LosAngeles', 'NewYork', null]
 * ];
 * </pre>
 *
 * @typedef {!Array<string|goog.uri.utils.QueryValue>}
 */
goog.uri.utils.QueryArray;
542
543
/**
 * Walks an encoded query string and invokes a callback once per parameter.
 *
 * The string is split on '&' and each piece is split at its first '='. A
 * parameter with no value (e.g. "...&key&...") is reported with an empty
 * string value. Keys may be empty strings (e.g. "...&=value&..."), which
 * also means that "...&=&..." and "...&&..." both produce an empty key and
 * an empty value. Nothing is called for an empty query string.
 *
 * @param {string} encodedQuery Encoded query string excluding question mark at
 *     the beginning.
 * @param {function(string, string)} callback Function called for every
 *     parameter found in query string.  The first argument (name) will not be
 *     urldecoded (so the function is consistent with buildQueryData), but the
 *     second will.  If the parameter has no value (i.e. "=" was not present)
 *     the second argument (value) will be an empty string.
 */
goog.uri.utils.parseQueryData = function(encodedQuery, callback) {
  if (!encodedQuery) {
    return;
  }
  var params = encodedQuery.split('&');
  for (var i = 0; i < params.length; i++) {
    var param = params[i];
    var equalsAt = param.indexOf('=');
    var hasEquals = equalsAt >= 0;
    var name = hasEquals ? param.substring(0, equalsAt) : param;
    var encodedValue = hasEquals ? param.substring(equalsAt + 1) : null;
    // Both a missing value and an empty one are reported as ''.
    callback(name, encodedValue ? goog.string.urlDecode(encodedValue) : '');
  }
};
578
579
/**
 * Appends a URI and query data in a string buffer with special preconditions.
 *
 * Internal implementation utility, performing very few object allocations.
 * The buffer is modified in place: the delimiter slot may be rewritten, and a
 * fragment found on the base URI is moved to the end of the buffer so that
 * the query data lands in front of it.
 *
 * @param {!Array<string|undefined>} buffer A string buffer.  The first element
 *     must be the base URI, and may have a fragment identifier.  If the array
 *     contains more than one element, the second element must be an ampersand,
 *     and may be overwritten, depending on the base URI.  Undefined elements
 *     are treated as empty-string.
 * @return {string} The concatenated URI and query data.
 * @private
 */
goog.uri.utils.appendQueryData_ = function(buffer) {
  if (buffer[1]) {
    // At least one query parameter was added.  We need to check the
    // punctuation mark, which is currently an ampersand, and also make sure
    // there aren't any interfering fragment identifiers.
    var baseUri = /** @type {string} */ (buffer[0]);
    var hashIndex = baseUri.indexOf('#');
    if (hashIndex >= 0) {
      // Move the fragment off the base part of the URI into the end.
      // slice is used in place of the deprecated String.prototype.substr.
      buffer.push(baseUri.slice(hashIndex));
      buffer[0] = baseUri = baseUri.slice(0, hashIndex);
    }
    var questionIndex = baseUri.indexOf('?');
    if (questionIndex < 0) {
      // No question mark, so we need a question mark instead of an ampersand.
      buffer[1] = '?';
    } else if (questionIndex == baseUri.length - 1) {
      // Question mark is the very last character of the existing URI, so don't
      // append an additional delimiter. Array.prototype.join renders
      // undefined as the empty string.
      buffer[1] = undefined;
    }
  }

  return buffer.join('');
};
618
619
/**
 * Serializes one key and its value onto a buffer, supporting multi-valued
 * (array) values.
 *
 * For each emitted pair four elements are pushed: '&', the key, '=' (or ''
 * when the value is the empty string), and the URL-encoded value.
 *
 * @param {string} key The key prefix.
 * @param {goog.uri.utils.QueryValue} value The value to serialize.
 * @param {!Array<string>} pairs The array to which the serialized pieces
 *     should be appended.
 * @private
 */
goog.uri.utils.appendKeyValuePairs_ = function(key, value, pairs) {
  if (!goog.isArray(value)) {
    // Skip a top-level null or undefined entirely.
    if (value == null) {
      return;
    }
    // Zero gets encoded into the url as a literal string.  For empty string,
    // skip the equal sign, to be consistent with UriBuilder.java.
    var separator = value === '' ? '' : '=';
    pairs.push('&', key, separator, goog.string.urlEncode(value));
    return;
  }

  // Convince the compiler it's an array.
  goog.asserts.assertArray(value);
  for (var j = 0; j < value.length; j++) {
    // Each element is stringized before recursing. That bypasses the null and
    // array handling above: null and undefined elements are written as the
    // literal 'null' and 'undefined', and nested arrays are encoded in the
    // default way instead of being expanded.
    var element = String(value[j]);
    goog.uri.utils.appendKeyValuePairs_(key, element, pairs);
  }
};
649
650
/**
 * Fills a buffer with query data taken from a sequence of alternating keys
 * and values.
 *
 * @param {!Array<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {!goog.uri.utils.QueryArray|!Arguments} keysAndValues An array with
 *     alternating keys and values -- see the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {!Array<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBuffer_ = function(
    buffer, keysAndValues, opt_startIndex) {
  var remaining = Math.max(keysAndValues.length - (opt_startIndex || 0), 0);
  goog.asserts.assert(remaining % 2 == 0,
      'goog.uri.utils: Key/value lists must be even in length.');

  // Step through the list two entries at a time: key, then value.
  var keyIndex = opt_startIndex || 0;
  while (keyIndex < keysAndValues.length) {
    goog.uri.utils.appendKeyValuePairs_(
        keysAndValues[keyIndex], keysAndValues[keyIndex + 1], buffer);
    keyIndex += 2;
  }

  return buffer;
};
674
675
/**
 * Builds a query data string from a sequence of alternating keys and values.
 * Currently generates "&key&" for empty args.
 *
 * @param {goog.uri.utils.QueryArray} keysAndValues Alternating keys and
 *     values.  See the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryData = function(keysAndValues, opt_startIndex) {
  var pieces = [];
  goog.uri.utils.buildQueryDataBuffer_(pieces, keysAndValues, opt_startIndex);
  // The first slot holds the leading ampersand; blank it out.
  pieces[0] = '';
  return pieces.join('');
};
691
692
/**
 * Fills a buffer with query data taken from the enumerable properties of a
 * map object.
 *
 * @param {!Array<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {!Object<string, goog.uri.utils.QueryValue>} map An object where keys
 *     are URI-encoded parameter keys, and the values conform to the contract
 *     specified in the goog.uri.utils.QueryValue typedef.
 * @return {!Array<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBufferFromMap_ = function(buffer, map) {
  for (var encodedKey in map) {
    var queryValue = map[encodedKey];
    goog.uri.utils.appendKeyValuePairs_(encodedKey, queryValue, buffer);
  }

  return buffer;
};
711
712
/**
 * Builds a query data string from a map.
 * Currently generates "&key&" for empty args.
 *
 * @param {!Object<string, goog.uri.utils.QueryValue>} map An object where keys
 *     are URI-encoded parameter keys, and the values are arbitrary types
 *     or arrays.  Keys with a null value are dropped.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryDataFromMap = function(map) {
  var pieces = [];
  goog.uri.utils.buildQueryDataBufferFromMap_(pieces, map);
  // The first slot holds the leading ampersand; blank it out.
  pieces[0] = '';
  return pieces.join('');
};
727
728
/**
 * Appends URI parameters to an existing URI.
 *
 * The parameters are given either as one array argument or as a variable
 * argument list, in both cases as alternating keys and values.  Keys are
 * assumed to be already URI encoded.  The values should not be URI-encoded,
 * and will instead be encoded by this function.
 * <pre>
 * appendParams('http://www.foo.com?existing=true',
 *     'key1', 'value1',
 *     'key2', 'value?willBeEncoded',
 *     'key3', ['valueA', 'valueB', 'valueC'],
 *     'key4', null);
 * result: 'http://www.foo.com?existing=true&' +
 *     'key1=value1&' +
 *     'key2=value%3FwillBeEncoded&' +
 *     'key3=valueA&key3=valueB&key3=valueC'
 * </pre>
 *
 * A single call to this function will not exhibit quadratic behavior in IE,
 * whereas multiple repeated calls may, although the effect is limited by
 * fact that URL's generally can't exceed 2kb.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {...(goog.uri.utils.QueryArray|string|goog.uri.utils.QueryValue)} var_args
 *     An array or argument list conforming to goog.uri.utils.QueryArray.
 * @return {string} The URI with all query parameters added.
 */
goog.uri.utils.appendParams = function(uri, var_args) {
  var buffer;
  if (arguments.length == 2) {
    // Exactly one extra argument: treat it as the key/value array itself.
    buffer = goog.uri.utils.buildQueryDataBuffer_([uri], arguments[1], 0);
  } else {
    // Otherwise the keys and values are the arguments following the URI.
    buffer = goog.uri.utils.buildQueryDataBuffer_([uri], arguments, 1);
  }
  return goog.uri.utils.appendQueryData_(buffer);
};
762
763
/**
 * Appends query parameters taken from a map to an existing URI.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {!Object<goog.uri.utils.QueryValue>} map An object where keys are
 *     URI-encoded parameter keys, and the values are arbitrary types or arrays.
 *     Keys with a null value are dropped.
 * @return {string} The URI with the parameters from the map added.
 */
goog.uri.utils.appendParamsFromMap = function(uri, map) {
  var buffer = goog.uri.utils.buildQueryDataBufferFromMap_([uri], map);
  return goog.uri.utils.appendQueryData_(buffer);
};
777
778
/**
 * Appends a single URI parameter.
 *
 * Repeated calls to this can exhibit quadratic behavior in IE6 due to the
 * way string append works, though it should be limited given the 2kb limit.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} key The key, which must already be URI encoded.
 * @param {*=} opt_value The value, which will be stringized and encoded
 *     (assumed not already to be encoded).  If omitted, undefined, or null, the
 *     key will be added as a valueless parameter.
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.appendParam = function(uri, key, opt_value) {
  var buffer = goog.isDefAndNotNull(opt_value) ?
      [uri, '&', key, '=', goog.string.urlEncode(opt_value)] :
      [uri, '&', key];
  return goog.uri.utils.appendQueryData_(buffer);
};
799
800
/**
 * Finds the next instance of a query parameter with the specified name.
 *
 * Does not instantiate any objects.
 *
 * @param {string} uri The URI to search.  May contain a fragment identifier,
 *     provided hashOrEndIndex points at its hash mark.
 * @param {number} startIndex The index to begin searching for the key at.  A
 *     match may be found even if this is one character after the ampersand.
 * @param {string} keyEncoded The URI-encoded key.
 * @param {number} hashOrEndIndex Index to stop looking at.  If a hash
 *     mark is present, it should be its index, otherwise it should be the
 *     length of the string.
 * @return {number} The position of the first character in the key's name,
 *     immediately after either a question mark or an ampersand, or -1 if the
 *     key does not occur before hashOrEndIndex.
 * @private
 */
goog.uri.utils.findParam_ = function(
    uri, startIndex, keyEncoded, hashOrEndIndex) {
  var CharCode = goog.uri.utils.CharCode_;
  var keyLength = keyEncoded.length;

  // Search for the key itself and post-filter for surrounding punctuation,
  // rather than expensively building a regexp.
  var index = uri.indexOf(keyEncoded, startIndex);
  while (index >= 0 && index < hashOrEndIndex) {
    // The character before the key must be '&' or '?'.
    var before = uri.charCodeAt(index - 1);
    if (before == CharCode.AMPERSAND || before == CharCode.QUESTION) {
      // The character after the key must be '&', '=', '#', or NaN
      // (end of string).
      var after = uri.charCodeAt(index + keyLength);
      if (!after ||
          after == CharCode.EQUAL ||
          after == CharCode.AMPERSAND ||
          after == CharCode.HASH) {
        return index;
      }
    }
    // Not a real parameter boundary; resume just past this occurrence.
    index = uri.indexOf(keyEncoded, index + keyLength + 1);
  }

  return -1;
};
846
847
/**
 * Regular expression matching a hash mark or, failing that, the end of the
 * string. Used with String.prototype.search to find where the query portion
 * of a URI stops.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.hashOrEndRe_ = new RegExp('#|$');
854
855
/**
 * Determines if the URI contains a specific key.
 *
 * Performs no object instantiations.
 *
 * @param {string} uri The URI to process.  May contain a fragment
 *     identifier.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {boolean} Whether the key is present.
 */
goog.uri.utils.hasParam = function(uri, keyEncoded) {
  // Only the part of the URI before any fragment is searched.
  var hashOrEndIndex = uri.search(goog.uri.utils.hashOrEndRe_);
  var foundIndex =
      goog.uri.utils.findParam_(uri, 0, keyEncoded, hashOrEndIndex);
  return foundIndex >= 0;
};
870
871
/**
 * Gets the first value of a query parameter.
 * @param {string} uri The URI to process.  May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {?string} The first value of the parameter (URI-decoded), or null
 *     if the parameter is not found. A parameter that is present without a
 *     value yields the empty string.
 */
goog.uri.utils.getParamValue = function(uri, keyEncoded) {
  var hashOrEndIndex = uri.search(goog.uri.utils.hashOrEndRe_);
  var foundIndex = goog.uri.utils.findParam_(
      uri, 0, keyEncoded, hashOrEndIndex);

  if (foundIndex < 0) {
    return null;
  }

  // The value ends at the next '&', or at the hash mark / end of string when
  // no '&' precedes it.
  var endPosition = uri.indexOf('&', foundIndex);
  if (endPosition < 0 || endPosition > hashOrEndIndex) {
    endPosition = hashOrEndIndex;
  }
  // Progress forth to the end of the "key=" or "key&" substring.
  var valueStart = foundIndex + keyEncoded.length + 1;
  // For a valueless key at the very end, valueStart exceeds endPosition.
  // slice returns the empty string in that case (substring would swap its
  // arguments), and replaces the deprecated String.prototype.substr.
  return goog.string.urlDecode(uri.slice(valueStart, endPosition));
};
899
900
/**
 * Collects the values of every occurrence of a query parameter.
 * @param {string} uri The URI to process. May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key. Case-sensitive.
 * @return {!Array<string>} The values found for the key, each passed through
 *     goog.string.urlDecode, in the order they occur. Empty (never null) when
 *     the key is absent.
 */
goog.uri.utils.getParamValues = function(uri, keyEncoded) {
  // Index of the first '#', or the string length when there is none.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var values = [];
  // Where the next search starts; moved to the end of each match in turn.
  var cursor = 0;

  for (;;) {
    var keyStart =
        goog.uri.utils.findParam_(uri, cursor, keyEncoded, queryEnd);
    if (keyStart < 0) {
      break;
    }

    // This occurrence ends at the next '&', or at the hash mark / end of
    // string when no '&' precedes it.
    var ampIndex = uri.indexOf('&', keyStart);
    cursor = (ampIndex < 0 || ampIndex > queryEnd) ? queryEnd : ampIndex;

    // Step over the key and the one character following it ("key=" or
    // "key&").
    var valueStart = keyStart + keyEncoded.length + 1;

    // slice (unlike substring) never swaps its arguments, so a bare key,
    // where valueStart > cursor, contributes the empty string.
    values.push(goog.string.urlDecode(uri.slice(valueStart, cursor)));
  }

  return values;
};
933
934
/**
 * Pattern that matches one '?' or '&' sitting directly before the end of the
 * string or directly before a '#'. The end-of-string / '#' part is captured
 * as group 1 so that a replacement of '$1' drops only the punctuation.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.trailingQueryPunctuationRe_ = /[?&]($|#)/;
941
942
/**
 * Strips every occurrence of a query parameter from a URI.
 * @param {string} uri The URI to process. Documented as not containing a
 *     fragment; the implementation still stops searching at the first '#'
 *     and copies the rest of the string through verbatim.
 * @param {string} keyEncoded The URI-encoded key.
 * @return {string} The URI with all instances of the parameter removed.
 */
goog.uri.utils.removeParam = function(uri, keyEncoded) {
  // Index of the first '#', or the string length when there is none.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  // Pieces of the URI that survive, in order.
  var kept = [];
  // Start of the text not yet copied into `kept`.
  var cursor = 0;

  for (;;) {
    var keyStart =
        goog.uri.utils.findParam_(uri, cursor, keyEncoded, queryEnd);
    if (keyStart < 0) {
      break;
    }

    // Keep the text in front of the key. The '?' or '&' that introduces the
    // key stays in this piece; if it ends up dangling it is trimmed below.
    kept.push(uri.substring(cursor, keyStart));

    // Resume just after the '&' that ends this parameter. With no '&' left,
    // or one that lies beyond the hash mark, resume at the hash mark / end.
    var ampIndex = uri.indexOf('&', keyStart);
    var resumeAt = ampIndex < 0 ? queryEnd : ampIndex + 1;
    cursor = Math.min(resumeAt, queryEnd);
  }

  // Whatever follows the last removed parameter is copied unchanged.
  kept.push(uri.slice(cursor));

  // Drop a '?' or '&' left stranded at the end of the string or before '#'.
  var rebuilt = kept.join('');
  return rebuilt.replace(goog.uri.utils.trailingQueryPunctuationRe_, '$1');
};
974
975
/**
 * Gives a parameter exactly one definition: all existing occurrences are
 * removed with removeParam, then a single one is added with appendParam.
 *
 * Repeated calls to this can exhibit quadratic behavior due to the need to
 * find existing instances and reconstruct the string, though it should be
 * limited given the 2kb limit. Consider using appendParams to append multiple
 * parameters in bulk.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} keyEncoded The key, which must already be URI encoded.
 * @param {*} value The value, handed to appendParam as-is; per this
 *     function's contract it will be stringized and encoded (assumed not
 *     already to be encoded).
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.setParam = function(uri, keyEncoded, value) {
  var uriWithoutKey = goog.uri.utils.removeParam(uri, keyEncoded);
  return goog.uri.utils.appendParam(uriWithoutKey, keyEncoded, value);
};
994
995
/**
 * Joins a base URI and a path with exactly one '/' between them, so that the
 * seam never becomes "//". The baseUri passed in must not contain query or
 * fragment identifiers (this is asserted). The path to append may not contain
 * query or fragment identifiers either; that is not checked here.
 *
 * At most one trailing '/' is trimmed from baseUri and at most one leading
 * '/' from path.
 *
 * @param {string} baseUri URI to use as the base.
 * @param {string} path Path to append.
 * @return {string} Updated URI.
 */
goog.uri.utils.appendPath = function(baseUri, path) {
  goog.uri.utils.assertNoFragmentsOrQueries_(baseUri);

  // Base without its final '/', if it had one.
  var trimmedBase =
      goog.string.endsWith(baseUri, '/') ? baseUri.slice(0, -1) : baseUri;
  // Path without its first '/', if it had one.
  var trimmedPath =
      goog.string.startsWith(path, '/') ? path.slice(1) : path;

  return goog.string.buildString(trimmedBase, '/', trimmedPath);
};
1019
1020
/**
 * Swaps the path component of a URI for a new one, leaving the scheme, user
 * info, domain, port, query data and fragment as split() reported them.
 * @param {string} uri URI to use as the base.
 * @param {string} path New path. A leading '/' is added when it is missing.
 * @return {string} Updated URI.
 */
goog.uri.utils.setPath = function(uri, path) {
  var rootedPath = goog.string.startsWith(path, '/') ? path : '/' + path;
  var components = goog.uri.utils.split(uri);
  var Index = goog.uri.utils.ComponentIndex;

  // Rebuild from the original components, substituting only the path.
  return goog.uri.utils.buildFromEncodedParts(
      components[Index.SCHEME],
      components[Index.USER_INFO],
      components[Index.DOMAIN],
      components[Index.PORT],
      rootedPath,
      components[Index.QUERY_DATA],
      components[Index.FRAGMENT]);
};
1042
1043
/**
 * Names of the standard query parameters supported by these utilities.
 * @enum {string}
 */
goog.uri.utils.StandardQueryParam = {

  /** Otherwise-unused parameter whose value exists only to make a URI unique. */
  RANDOM: 'zx'
};
1053
1054
/**
 * Makes a URI unique by giving its "zx" parameter a random value.
 * @param {string} uri Any URI.
 * @return {string} That URI with the "zx" parameter added or replaced to
 *     contain a random string.
 */
goog.uri.utils.makeUnique = function(uri) {
  var randomKey = goog.uri.utils.StandardQueryParam.RANDOM;
  var randomValue = goog.string.getRandomString();
  // setParam removes any existing "zx" before appending the new one.
  return goog.uri.utils.setParam(uri, randomKey, randomValue);
};