lib/goog/uri/utils.js

1// Copyright 2008 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/**
16 * @fileoverview Simple utilities for dealing with URI strings.
17 *
18 * This is intended to be a lightweight alternative to constructing goog.Uri
19 * objects. Whereas goog.Uri adds several kilobytes to the binary regardless
20 * of how much of its functionality you use, this is designed to be a set of
21 * mostly-independent utilities so that the compiler includes only what is
22 * necessary for the task. Estimated savings of porting is 5k pre-gzip and
23 * 1.5k post-gzip. To ensure the savings remain, future developers should
24 * avoid adding new functionality to existing functions, but instead create
25 * new ones and factor out shared code.
26 *
27 * Many of these utilities have limited functionality, tailored to common
28 * cases. The query parameter utilities assume that the parameter keys are
29 * already encoded, since most keys are compile-time alphanumeric strings. The
30 * query parameter mutation utilities also do not tolerate fragment identifiers.
31 *
32 * By design, these functions can be slower than goog.Uri equivalents.
33 * Repeated calls to some of these functions may be quadratic in behavior for IE,
34 * although the effect is somewhat limited given the 2kb limit.
35 *
36 * One advantage of the limited functionality here is that this approach is
37 * less sensitive to differences in URI encodings than goog.Uri, since these
38 * functions operate on strings directly, rather than decoding them and
39 * then re-encoding.
40 *
41 * Uses features of RFC 3986 for parsing/formatting URIs:
42 * http://www.ietf.org/rfc/rfc3986.txt
43 *
44 * @author gboyer@google.com (Garrett Boyer) - The "lightened" design.
45 */
46
47goog.provide('goog.uri.utils');
48goog.provide('goog.uri.utils.ComponentIndex');
49goog.provide('goog.uri.utils.QueryArray');
50goog.provide('goog.uri.utils.QueryValue');
51goog.provide('goog.uri.utils.StandardQueryParam');
52
53goog.require('goog.asserts');
54goog.require('goog.string');
55goog.require('goog.userAgent');
56
57
/**
 * Numeric character codes for the URI punctuation marks that this file
 * compares against String.prototype.charCodeAt() results. Using the numbers
 * directly avoids creating one-character strings while scanning.
 * @enum {number}
 * @private
 */
goog.uri.utils.CharCode_ = {
  AMPERSAND: 38,  // '&'
  EQUAL: 61,      // '='
  HASH: 35,       // '#'
  QUESTION: 63    // '?'
};
69
70
/**
 * Assembles a URI string out of parts that are already URI-encoded.
 *
 * Nothing is encoded or validated here; the parts are simply joined with the
 * appropriate punctuation. Any part that is null, undefined, or otherwise
 * falsy is left out together with its punctuation.
 *
 * @param {?string=} opt_scheme The scheme such as 'http'.
 * @param {?string=} opt_userInfo The user name before the '@'.
 * @param {?string=} opt_domain The domain such as 'www.google.com', already
 *     URI-encoded.
 * @param {(string|number|null)=} opt_port The port number.
 * @param {?string=} opt_path The path, already URI-encoded.  If it is not
 *     empty, it must begin with a slash.
 * @param {?string=} opt_queryData The URI-encoded query data.
 * @param {?string=} opt_fragment The URI-encoded fragment identifier.
 * @return {string} The fully combined URI.
 */
goog.uri.utils.buildFromEncodedParts = function(opt_scheme, opt_userInfo,
    opt_domain, opt_port, opt_path, opt_queryData, opt_fragment) {
  var schemePart = opt_scheme ? opt_scheme + ':' : '';

  // User info and port are only emitted as part of an authority, so both are
  // ignored when no domain is given.
  var authorityPart = '';
  if (opt_domain) {
    authorityPart = '//' +
        (opt_userInfo ? opt_userInfo + '@' : '') +
        opt_domain +
        (opt_port ? ':' + opt_port : '');
  }

  return schemePart +
      authorityPart +
      (opt_path ? opt_path : '') +
      (opt_queryData ? '?' + opt_queryData : '') +
      (opt_fragment ? '#' + opt_fragment : '');
};
124
125
/**
 * A regular expression for breaking a URI into its component parts.
 *
 * It is derived from the expression given in Appendix B of RFC 3986
 * ({@link http://www.ietf.org/rfc/rfc3986.txt}):
 * <pre>
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 * </pre>
 * in which the scheme, authority, path, query and fragment are captured by
 * the subexpressions $2, $4, $5, $7 and $9 respectively.
 *
 * The expression has been modified to expose the userInfo, domain, and port
 * separately from the authority, and to capture only the useful parts. The
 * capturing groups of the modified version are:
 * <pre>
 *   $1 = scheme
 *   $2 = userInfo  -\
 *   $3 = domain     | authority
 *   $4 = port      -/
 *   $5 = path
 *   $6 = query without ?
 *   $7 = fragment without #
 * </pre>
 * For example, matching
 * <pre>
 *   http://www.ics.uci.edu/pub/ietf/uri/#Related
 * </pre>
 * yields
 * <pre>
 *   $1 = http
 *   $2 = <undefined>
 *   $3 = www.ics.uci.edu
 *   $4 = <undefined>
 *   $5 = /pub/ietf/uri/
 *   $6 = <undefined>
 *   $7 = Related
 * </pre>
 * where <undefined> indicates that the component is not present.
 *
 * Every component is optional and the fragment group accepts any character,
 * including line terminators, so the expression matches every input string.
 *
 * @type {!RegExp}
 * @private
 */
goog.uri.utils.splitRe_ = new RegExp(
    '^' +
    '(?:' +
        '([^:/?#.]+)' +                  // scheme - ignore special characters
                                         // used by other URL parts such as :,
                                         // ?, /, #, and .
    ':)?' +
    '(?://' +
        '(?:([^/?#]*)@)?' +              // userInfo
        '([^/#?]*?)' +                   // domain - lazy, so that a trailing
                                         // ':digits' is left for the port
        '(?::([0-9]+))?' +               // port
        '(?=[/#?]|$)' +                  // authority-terminating character
    ')?' +
    '([^?#]+)?' +                        // path
    '(?:\\?([^#]*))?' +                  // query
    // The fragment uses [\s\S] rather than '.', because '.' does not match
    // line terminators: with '.', an input such as 'a#b\nc' fails to match
    // at all and String.prototype.match returns null.
    '(?:#([\\s\\S]*))?' +                // fragment
    '$');
205
206
/**
 * Positions of the individual URI components within the array returned by
 * goog.uri.utils.split. Index 0 is not listed; in a regular expression match
 * result it holds the whole matched string.
 * @enum {number}
 */
goog.uri.utils.ComponentIndex = {
  SCHEME: 1,      // e.g. 'http'
  USER_INFO: 2,   // the part before '@'
  DOMAIN: 3,
  PORT: 4,
  PATH: 5,
  QUERY_DATA: 6,  // without the leading '?'
  FRAGMENT: 7     // without the leading '#'
};
220
221
/**
 * Breaks a URI string into its components.
 *
 * The result is indexed with goog.uri.utils.ComponentIndex, for example:
 * <pre>
 * goog.uri.utils.split(someStr)[goog.uri.utils.ComponentIndex.QUERY_DATA];
 * </pre>
 *
 * The phishing check is run before the URI is parsed, so this function may
 * throw if that check fails.
 *
 * @param {string} uri The URI string to examine.
 * @return {!Array<string|undefined>} Each component still URI-encoded.
 *     Components that are present hold their encoded value; components that
 *     are absent are undefined or empty, depending on the browser's regular
 *     expression implementation. The non-null type relies on
 *     goog.uri.utils.splitRe_ matching the input, which is the intent since
 *     arbitrary strings may still look like path names.
 */
goog.uri.utils.split = function(uri) {
  goog.uri.utils.phishingProtection_();

  var components = uri.match(goog.uri.utils.splitRe_);
  // See the @return comment -- the match is expected to always succeed.
  return /** @type {!Array<string|undefined>} */ (components);
};
244
245
/**
 * Whether the "Safari phishing check" still has to be run.
 *
 * Safari has a bug with http URLs that contain a username, e.g.
 *   http://evil.com%2F@google.com/
 * for which it reports window.location.href as
 *   http://evil.com/google.com/
 * Anyone parsing the domain out of that URL gets the wrong domain, and this
 * has been exploited to make Safari load resources from evil domains.
 *
 * As a workaround a check is run that throws an exception when this is
 * detected. There is no convenient place for the check, so it is applied to
 * anyone doing URI parsing on Webkit, which is why this flag starts out as
 * goog.userAgent.WEBKIT.
 *
 * This should be removed once Safari fixes their bug.
 *
 * Exploit reported by Masato Kinugawa.
 *
 * @type {boolean}
 * @private
 */
goog.uri.utils.needsPhishingProtection_ = goog.userAgent.WEBKIT;
270
271
/**
 * Checks whether the user is being phished: parses the page's own
 * location.href and throws if the parsed domain disagrees with
 * location.hostname.
 *
 * The check is skipped once it has passed (the flag
 * goog.uri.utils.needsPhishingProtection_ is cleared); if it fails, the flag
 * is set again so that the next call repeats the check.
 * @private
 */
goog.uri.utils.phishingProtection_ = function() {
  if (!goog.uri.utils.needsPhishingProtection_) {
    return;
  }

  // Clear the flag before parsing: getDomain() goes through split(), which
  // calls this function again, and that nested call must not recurse.
  goog.uri.utils.needsPhishingProtection_ = false;

  // Use quoted access, just in case the user isn't using location externs.
  var pageLocation = goog.global['location'];
  var pageHref = pageLocation ? pageLocation['href'] : null;
  if (!pageHref) {
    return;
  }

  var parsedDomain = goog.uri.utils.getDomain(pageHref);
  if (parsedDomain && parsedDomain != pageLocation['hostname']) {
    // Phishing attack. Re-arm the check before failing.
    goog.uri.utils.needsPhishingProtection_ = true;
    throw Error();
  }
};
296
297
/**
 * URI-decodes a string, passing falsy input (null or the empty string)
 * through unchanged.
 *
 * Decoding uses the built-in decodeURI or decodeURIComponent; errors they
 * raise for malformed escape sequences are not caught here.
 *
 * @param {?string} uri A possibly null string.
 * @param {boolean=} opt_preserveReserved If true, percent-encoding of RFC-3986
 *     reserved characters will not be removed (decodeURI is used instead of
 *     decodeURIComponent).
 * @return {?string} The string URI-decoded, or null if uri is null.
 * @private
 */
goog.uri.utils.decodeIfPossible_ = function(uri, opt_preserveReserved) {
  if (uri) {
    var decode = opt_preserveReserved ? decodeURI : decodeURIComponent;
    return decode(uri);
  }

  return uri;
};
312
313
/**
 * Looks up one component of a URI by its index in the split result.
 *
 * Callers should prefer the named accessors such as getPathEncoded(), since
 * they are more readable.
 *
 * @param {goog.uri.utils.ComponentIndex} componentIndex The component index.
 * @param {string} uri The URI to examine.
 * @return {?string} The still-encoded component, or null if the component
 *     is not present.
 * @private
 */
goog.uri.utils.getComponentByIndex_ = function(componentIndex, uri) {
  var component = goog.uri.utils.split(uri)[componentIndex];
  // Undefined, null, and the empty string are all reported as null.
  return component ? component : null;
};
330
331
/**
 * Extracts the scheme of a URI.
 * @param {string} uri The URI to examine.
 * @return {?string} The protocol or scheme, or null if none.  Does not
 *     include trailing colons or slashes.
 */
goog.uri.utils.getScheme = function(uri) {
  var schemeIndex = goog.uri.utils.ComponentIndex.SCHEME;
  return goog.uri.utils.getComponentByIndex_(schemeIndex, uri);
};
341
342
/**
 * Gets the effective scheme for the URL.  If the URL is relative then the
 * scheme is derived from the page's location, when one is available.
 * @param {string} uri The URI to examine.
 * @return {string} The protocol or scheme, always lower case. Empty string if
 *     the URI has no scheme and none can be derived from the environment.
 */
goog.uri.utils.getEffectiveScheme = function(uri) {
  var scheme = goog.uri.utils.getScheme(uri);
  // Probe `self` with typeof: referencing an undeclared identifier directly
  // throws a ReferenceError in hosts that do not define `self`.
  // NOTE: When called from a web worker in Firefox 3.5, location may be null.
  // All other browsers with web workers support self.location from the worker.
  if (!scheme && typeof self !== 'undefined' && self && self.location) {
    var protocol = self.location.protocol;
    if (protocol) {
      // Drop the last character of the protocol (the ':' in e.g. 'https:').
      scheme = protocol.substring(0, protocol.length - 1);
    }
  }
  return scheme ? scheme.toLowerCase() : '';
};
359
360
/**
 * Extracts the user info of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The user name still encoded, or null if none.
 */
goog.uri.utils.getUserInfoEncoded = function(uri) {
  var userInfoIndex = goog.uri.utils.ComponentIndex.USER_INFO;
  return goog.uri.utils.getComponentByIndex_(userInfoIndex, uri);
};
369
370
/**
 * Extracts the user info of a URI and URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded user info, or null if none.
 */
goog.uri.utils.getUserInfo = function(uri) {
  var encodedUserInfo = goog.uri.utils.getUserInfoEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedUserInfo);
};
379
380
/**
 * Extracts the domain of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The domain name still encoded, or null if none.
 */
goog.uri.utils.getDomainEncoded = function(uri) {
  var domainIndex = goog.uri.utils.ComponentIndex.DOMAIN;
  return goog.uri.utils.getComponentByIndex_(domainIndex, uri);
};
389
390
/**
 * Extracts the domain of a URI and URI-decodes it. Percent-encoded reserved
 * characters are left encoded.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded domain, or null if none.
 */
goog.uri.utils.getDomain = function(uri) {
  var encodedDomain = goog.uri.utils.getDomainEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(
      encodedDomain, true /* opt_preserveReserved */);
};
399
400
/**
 * Extracts the port of a URI as a number.
 * @param {string} uri The URI to examine.
 * @return {?number} The port number, or null if none.
 */
goog.uri.utils.getPort = function(uri) {
  var portText = goog.uri.utils.getComponentByIndex_(
      goog.uri.utils.ComponentIndex.PORT, uri);
  // Number(null) is 0 and Number() of non-numeric text is NaN; both are
  // falsy and are reported as null. A literal port of zero is therefore also
  // reported as null, which isn't a relevant port number.
  var port = Number(portText);
  return port ? port : null;
};
413
414
/**
 * Extracts the path of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The path still encoded, or null if none. Includes the
 *     leading slash, if any.
 */
goog.uri.utils.getPathEncoded = function(uri) {
  var pathIndex = goog.uri.utils.ComponentIndex.PATH;
  return goog.uri.utils.getComponentByIndex_(pathIndex, uri);
};
424
425
/**
 * Extracts the path of a URI and URI-decodes it. Percent-encoded reserved
 * characters are left encoded.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded path, or null if none.  Includes the leading
 *     slash, if any.
 */
goog.uri.utils.getPath = function(uri) {
  var encodedPath = goog.uri.utils.getPathEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(
      encodedPath, true /* opt_preserveReserved */);
};
435
436
/**
 * Extracts the query string of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The query data still encoded, or null if none.  Does not
 *     include the question mark itself.
 */
goog.uri.utils.getQueryData = function(uri) {
  var queryIndex = goog.uri.utils.ComponentIndex.QUERY_DATA;
  return goog.uri.utils.getComponentByIndex_(queryIndex, uri);
};
446
447
/**
 * Extracts the fragment identifier of a URI without decoding it. Works
 * directly on the string rather than going through goog.uri.utils.split.
 * @param {string} uri The URI to examine.
 * @return {?string} Everything after the first hash mark, or null if the URI
 *     contains no hash mark.  Does not include the hash mark itself.
 */
goog.uri.utils.getFragmentEncoded = function(uri) {
  // The hash mark may not appear in any other part of the URL, so the first
  // one found starts the fragment.
  var hashPosition = uri.indexOf('#');
  if (hashPosition < 0) {
    return null;
  }
  return uri.slice(hashPosition + 1);
};
458
459
/**
 * Replaces the fragment identifier of a URI. Any existing fragment is
 * removed; a null or empty fragment leaves the URI without one.
 * @param {string} uri The URI to examine.
 * @param {?string} fragment The encoded fragment identifier, or null if none.
 *     Does not include the hash mark itself.
 * @return {string} The URI with the fragment set.
 */
goog.uri.utils.setFragmentEncoded = function(uri, fragment) {
  var withoutFragment = goog.uri.utils.removeFragment(uri);
  var suffix = fragment ? '#' + fragment : '';
  return withoutFragment + suffix;
};
469
470
/**
 * Extracts the fragment identifier of a URI and URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded fragment identifier, or null if none.  Does
 *     not include the hash mark.
 */
goog.uri.utils.getFragment = function(uri) {
  var encodedFragment = goog.uri.utils.getFragmentEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedFragment);
};
480
481
/**
 * Extracts everything up to the port of the URI: scheme, user info, domain
 * and port, reassembled from the still-encoded components.
 * @param {string} uri The URI string.
 * @return {string} Everything up to and including the port.
 */
goog.uri.utils.getHost = function(uri) {
  var index = goog.uri.utils.ComponentIndex;
  var components = goog.uri.utils.split(uri);

  var scheme = components[index.SCHEME];
  var userInfo = components[index.USER_INFO];
  var domain = components[index.DOMAIN];
  var port = components[index.PORT];
  return goog.uri.utils.buildFromEncodedParts(scheme, userInfo, domain, port);
};
495
496
/**
 * Extracts the path of the URL and everything after it, reassembled from the
 * still-encoded components.
 * @param {string} uri The URI string.
 * @return {string} The URI, starting at the path and including the query
 *     parameters and fragment identifier.
 */
goog.uri.utils.getPathAndAfter = function(uri) {
  var index = goog.uri.utils.ComponentIndex;
  var components = goog.uri.utils.split(uri);

  var path = components[index.PATH];
  var queryData = components[index.QUERY_DATA];
  var fragment = components[index.FRAGMENT];
  // Scheme, user info, domain and port are deliberately left out.
  return goog.uri.utils.buildFromEncodedParts(
      null, null, null, null, path, queryData, fragment);
};
510
511
/**
 * Returns the URI without its fragment identifier.
 * @param {string} uri The URI to examine.
 * @return {string} Everything preceding the first hash mark, or the whole URI
 *     if it contains none.
 */
goog.uri.utils.removeFragment = function(uri) {
  // The hash mark may not appear in any other part of the URL.
  var hashPosition = uri.indexOf('#');
  return hashPosition >= 0 ? uri.slice(0, hashPosition) : uri;
};
522
523
/**
 * Checks that two URIs have exactly the same domain, scheme, and port.
 *
 * Unlike the version in goog.Uri, this checks protocol, and therefore is
 * suitable for checking against the browser's same-origin policy. The
 * still-encoded components are compared as they appear in each URI.
 *
 * @param {string} uri1 The first URI.
 * @param {string} uri2 The second URI.
 * @return {boolean} Whether they have the same scheme, domain and port.
 */
goog.uri.utils.haveSameDomain = function(uri1, uri2) {
  var index = goog.uri.utils.ComponentIndex;
  var first = goog.uri.utils.split(uri1);
  var second = goog.uri.utils.split(uri2);

  if (first[index.DOMAIN] != second[index.DOMAIN]) {
    return false;
  }
  if (first[index.SCHEME] != second[index.SCHEME]) {
    return false;
  }
  return first[index.PORT] == second[index.PORT];
};
544
545
/**
 * Throws if the URI contains a '#' or a '?'. The check is only performed
 * when goog.DEBUG is set, i.e. in uncompiled mode.
 * @param {string} uri The URI to examine.
 * @private
 */
goog.uri.utils.assertNoFragmentsOrQueries_ = function(uri) {
  // NOTE: would use goog.asserts here, but jscompiler doesn't know that
  // indexOf has no side effects.
  if (!goog.DEBUG) {
    return;
  }
  if (uri.indexOf('#') < 0 && uri.indexOf('?') < 0) {
    return;
  }
  throw Error('goog.uri.utils: Fragment or query identifiers are not ' +
              'supported: [' + uri + ']');
};
560
561
/**
 * The kinds of query parameter values accepted by the parameter serializing
 * utilities.
 *
 * A value of null or undefined causes the whole key-value pair to be skipped,
 * which is an easy way to omit parameters conditionally.  A non-array value
 * is converted to a string and URI-encoded.  An array value is expanded into
 * multiple &key=value pairs, one per element, with each element stringized
 * and URI-encoded.
 *
 * @typedef {*}
 */
goog.uri.utils.QueryValue;
573
574
/**
 * A flat array describing a set of query parameters as alternating keys and
 * values.
 *
 * Keys sit at the even indices and are assumed to be URI encoded already.
 * Values sit at the odd indices; see goog.uri.utils.QueryValue for how they
 * are encoded.
 *
 * Example:
 * <pre>
 * var data = [
 *   // Simple param: ?name=BobBarker
 *   'name', 'BobBarker',
 *   // Conditional param -- may be omitted entirely.
 *   'specialDietaryNeeds', hasDietaryNeeds() ? getDietaryNeeds() : null,
 *   // Multi-valued param: &house=LosAngeles&house=NewYork&house=null
 *   'house', ['LosAngeles', 'NewYork', null]
 * ];
 * </pre>
 *
 * @typedef {!Array<string|goog.uri.utils.QueryValue>}
 */
goog.uri.utils.QueryArray;
597
598
/**
 * Parses encoded query parameters and calls callback function for every
 * parameter found in the string.
 *
 * An empty query string contains no parameters, so the callback is not
 * invoked for it.
 *
 * Missing value of parameter (e.g. "...&key&...") is treated as if the value
 * was an empty string.  Keys may be empty strings (e.g. "...&=value&...")
 * which also means that "...&=&..." and "...&&..." will result in an empty
 * key and value.
 *
 * @param {string} encodedQuery Encoded query string excluding question mark at
 *     the beginning.
 * @param {function(string, string)} callback Function called for every
 *     parameter found in query string.  The first argument (name) will not be
 *     urldecoded (so the function is consistent with buildQueryData), but the
 *     second will.  If the parameter has no value (i.e. "=" was not present)
 *     the second argument (value) will be an empty string.
 */
goog.uri.utils.parseQueryData = function(encodedQuery, callback) {
  // ''.split('&') yields [''], which would otherwise be reported as a single
  // parameter with an empty name and an empty value.
  if (!encodedQuery) {
    return;
  }

  var pairs = encodedQuery.split('&');
  for (var i = 0; i < pairs.length; i++) {
    var pair = pairs[i];
    var indexOfEquals = pair.indexOf('=');
    // Without an '=' the whole pair is the name and the value is empty.
    var name = pair;
    var value = '';
    if (indexOfEquals >= 0) {
      name = pair.substring(0, indexOfEquals);
      value = pair.substring(indexOfEquals + 1);
    }
    callback(name, value ? goog.string.urlDecode(value) : '');
  }
};
630
631
/**
 * Joins a string buffer holding a base URI followed by query data into the
 * final URI, fixing up the punctuation between the two.
 *
 * Internal implementation utility, performing very few object allocations.
 * The buffer is modified in place.
 *
 * @param {!Array<string|undefined>} buffer A string buffer.  The first element
 *     must be the base URI, and may have a fragment identifier.  If the array
 *     contains more than one element, the second element must be an ampersand,
 *     and may be overwritten, depending on the base URI.  Undefined elements
 *     are treated as empty-string.
 * @return {string} The concatenated URI and query data.
 * @private
 */
goog.uri.utils.appendQueryData_ = function(buffer) {
  if (!buffer[1]) {
    // No query parameter was added; there is nothing to fix up.
    return buffer.join('');
  }

  var base = /** @type {string} */ (buffer[0]);

  // A fragment must stay at the very end, so move it from the base URI to
  // the end of the buffer, behind the query data.
  var hashPosition = base.indexOf('#');
  if (hashPosition >= 0) {
    buffer.push(base.slice(hashPosition));
    base = base.slice(0, hashPosition);
    buffer[0] = base;
  }

  // buffer[1] is currently an ampersand; decide what it should really be.
  var questionPosition = base.indexOf('?');
  if (questionPosition < 0) {
    // The base URI has no query yet, so the query data starts with '?'.
    buffer[1] = '?';
  } else if (questionPosition == base.length - 1) {
    // The base URI already ends in '?', so no further delimiter is needed.
    buffer[1] = undefined;
  }

  return buffer.join('');
};
670
671
/**
 * Appends the serialized form of one parameter to a string buffer. Array
 * values produce one key=value pair per element.
 * @param {string} key The key prefix.
 * @param {goog.uri.utils.QueryValue} value The value to serialize.
 * @param {!Array<string>} pairs The array to which the 'key=value' strings
 *     should be appended.  Each pair is appended as separate pieces, starting
 *     with an '&'.
 * @private
 */
goog.uri.utils.appendKeyValuePairs_ = function(key, value, pairs) {
  if (!goog.isArray(value)) {
    // A top-level null or undefined is skipped entirely.
    if (value != null) {
      // For an empty string the equal sign is skipped as well, to be
      // consistent with UriBuilder.java.  Anything else, including zero, is
      // written after an '='.
      var separator = value === '' ? '' : '=';
      pairs.push('&', key, separator, goog.string.urlEncode(value));
    }
    return;
  }

  // Convince the compiler it's an array.
  goog.asserts.assertArray(value);
  for (var i = 0; i < value.length; i++) {
    // Stringize each element before recursing.  This bypasses the null and
    // array handling above, so null and undefined elements are written as
    // literal 'null' and 'undefined', and nested arrays are not expanded but
    // encoded in their default string form.
    goog.uri.utils.appendKeyValuePairs_(key, String(value[i]), pairs);
  }
};
701
702
/**
 * Serializes a sequence of alternating keys and values into a string buffer.
 *
 * @param {!Array<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {!goog.uri.utils.QueryArray|!Arguments} keysAndValues An array with
 *     alternating keys and values -- see the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {!Array<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBuffer_ = function(
    buffer, keysAndValues, opt_startIndex) {
  var start = opt_startIndex || 0;
  var remaining = Math.max(keysAndValues.length - start, 0);
  goog.asserts.assert(remaining % 2 == 0,
      'goog.uri.utils: Key/value lists must be even in length.');

  for (var keyIndex = start; keyIndex < keysAndValues.length; keyIndex += 2) {
    var key = keysAndValues[keyIndex];
    var value = keysAndValues[keyIndex + 1];
    goog.uri.utils.appendKeyValuePairs_(key, value, buffer);
  }

  return buffer;
};
726
727
/**
 * Builds a query data string from a sequence of alternating keys and values.
 * A value that is the empty string is currently written as the bare key,
 * without an equal sign.
 *
 * @param {goog.uri.utils.QueryArray} keysAndValues Alternating keys and
 *     values.  See the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryData = function(keysAndValues, opt_startIndex) {
  var pieces = [];
  goog.uri.utils.buildQueryDataBuffer_(pieces, keysAndValues, opt_startIndex);
  // The first piece is the ampersand in front of the first pair; blank it so
  // the result does not start with '&'.
  pieces[0] = '';
  return pieces.join('');
};
743
744
/**
 * Serializes the entries of a map into a string buffer, visiting the keys
 * with a for-in loop.
 *
 * @param {!Array<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {!Object<string, goog.uri.utils.QueryValue>} map An object where keys
 *     are URI-encoded parameter keys, and the values conform to the contract
 *     specified in the goog.uri.utils.QueryValue typedef.
 * @return {!Array<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBufferFromMap_ = function(buffer, map) {
  for (var paramKey in map) {
    var paramValue = map[paramKey];
    goog.uri.utils.appendKeyValuePairs_(paramKey, paramValue, buffer);
  }

  return buffer;
};
763
764
/**
 * Builds a query data string from a map.
 * A value that is the empty string is currently written as the bare key,
 * without an equal sign.
 *
 * @param {!Object<string, goog.uri.utils.QueryValue>} map An object where keys
 *     are URI-encoded parameter keys, and the values are arbitrary types
 *     or arrays.  Keys with a null value are dropped.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryDataFromMap = function(map) {
  var pieces = [];
  goog.uri.utils.buildQueryDataBufferFromMap_(pieces, map);
  // The first piece is the ampersand in front of the first pair; blank it so
  // the result does not start with '&'.
  pieces[0] = '';
  return pieces.join('');
};
779
780
/**
 * Appends URI parameters to an existing URI.
 *
 * The parameters are given either as a single array of alternating keys and
 * values, or directly as alternating key and value arguments.  Keys are
 * assumed to be already URI encoded.  The values should not be URI-encoded,
 * and will instead be encoded by this function.
 * <pre>
 * appendParams('http://www.foo.com?existing=true',
 *     'key1', 'value1',
 *     'key2', 'value?willBeEncoded',
 *     'key3', ['valueA', 'valueB', 'valueC'],
 *     'key4', null);
 * result: 'http://www.foo.com?existing=true&' +
 *     'key1=value1&' +
 *     'key2=value%3FwillBeEncoded&' +
 *     'key3=valueA&key3=valueB&key3=valueC'
 * </pre>
 *
 * A single call to this function will not exhibit quadratic behavior in IE,
 * whereas multiple repeated calls may, although the effect is limited by
 * fact that URL's generally can't exceed 2kb.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {...(goog.uri.utils.QueryArray|string|goog.uri.utils.QueryValue)} var_args
 *     An array or argument list conforming to goog.uri.utils.QueryArray.
 * @return {string} The URI with all query parameters added.
 */
goog.uri.utils.appendParams = function(uri, var_args) {
  var buffer = [uri];
  if (arguments.length == 2) {
    // Exactly one extra argument: it is the array of keys and values.
    goog.uri.utils.buildQueryDataBuffer_(buffer, arguments[1], 0);
  } else {
    // Otherwise the keys and values are the arguments following the URI.
    goog.uri.utils.buildQueryDataBuffer_(buffer, arguments, 1);
  }
  return goog.uri.utils.appendQueryData_(buffer);
};
814
815
/**
 * Appends the query parameters held in a map to an existing URI.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {!Object<goog.uri.utils.QueryValue>} map An object where keys are
 *     URI-encoded parameter keys, and the values are arbitrary types or arrays.
 *     Keys with a null value are dropped.
 * @return {string} The URI with the query parameters added.
 */
goog.uri.utils.appendParamsFromMap = function(uri, map) {
  var buffer = [uri];
  goog.uri.utils.buildQueryDataBufferFromMap_(buffer, map);
  return goog.uri.utils.appendQueryData_(buffer);
};
829
830
/**
 * Appends a single URI parameter.
 *
 * Repeated calls to this can exhibit quadratic behavior in IE6 due to the
 * way string append works, though it should be limited given the 2kb limit.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} key The key, which must already be URI encoded.
 * @param {*=} opt_value The value, which will be stringized and encoded
 *     (assumed not already to be encoded).  If omitted, undefined, or null, the
 *     key will be added as a valueless parameter.
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.appendParam = function(uri, key, opt_value) {
  var hasValue = goog.isDefAndNotNull(opt_value);
  // The '&' placeholder is corrected by appendQueryData_ as needed.
  var buffer = hasValue ?
      [uri, '&', key, '=', goog.string.urlEncode(opt_value)] :
      [uri, '&', key];
  return goog.uri.utils.appendQueryData_(buffer);
};
851
852
/**
 * Finds the next occurrence of a query parameter with the specified name.
 *
 * Does not instantiate any objects.
 *
 * @param {string} uri The URI to search.  May contain a fragment identifier,
 *     in which case hashOrEndIndex must be the index of its hash mark.
 * @param {number} startIndex The index to begin searching for the key at.  A
 *     match may be found even if this is one character after the ampersand.
 * @param {string} keyEncoded The URI-encoded key.
 * @param {number} hashOrEndIndex Index to stop looking at.  If a hash
 *     mark is present, it should be its index, otherwise it should be the
 *     length of the string.
 * @return {number} The position of the first character in the key's name,
 *     immediately after either a question mark or an ampersand, or -1 if the
 *     key was not found.
 * @private
 */
goog.uri.utils.findParam_ = function(
    uri, startIndex, keyEncoded, hashOrEndIndex) {
  var charCode = goog.uri.utils.CharCode_;
  var keyLength = keyEncoded.length;

  // Search for the key text itself and then check the punctuation around
  // each hit, rather than expensively building a regexp.
  var position = uri.indexOf(keyEncoded, startIndex);
  while (position >= 0 && position < hashOrEndIndex) {
    // The key must directly follow '&' or '?'.
    var before = uri.charCodeAt(position - 1);
    if (before == charCode.AMPERSAND || before == charCode.QUESTION) {
      // The key must be followed by '=', '&', '#', or the end of the string
      // (where charCodeAt yields NaN).
      var after = uri.charCodeAt(position + keyLength);
      if (!after ||
          after == charCode.EQUAL ||
          after == charCode.AMPERSAND ||
          after == charCode.HASH) {
        return position;
      }
    }
    // Resume one character past the end of this occurrence.
    position = uri.indexOf(keyEncoded, position + keyLength + 1);
  }

  return -1;
};
898
899
/**
 * Regular expression matching a hash mark or, failing that, the end of the
 * string.  Used with String.prototype.search to find where the query part of
 * a URI stops.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.hashOrEndRe_ = /#|$/;
906
907
/**
 * Reports whether a URI contains a query parameter with the given key.
 *
 * Only the part of the URI before the first hash mark is examined.
 *
 * @param {string} uri The URI to process.  May contain a fragment
 *     identifier.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {boolean} Whether the key is present.
 */
goog.uri.utils.hasParam = function(uri, keyEncoded) {
  // Index of the first '#', or the length of the URI if it has none.
  var searchLimit = uri.search(goog.uri.utils.hashOrEndRe_);
  var keyIndex = goog.uri.utils.findParam_(uri, 0, keyEncoded, searchLimit);
  return keyIndex >= 0;
};
922
923
/**
 * Reads the first value of a query parameter.
 * @param {string} uri The URI to process.  May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {?string} The first value of the parameter (URI-decoded), or null
 *     if the parameter is not found.  A key that is present without a value
 *     yields the empty string.
 */
goog.uri.utils.getParamValue = function(uri, keyEncoded) {
  // Index of the first '#', or the length of the URI if it has none.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var keyStart = goog.uri.utils.findParam_(uri, 0, keyEncoded, queryEnd);
  if (keyStart < 0) {
    return null;
  }

  // The value stops at the next '&', unless there is none before the end of
  // the query, in which case it stops where the query stops.
  var valueEnd = uri.indexOf('&', keyStart);
  if (valueEnd < 0 || valueEnd > queryEnd) {
    valueEnd = queryEnd;
  }

  // Step over the "key=" or "key&" substring.
  var valueStart = keyStart + keyEncoded.length + 1;

  // For a key without '=', valueStart lies past valueEnd; slice (unlike
  // substring, which would swap its arguments) then returns the empty string.
  return goog.string.urlDecode(uri.slice(valueStart, valueEnd));
};
951
952
/**
 * Collects every value of a query parameter, in order of appearance.
 * @param {string} uri The URI to process.  May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {!Array<string>} All URI-decoded values with the given key.
 *     If the key is not found, this will have length 0, but never be null.
 */
goog.uri.utils.getParamValues = function(uri, keyEncoded) {
  // Index of the first '#', or the length of the URI if it has none.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var values = [];
  var cursor = 0;

  while (true) {
    var keyStart = goog.uri.utils.findParam_(
        uri, cursor, keyEncoded, queryEnd);
    if (keyStart < 0) {
      break;
    }

    // This parameter ends at the next '&' or, failing that, at the end of
    // the query.  The next search resumes from that same spot.
    cursor = uri.indexOf('&', keyStart);
    if (cursor < 0 || cursor > queryEnd) {
      cursor = queryEnd;
    }

    // Step over the "key=" or "key&" substring.
    var valueStart = keyStart + keyEncoded.length + 1;

    // For a key without '=', valueStart lies past cursor; slice (unlike
    // substring, which would swap its arguments) then returns the empty
    // string.
    values.push(goog.string.urlDecode(uri.slice(valueStart, cursor)));
  }

  return values;
};
985
986
/**
 * Regexp to find a trailing question mark or ampersand, i.e. a '?' or '&'
 * that sits directly before a hash mark or the end of the string.
 *
 * The hash mark (or the empty match at the end of the string) is captured in
 * group 1, so replacing a match with '$1' drops only the punctuation
 * character.  The regexp is not global, so a replace affects the first match
 * only.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.trailingQueryPunctuationRe_ = /[?&]($|#)/;
993
994
/**
 * Strips every instance of a query parameter from a URI.
 * @param {string} uri The URI to process.  Must not contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.
 * @return {string} The URI with all instances of the parameter removed.
 */
goog.uri.utils.removeParam = function(uri, keyEncoded) {
  // Index of the first '#', or the length of the URI if it has none.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var pieces = [];
  var cursor = 0;

  var keyStart = goog.uri.utils.findParam_(uri, cursor, keyEncoded, queryEnd);
  while (keyStart >= 0) {
    // Keep the text in front of the parameter.  This includes the '?' or '&'
    // that introduces it; a leftover one at the very end is cleaned up below.
    pieces.push(uri.substring(cursor, keyStart));

    // Resume immediately after the parameter's closing '&'.  If there is no
    // such '&', or it lies beyond the query, resume at the end of the query
    // so that the hash mark is never skipped.
    var ampIndex = uri.indexOf('&', keyStart);
    cursor = ampIndex < 0 ? queryEnd : Math.min(ampIndex + 1, queryEnd);

    keyStart = goog.uri.utils.findParam_(uri, cursor, keyEncoded, queryEnd);
  }

  // Whatever follows the last removed parameter is kept as is.
  pieces.push(uri.substr(cursor));

  // Glue the pieces together and drop a dangling '?' or '&'.
  var joined = pieces.join('');
  return joined.replace(goog.uri.utils.trailingQueryPunctuationRe_, '$1');
};
1026
1027
/**
 * Replaces all existing definitions of a parameter with a single definition.
 *
 * The parameter is first removed via removeParam and then added again via
 * appendParam.  Repeated calls can exhibit quadratic behavior because each
 * call has to find the existing instances and reconstruct the string, though
 * this should be limited given the 2kb limit.  Consider using appendParams to
 * append multiple parameters in bulk.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} keyEncoded The key, which must already be URI encoded.
 * @param {*} value The value, which will be stringized and encoded (assumed
 *     not already to be encoded).
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.setParam = function(uri, keyEncoded, value) {
  var uriWithoutKey = goog.uri.utils.removeParam(uri, keyEncoded);
  return goog.uri.utils.appendParam(uriWithoutKey, keyEncoded, value);
};
1046
1047
/**
 * Joins a base URI and a path with exactly one "/" inserted between them.
 *
 * At most one trailing "/" is removed from the base and at most one leading
 * "/" is removed from the path before joining, which prevents a consecutive
 * "//" at the seam.  The baseUri passed in must not contain query or fragment
 * identifiers; it is handed to assertNoFragmentsOrQueries_ first.  The path
 * to append may not contain query or fragment identifiers either, but this
 * function does not verify that.
 *
 * @param {string} baseUri URI to use as the base.
 * @param {string} path Path to append.
 * @return {string} Updated URI.
 */
goog.uri.utils.appendPath = function(baseUri, path) {
  goog.uri.utils.assertNoFragmentsOrQueries_(baseUri);

  // Base without its trailing '/', if it has one.
  var base = goog.string.endsWith(baseUri, '/') ?
      baseUri.slice(0, -1) :
      baseUri;
  // Path without its leading '/', if it has one.
  var tail = goog.string.startsWith(path, '/') ? path.slice(1) : path;

  return goog.string.buildString(base, '/', tail);
};
1071
1072
/**
 * Replaces the path of a URI, leaving its other components as they are.
 *
 * The URI is taken apart with goog.uri.utils.split and put back together
 * with goog.uri.utils.buildFromEncodedParts, substituting the new path.
 *
 * @param {string} uri URI to use as the base.
 * @param {string} path New path.  A leading "/" is added if it is missing.
 * @return {string} Updated URI.
 */
goog.uri.utils.setPath = function(uri, path) {
  // Make sure the new path starts with '/'.
  var rootedPath = goog.string.startsWith(path, '/') ? path : '/' + path;

  var pieces = goog.uri.utils.split(uri);
  var scheme = pieces[goog.uri.utils.ComponentIndex.SCHEME];
  var userInfo = pieces[goog.uri.utils.ComponentIndex.USER_INFO];
  var domain = pieces[goog.uri.utils.ComponentIndex.DOMAIN];
  var port = pieces[goog.uri.utils.ComponentIndex.PORT];
  var queryData = pieces[goog.uri.utils.ComponentIndex.QUERY_DATA];
  var fragment = pieces[goog.uri.utils.ComponentIndex.FRAGMENT];

  return goog.uri.utils.buildFromEncodedParts(
      scheme, userInfo, domain, port, rootedPath, queryData, fragment);
};
1094
1095
/**
 * Standard supported query parameters.
 * @enum {string}
 */
goog.uri.utils.StandardQueryParam = {

  /**
   * Unused parameter for unique-ifying.  goog.uri.utils.makeUnique sets it to
   * a random string.
   */
  RANDOM: 'zx'
};
1105
1106
/**
 * Sets the zx parameter of a URI to a random value.
 *
 * Any existing "zx" parameter is replaced, because the value is written with
 * goog.uri.utils.setParam.
 *
 * @param {string} uri Any URI.
 * @return {string} That URI with the "zx" parameter added or replaced to
 *     contain a random string.
 */
goog.uri.utils.makeUnique = function(uri) {
  var randomValue = goog.string.getRandomString();
  return goog.uri.utils.setParam(
      uri, goog.uri.utils.StandardQueryParam.RANDOM, randomValue);
};