lib/goog/uri/utils.js

1// Copyright 2008 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/**
16 * @fileoverview Simple utilities for dealing with URI strings.
17 *
18 * This is intended to be a lightweight alternative to constructing goog.Uri
19 * objects. Whereas goog.Uri adds several kilobytes to the binary regardless
20 * of how much of its functionality you use, this is designed to be a set of
21 * mostly-independent utilities so that the compiler includes only what is
22 * necessary for the task. Estimated savings of porting is 5k pre-gzip and
23 * 1.5k post-gzip. To ensure the savings remain, future developers should
24 * avoid adding new functionality to existing functions, but instead create
25 * new ones and factor out shared code.
26 *
27 * Many of these utilities have limited functionality, tailored to common
28 * cases. The query parameter utilities assume that the parameter keys are
29 * already encoded, since most keys are compile-time alphanumeric strings. The
30 * query parameter mutation utilities also do not tolerate fragment identifiers.
31 *
32 * By design, these functions can be slower than goog.Uri equivalents.
33 * Repeated calls to some of the functions may be quadratic in behavior for IE,
34 * although the effect is somewhat limited given the 2kb limit.
35 *
36 * One advantage of the limited functionality here is that this approach is
37 * less sensitive to differences in URI encodings than goog.Uri, since these
38 * functions modify the strings in place, rather than decoding and
39 * re-encoding.
40 *
41 * Uses features of RFC 3986 for parsing/formatting URIs:
42 * http://www.ietf.org/rfc/rfc3986.txt
43 *
44 * @author gboyer@google.com (Garrett Boyer) - The "lightened" design.
45 * @author msamuel@google.com (Mike Samuel) - Domain knowledge and regexes.
46 */
47
48goog.provide('goog.uri.utils');
49goog.provide('goog.uri.utils.ComponentIndex');
50goog.provide('goog.uri.utils.QueryArray');
51goog.provide('goog.uri.utils.QueryValue');
52goog.provide('goog.uri.utils.StandardQueryParam');
53
54goog.require('goog.asserts');
55goog.require('goog.string');
56goog.require('goog.userAgent');
57
58
/**
 * Numeric character codes for the URI punctuation marks this file inspects.
 * They are written as literal numbers so that results of charCodeAt can be
 * compared without allocating one-character strings.
 * @enum {number}
 * @private
 */
goog.uri.utils.CharCode_ = {
  AMPERSAND: 38,  // '&'
  EQUAL: 61,      // '='
  HASH: 35,       // '#'
  QUESTION: 63    // '?'
};
70
71
/**
 * Assembles a URI string out of parts that have already been encoded.
 *
 * Nothing is encoded here. Every component is optional; a component that is
 * null, undefined, or otherwise falsy (for example the empty string) is left
 * out together with its delimiter. The user info and the port are only
 * emitted when a domain is given.
 *
 * @param {?string=} opt_scheme The scheme such as 'http'.
 * @param {?string=} opt_userInfo The user name before the '@'.
 * @param {?string=} opt_domain The domain such as 'www.google.com', already
 *     URI-encoded.
 * @param {(string|number|null)=} opt_port The port number.
 * @param {?string=} opt_path The path, already URI-encoded.  If it is not
 *     empty, it must begin with a slash.
 * @param {?string=} opt_queryData The URI-encoded query data.
 * @param {?string=} opt_fragment The URI-encoded fragment identifier.
 * @return {string} The fully combined URI.
 */
goog.uri.utils.buildFromEncodedParts = function(opt_scheme, opt_userInfo,
    opt_domain, opt_port, opt_path, opt_queryData, opt_fragment) {
  // scheme ":"
  var uri = opt_scheme ? opt_scheme + ':' : '';

  // "//" [userInfo "@"] domain [":" port] -- only when there is a domain.
  if (opt_domain) {
    var userInfoPrefix = opt_userInfo ? opt_userInfo + '@' : '';
    var portSuffix = opt_port ? ':' + opt_port : '';
    uri += '//' + userInfoPrefix + opt_domain + portSuffix;
  }

  // path ["?" query] ["#" fragment]
  if (opt_path) {
    uri += opt_path;
  }
  if (opt_queryData) {
    uri += '?' + opt_queryData;
  }
  if (opt_fragment) {
    uri += '#' + opt_fragment;
  }

  return uri;
};
125
126
/**
 * A regular expression for breaking a URI into its component parts.
 *
 * {@link http://www.ietf.org/rfc/rfc3986.txt} says in Appendix B:
 * As the "first-match-wins" algorithm is identical to the "greedy"
 * disambiguation method used by POSIX regular expressions, it is natural and
 * commonplace to use a regular expression for parsing the potential five
 * components of a URI reference.
 *
 * The following line is the regular expression for breaking-down a
 * well-formed URI reference into its components.
 *
 * <pre>
 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *  12            3  4          5       6  7        8 9
 * </pre>
 *
 * The numbers in the second line above are only to assist readability; they
 * indicate the reference points for each subexpression (i.e., each paired
 * parenthesis). We refer to the value matched for subexpression <n> as $<n>.
 * For example, matching the above expression to
 * <pre>
 *     http://www.ics.uci.edu/pub/ietf/uri/#Related
 * </pre>
 * results in the following subexpression matches:
 * <pre>
 *    $1 = http:
 *    $2 = http
 *    $3 = //www.ics.uci.edu
 *    $4 = www.ics.uci.edu
 *    $5 = /pub/ietf/uri/
 *    $6 = <undefined>
 *    $7 = <undefined>
 *    $8 = #Related
 *    $9 = Related
 * </pre>
 * where <undefined> indicates that the component is not present, as is the
 * case for the query component in the above example. Therefore, we can
 * determine the value of the five components as
 * <pre>
 *    scheme    = $2
 *    authority = $4
 *    path      = $5
 *    query     = $7
 *    fragment  = $9
 * </pre>
 *
 * The regular expression has been modified slightly to expose the
 * userInfo, domain, and port separately from the authority.
 * The modified version yields
 * <pre>
 *    $1 = http              scheme
 *    $2 = <undefined>       userInfo -\
 *    $3 = www.ics.uci.edu   domain     | authority
 *    $4 = <undefined>       port     -/
 *    $5 = /pub/ietf/uri/    path
 *    $6 = <undefined>       query without ?
 *    $7 = Related           fragment without #
 * </pre>
 *
 * Two further deviations from the RFC expression are deliberate:
 * <ul>
 * <li>A backslash terminates the authority just like a forward slash does,
 *     so that the userInfo and domain groups can never contain one. Without
 *     this, 'http://evil.com\@good.com/' would be split with a domain of
 *     'good.com'.
 * <li>The fragment is matched with [\s\S]* instead of .* because '.' does not
 *     match line terminators; with .* a fragment containing a newline would
 *     make the whole expression fail to match.
 * </ul>
 *
 * @type {!RegExp}
 * @private
 */
goog.uri.utils.splitRe_ = new RegExp(
    '^' +
    '(?:' +
      '([^:/?#.]+)' +            // scheme - ignore special characters
                                 // used by other URL parts such as :,
                                 // ?, /, #, and .
    ':)?' +
    '(?://' +
      '(?:([^\\\\/?#]*)@)?' +    // userInfo - may not contain '\', '/', '?', '#'
      '([^\\\\/?#]*?)' +         // domain - same exclusions as userInfo
      '(?::([0-9]+))?' +         // port
      '(?=[\\\\/?#]|$)' +        // authority-terminating character
    ')?' +
    '([^?#]+)?' +                // path
    '(?:\\?([^#]*))?' +          // query
    '(?:#([\\s\\S]*))?' +        // fragment - [\s\S] also matches newlines
    '$');
206
207
/**
 * Positions of the individual URI components inside the array returned by
 * goog.uri.utils.split. The values correspond to the capturing groups of the
 * splitting regular expression, which is why they start at 1.
 * @enum {number}
 */
goog.uri.utils.ComponentIndex = {
  SCHEME: 1,      // e.g. 'http'
  USER_INFO: 2,   // text before the '@' in the authority
  DOMAIN: 3,      // e.g. 'www.google.com'
  PORT: 4,        // digits after the ':' in the authority
  PATH: 5,        // includes the leading slash, if any
  QUERY_DATA: 6,  // without the leading '?'
  FRAGMENT: 7     // without the leading '#'
};
221
222
/**
 * Breaks a URI string up into its components.
 *
 * The result is indexed with goog.uri.utils.ComponentIndex, for example:
 * <pre>
 * goog.uri.utils.split(someStr)[goog.uri.utils.ComponentIndex.QUERY_DATA];
 * </pre>
 *
 * The phishing check is run before the URI is matched, so this function may
 * throw if that check fails.
 *
 * @param {string} uri The URI string to examine.
 * @return {!Array.<string|undefined>} Each component still URI-encoded.
 *     Components that are present hold their encoded value; components that
 *     are absent are undefined or empty, depending on the browser's regular
 *     expression implementation. Intended never to be null, since arbitrary
 *     strings may still look like path names.
 */
goog.uri.utils.split = function(uri) {
  goog.uri.utils.phishingProtection_();

  var components = uri.match(goog.uri.utils.splitRe_);

  // The cast reflects the contract in the @return comment -- never null.
  return /** @type {!Array.<string|undefined>} */ (components);
};
245
246
/**
 * Whether the phishing check still has to be performed.
 *
 * Safari has a nasty bug where if you have an http URL with a username, e.g.,
 * http://evil.com%2F@google.com/
 * Safari will report that window.location.href is
 * http://evil.com/google.com/
 * so that anyone who tries to parse the domain of that URL will get
 * the wrong domain. We've seen exploits where people use this to trick
 * Safari into loading resources from evil domains.
 *
 * To work around this, we run a little "Safari phishing check", and throw
 * an exception if we see this happening.
 *
 * There is no convenient place to put this check. We apply it to
 * anyone doing URI parsing on Webkit. We're not happy about this, but
 * it fixes the problem.
 *
 * This should be removed once Safari fixes their bug.
 *
 * Exploit reported by Masato Kinugawa.
 *
 * @type {boolean}
 * @private
 */
goog.uri.utils.needsPhishingProtection_ = goog.userAgent.WEBKIT;


/**
 * Checks whether the user is being phished: the domain parsed out of
 * location.href must agree with location.hostname.
 *
 * The check is skipped when the protection flag is off. The flag is switched
 * off as soon as the check starts and is only switched back on when a
 * mismatch is found, in which case an Error is thrown.
 * @private
 */
goog.uri.utils.phishingProtection_ = function() {
  if (!goog.uri.utils.needsPhishingProtection_) {
    return;
  }

  // getDomain() below ends up calling this function again through split();
  // clearing the flag first keeps that from recursing.
  goog.uri.utils.needsPhishingProtection_ = false;

  // Quoted property access, in case the user isn't using location externs.
  var pageLocation = goog.global['location'];
  var pageHref = pageLocation && pageLocation['href'];
  if (!pageHref) {
    return;
  }

  var parsedDomain = goog.uri.utils.getDomain(pageHref);
  if (parsedDomain && parsedDomain != pageLocation['hostname']) {
    // Phishing attack: re-arm the check so later calls keep failing.
    goog.uri.utils.needsPhishingProtection_ = true;
    throw Error();
  }
};
297
298
/**
 * URI-decodes a string, passing falsy input (null, undefined, or the empty
 * string) through untouched.
 *
 * @param {?string} uri A possibly null string.
 * @param {boolean=} opt_preserveReserved If true, decodeURI is used, so
 *     percent-encoding of RFC-3986 reserved characters will not be removed.
 *     Otherwise decodeURIComponent is used.
 * @return {?string} The string URI-decoded, or null if uri is null.
 * @private
 */
goog.uri.utils.decodeIfPossible_ = function(uri, opt_preserveReserved) {
  if (uri) {
    var decode = opt_preserveReserved ? decodeURI : decodeURIComponent;
    return decode(uri);
  }

  // Nothing to decode.
  return uri;
};
313
314
/**
 * Looks up one component of a URI by its index.
 *
 * The named getters that follow (getPathEncoded() and friends) are preferred
 * by callers because they read better.
 *
 * @param {goog.uri.utils.ComponentIndex} componentIndex The component index.
 * @param {string} uri The URI to examine.
 * @return {?string} The still-encoded component, or null if the component
 *     is not present.
 * @private
 */
goog.uri.utils.getComponentByIndex_ = function(componentIndex, uri) {
  var component = goog.uri.utils.split(uri)[componentIndex];

  // Undefined, null, and empty string are all reported as null.
  return component ? component : null;
};
331
332
/**
 * Reads the scheme of a URI.
 * @param {string} uri The URI to examine.
 * @return {?string} The protocol or scheme, or null if none.  Does not
 *     include trailing colons or slashes.
 */
goog.uri.utils.getScheme = function(uri) {
  var schemeIndex = goog.uri.utils.ComponentIndex.SCHEME;
  return goog.uri.utils.getComponentByIndex_(schemeIndex, uri);
};
342
343
/**
 * Gets the effective scheme for the URL.  If the URL is relative then the
 * scheme is derived from the page's location.
 *
 * The location is looked up through goog.global rather than through a bare
 * 'self' reference, so that environments which define no global 'self' get
 * the empty string instead of a ReferenceError.
 *
 * @param {string} uri The URI to examine.
 * @return {string} The protocol or scheme, always lower case. Empty if the
 *     URI has no scheme and no location is available.
 */
goog.uri.utils.getEffectiveScheme = function(uri) {
  var scheme = goog.uri.utils.getScheme(uri);
  if (!scheme) {
    var globalSelf = goog.global.self;
    // NOTE: When called from a web worker in Firefox 3.5, location may be
    // null. All other browsers with web workers support self.location from
    // the worker.
    var location = globalSelf && globalSelf.location;
    if (location) {
      // location.protocol carries a trailing colon ('http:'); strip it.
      var protocol = location.protocol;
      scheme = protocol.substr(0, protocol.length - 1);
    }
  }
  return scheme ? scheme.toLowerCase() : '';
};
360
361
/**
 * Reads the user info of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The user name still encoded, or null if none.
 */
goog.uri.utils.getUserInfoEncoded = function(uri) {
  var userInfoIndex = goog.uri.utils.ComponentIndex.USER_INFO;
  return goog.uri.utils.getComponentByIndex_(userInfoIndex, uri);
};
370
371
/**
 * Reads the user info of a URI and URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded user info, or null if none.
 */
goog.uri.utils.getUserInfo = function(uri) {
  var encodedUserInfo = goog.uri.utils.getUserInfoEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedUserInfo);
};
380
381
/**
 * Reads the domain of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The domain name still encoded, or null if none.
 */
goog.uri.utils.getDomainEncoded = function(uri) {
  var domainIndex = goog.uri.utils.ComponentIndex.DOMAIN;
  return goog.uri.utils.getComponentByIndex_(domainIndex, uri);
};
390
391
/**
 * Reads the domain of a URI and decodes it, leaving percent-encoded reserved
 * characters in place.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded domain, or null if none.
 */
goog.uri.utils.getDomain = function(uri) {
  var encodedDomain = goog.uri.utils.getDomainEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(
      encodedDomain, true /* opt_preserveReserved */);
};
400
401
/**
 * Reads the port of a URI as a number.
 * @param {string} uri The URI to examine.
 * @return {?number} The port number, or null if none.
 */
goog.uri.utils.getPort = function(uri) {
  var portText = goog.uri.utils.getComponentByIndex_(
      goog.uri.utils.ComponentIndex.PORT, uri);

  // Number coercion turns a missing port (null) into 0 and anything
  // non-numeric into NaN. Both are falsy and are reported as null; so is a
  // literal port of zero, which isn't a relevant port number.
  var port = Number(portText);
  return port || null;
};
414
415
/**
 * Reads the path of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The path still encoded, or null if none. Includes the
 *     leading slash, if any.
 */
goog.uri.utils.getPathEncoded = function(uri) {
  var pathIndex = goog.uri.utils.ComponentIndex.PATH;
  return goog.uri.utils.getComponentByIndex_(pathIndex, uri);
};
425
426
/**
 * Reads the path of a URI and decodes it, leaving percent-encoded reserved
 * characters in place.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded path, or null if none.  Includes the leading
 *     slash, if any.
 */
goog.uri.utils.getPath = function(uri) {
  var encodedPath = goog.uri.utils.getPathEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(
      encodedPath, true /* opt_preserveReserved */);
};
436
437
/**
 * Reads the query data of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The query data still encoded, or null if none.  Does not
 *     include the question mark itself.
 */
goog.uri.utils.getQueryData = function(uri) {
  var queryIndex = goog.uri.utils.ComponentIndex.QUERY_DATA;
  return goog.uri.utils.getComponentByIndex_(queryIndex, uri);
};
447
448
/**
 * Reads the fragment identifier of a URI without decoding it. The URI is not
 * split; everything after the first hash mark is returned.
 * @param {string} uri The URI to examine.
 * @return {?string} The fragment identifier, or null if none.  Does not
 *     include the hash mark itself.
 */
goog.uri.utils.getFragmentEncoded = function(uri) {
  // The hash mark may not appear in any other part of the URL, so the first
  // one found starts the fragment.
  var hashPosition = uri.indexOf('#');
  if (hashPosition < 0) {
    return null;
  }
  return uri.substr(hashPosition + 1);
};
459
460
/**
 * Replaces the fragment identifier of a URI. Any existing fragment is
 * dropped; a falsy fragment leaves the URI without one.
 * @param {string} uri The URI to examine.
 * @param {?string} fragment The encoded fragment identifier, or null if none.
 *     Does not include the hash mark itself.
 * @return {string} The URI with the fragment set.
 */
goog.uri.utils.setFragmentEncoded = function(uri, fragment) {
  var withoutFragment = goog.uri.utils.removeFragment(uri);
  var fragmentSuffix = fragment ? '#' + fragment : '';
  return withoutFragment + fragmentSuffix;
};
470
471
/**
 * Reads the fragment identifier of a URI and URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded fragment identifier, or null if none.  Does
 *     not include the hash mark.
 */
goog.uri.utils.getFragment = function(uri) {
  var encodedFragment = goog.uri.utils.getFragmentEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedFragment);
};
481
482
/**
 * Extracts everything up to the port of the URI: the scheme, user info,
 * domain and port are split out and reassembled.
 * @param {string} uri The URI string.
 * @return {string} Everything up to and including the port.
 */
goog.uri.utils.getHost = function(uri) {
  var componentIndex = goog.uri.utils.ComponentIndex;
  var parts = goog.uri.utils.split(uri);

  var scheme = parts[componentIndex.SCHEME];
  var userInfo = parts[componentIndex.USER_INFO];
  var domain = parts[componentIndex.DOMAIN];
  var port = parts[componentIndex.PORT];

  return goog.uri.utils.buildFromEncodedParts(scheme, userInfo, domain, port);
};
496
497
/**
 * Extracts the path of the URL and everything after: the path, query data
 * and fragment are split out and reassembled.
 * @param {string} uri The URI string.
 * @return {string} The URI, starting at the path and including the query
 *     parameters and fragment identifier.
 */
goog.uri.utils.getPathAndAfter = function(uri) {
  var componentIndex = goog.uri.utils.ComponentIndex;
  var parts = goog.uri.utils.split(uri);

  var path = parts[componentIndex.PATH];
  var queryData = parts[componentIndex.QUERY_DATA];
  var fragment = parts[componentIndex.FRAGMENT];

  // Scheme, user info, domain and port are deliberately omitted.
  return goog.uri.utils.buildFromEncodedParts(
      null, null, null, null, path, queryData, fragment);
};
511
512
/**
 * Strips the fragment identifier, including the hash mark, from a URI.
 * @param {string} uri The URI to examine.
 * @return {string} Everything preceding the hash mark.
 */
goog.uri.utils.removeFragment = function(uri) {
  // The hash mark may not appear in any other part of the URL, so the first
  // one found starts the fragment.
  var hashPosition = uri.indexOf('#');
  if (hashPosition >= 0) {
    return uri.substr(0, hashPosition);
  }
  return uri;
};
523
524
/**
 * Ensures that two URI's have the exact same domain, scheme, and port.
 *
 * Unlike the version in goog.Uri, this checks protocol, and therefore is
 * suitable for checking against the browser's same-origin policy.
 *
 * The still-encoded components are compared as they appear in each URI.
 *
 * @param {string} uri1 The first URI.
 * @param {string} uri2 The second URI.
 * @return {boolean} Whether they have the same scheme, domain and port.
 */
goog.uri.utils.haveSameDomain = function(uri1, uri2) {
  var first = goog.uri.utils.split(uri1);
  var second = goog.uri.utils.split(uri2);

  var comparedIndices = [
    goog.uri.utils.ComponentIndex.DOMAIN,
    goog.uri.utils.ComponentIndex.SCHEME,
    goog.uri.utils.ComponentIndex.PORT
  ];

  for (var i = 0; i < comparedIndices.length; i++) {
    var componentIndex = comparedIndices[i];
    if (first[componentIndex] != second[componentIndex]) {
      return false;
    }
  }
  return true;
};
545
546
/**
 * Throws if the URI contains a '#' or a '?'. The check is only performed
 * when goog.DEBUG is set, i.e. in uncompiled mode.
 * @param {string} uri The URI to examine.
 * @private
 */
goog.uri.utils.assertNoFragmentsOrQueries_ = function(uri) {
  // NOTE: would use goog.asserts here, but jscompiler doesn't know that
  // indexOf has no side effects.
  if (goog.DEBUG) {
    if (uri.indexOf('#') >= 0 || uri.indexOf('?') >= 0) {
      throw Error('goog.uri.utils: Fragment or query identifiers are not ' +
          'supported: [' + uri + ']');
    }
  }
};
561
562
/**
 * A query parameter value accepted by the parameter serializing utilities.
 *
 * <ul>
 * <li>null or undefined: the key-value pair is skipped, which makes it easy
 *     to omit a parameter conditionally.
 * <li>An array: expanded into multiple &key=value pairs, one per element,
 *     with each element stringized and URI-encoded.
 * <li>Anything else: converted to a string and URI-encoded.
 * </ul>
 *
 * @typedef {*}
 */
goog.uri.utils.QueryValue;
574
575
/**
 * A flat array describing a set of query parameters as alternating keys and
 * values.
 *
 * Keys live at even indices and are assumed to be URI encoded already. The
 * value following each key is encoded as described for
 * goog.uri.utils.QueryValue.
 *
 * Example:
 * <pre>
 * var data = [
 *   // Simple param: ?name=BobBarker
 *   'name', 'BobBarker',
 *   // Conditional param -- may be omitted entirely.
 *   'specialDietaryNeeds', hasDietaryNeeds() ? getDietaryNeeds() : null,
 *   // Multi-valued param: &house=LosAngeles&house=NewYork&house=null
 *   'house', ['LosAngeles', 'NewYork', null]
 * ];
 * </pre>
 *
 * @typedef {!Array.<string|goog.uri.utils.QueryValue>}
 */
goog.uri.utils.QueryArray;
598
599
/**
 * Joins a base URI and query data held in a string buffer, fixing up the
 * punctuation between them.
 *
 * Internal implementation utility, performing very few object allocations.
 * The buffer is modified in place.
 *
 * @param {!Array.<string|undefined>} buffer A string buffer.  The first element
 *     must be the base URI, and may have a fragment identifier.  If the array
 *     contains more than one element, the second element must be an ampersand,
 *     and may be overwritten, depending on the base URI.  Undefined elements
 *     are treated as empty-string.
 * @return {string} The concatenated URI and query data.
 * @private
 */
goog.uri.utils.appendQueryData_ = function(buffer) {
  if (!buffer[1]) {
    // No query parameter was added, so there is no punctuation to fix.
    return buffer.join('');
  }

  var base = /** @type {string} */ (buffer[0]);

  // A fragment on the base URI has to come after the new query data: move
  // it (hash mark included) to the end of the buffer.
  var hashPosition = base.indexOf('#');
  if (hashPosition >= 0) {
    buffer.push(base.substr(hashPosition));
    base = base.substr(0, hashPosition);
    buffer[0] = base;
  }

  // buffer[1] currently holds an ampersand; pick the right delimiter.
  var questionPosition = base.indexOf('?');
  if (questionPosition < 0) {
    // No query yet, so the parameters are introduced by a question mark.
    buffer[1] = '?';
  } else if (questionPosition == base.length - 1) {
    // The base already ends in a question mark: no delimiter at all.
    buffer[1] = undefined;
  }

  return buffer.join('');
};
638
639
/**
 * Serializes one key with its value onto an array of string pieces. An array
 * value produces one key=value entry per element.
 * @param {string} key The key prefix.
 * @param {goog.uri.utils.QueryValue} value The value to serialize.
 * @param {!Array.<string>} pairs The array to which the 'key=value' strings
 *     should be appended.
 * @private
 */
goog.uri.utils.appendKeyValuePairs_ = function(key, value, pairs) {
  if (!goog.isArray(value)) {
    // A top-level null or undefined is skipped entirely.
    if (value != null) {
      // An empty string is written without the equal sign, to be consistent
      // with UriBuilder.java. Zero is not special and is encoded as a
      // literal string.
      var separator = value === '' ? '' : '=';
      pairs.push('&', key, separator, goog.string.urlEncode(value));
    }
    return;
  }

  // Convince the compiler it's an array.
  goog.asserts.assertArray(value);
  for (var i = 0; i < value.length; i++) {
    // Stringify each element before recursing. This bypasses the null and
    // array handling above: null and undefined elements are written as
    // literal 'null' and 'undefined', and nested arrays are encoded in the
    // default way rather than expanded.
    var element = String(value[i]);
    goog.uri.utils.appendKeyValuePairs_(key, element, pairs);
  }
};
669
670
/**
 * Appends query data to a buffer from a list of alternating keys and values.
 *
 * @param {!Array.<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {goog.uri.utils.QueryArray|Arguments} keysAndValues An array with
 *     alternating keys and values -- see the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {!Array.<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBuffer_ = function(
    buffer, keysAndValues, opt_startIndex) {
  var start = opt_startIndex || 0;

  // Every key needs a value, so the part of the list that is consumed must
  // have an even number of entries.
  var consumedCount = Math.max(keysAndValues.length - start, 0);
  goog.asserts.assert(consumedCount % 2 == 0,
      'goog.uri.utils: Key/value lists must be even in length.');

  for (var keyIndex = start; keyIndex < keysAndValues.length; keyIndex += 2) {
    var key = keysAndValues[keyIndex];
    var value = keysAndValues[keyIndex + 1];
    goog.uri.utils.appendKeyValuePairs_(key, value, buffer);
  }

  return buffer;
};
694
695
/**
 * Serializes a list of alternating keys and values into a query data string.
 * Currently generates "&key&" for empty args.
 *
 * @param {goog.uri.utils.QueryArray} keysAndValues Alternating keys and
 *     values.  See the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryData = function(keysAndValues, opt_startIndex) {
  var pieces = goog.uri.utils.buildQueryDataBuffer_(
      [], keysAndValues, opt_startIndex);

  // The first piece is the ampersand in front of the first key; blank it
  // out so that the result does not start with a delimiter.
  pieces[0] = '';

  return pieces.join('');
};
711
712
/**
 * Appends query data to a buffer from the enumerable properties of a map.
 *
 * @param {!Array.<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {Object.<goog.uri.utils.QueryValue>} map An object where keys are
 *     URI-encoded parameter keys, and the values conform to the contract
 *     specified in the goog.uri.utils.QueryValue typedef.
 * @return {!Array.<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBufferFromMap_ = function(buffer, map) {
  for (var paramKey in map) {
    var paramValue = map[paramKey];
    goog.uri.utils.appendKeyValuePairs_(paramKey, paramValue, buffer);
  }

  return buffer;
};
731
732
/**
 * Serializes a map into a query data string.
 * Currently generates "&key&" for empty args.
 *
 * @param {Object} map An object where keys are URI-encoded parameter keys,
 *     and the values are arbitrary types or arrays.  Keys with a null value
 *     are dropped.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryDataFromMap = function(map) {
  var pieces = goog.uri.utils.buildQueryDataBufferFromMap_([], map);

  // Blank out the ampersand in front of the first key.
  pieces[0] = '';

  return pieces.join('');
};
747
748
/**
 * Adds query parameters to an existing URI.
 *
 * The parameters are given either as a single array of alternating keys and
 * values, or directly as a variable argument list of alternating keys and
 * values. Keys are assumed to be already URI encoded.  The values should not
 * be URI-encoded, and will instead be encoded by this function.
 * <pre>
 * appendParams('http://www.foo.com?existing=true',
 *     'key1', 'value1',
 *     'key2', 'value?willBeEncoded',
 *     'key3', ['valueA', 'valueB', 'valueC'],
 *     'key4', null);
 * result: 'http://www.foo.com?existing=true&' +
 *     'key1=value1&' +
 *     'key2=value%3FwillBeEncoded&' +
 *     'key3=valueA&key3=valueB&key3=valueC'
 * </pre>
 *
 * A single call to this function will not exhibit quadratic behavior in IE,
 * whereas multiple repeated calls may, although the effect is limited by
 * fact that URL's generally can't exceed 2kb.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {...(goog.uri.utils.QueryArray|string|goog.uri.utils.QueryValue)} var_args
 *     An array or argument list conforming to goog.uri.utils.QueryArray.
 * @return {string} The URI with all query parameters added.
 */
goog.uri.utils.appendParams = function(uri, var_args) {
  var buffer;
  if (arguments.length == 2) {
    // Exactly one extra argument: it is the array of keys and values.
    buffer = goog.uri.utils.buildQueryDataBuffer_([uri], arguments[1], 0);
  } else {
    // Keys and values were passed inline; they start after the URI.
    buffer = goog.uri.utils.buildQueryDataBuffer_([uri], arguments, 1);
  }
  return goog.uri.utils.appendQueryData_(buffer);
};
782
783
/**
 * Adds the query parameters described by a map to an existing URI.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {Object} map An object where keys are URI-encoded parameter keys,
 *     and the values are arbitrary types or arrays.  Keys with a null value
 *     are dropped.
 * @return {string} The URI with the parameters from the map added.
 */
goog.uri.utils.appendParamsFromMap = function(uri, map) {
  var buffer = goog.uri.utils.buildQueryDataBufferFromMap_([uri], map);
  return goog.uri.utils.appendQueryData_(buffer);
};
797
798
/**
 * Adds one query parameter to a URI.
 *
 * Repeated calls to this can exhibit quadratic behavior in IE6 due to the
 * way string append works, though it should be limited given the 2kb limit.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} key The key, which must already be URI encoded.
 * @param {*=} opt_value The value, which will be stringized and encoded
 *     (assumed not already to be encoded).  If omitted, undefined, or null, the
 *     key will be added as a valueless parameter.
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.appendParam = function(uri, key, opt_value) {
  // Base URI, provisional delimiter, key -- the layout appendQueryData_
  // expects.
  var pieces = [uri, '&', key];

  if (goog.isDefAndNotNull(opt_value)) {
    var encodedValue = goog.string.urlEncode(opt_value);
    pieces.push('=', encodedValue);
  }

  return goog.uri.utils.appendQueryData_(pieces);
};
819
820
/**
 * Locates the next occurrence of a query parameter with the given name.
 *
 * Does not instantiate any objects.
 *
 * @param {string} uri The URI to search.  May contain a fragment identifier
 *     if opt_hashIndex is specified.
 * @param {number} startIndex The index to begin searching for the key at.  A
 *     match may be found even if this is one character after the ampersand.
 * @param {string} keyEncoded The URI-encoded key.
 * @param {number} hashOrEndIndex Index to stop looking at.  If a hash
 *     mark is present, it should be its index, otherwise it should be the
 *     length of the string.
 * @return {number} The position of the first character in the key's name,
 *     immediately after either a question mark or an ampersand, or -1 if
 *     the key was not found.
 * @private
 */
goog.uri.utils.findParam_ = function(
    uri, startIndex, keyEncoded, hashOrEndIndex) {
  var charCodes = goog.uri.utils.CharCode_;
  var keyLength = keyEncoded.length;

  // Look for the raw key text and then inspect the punctuation around each
  // hit, rather than expensively building a regexp.
  var candidate = uri.indexOf(keyEncoded, startIndex);
  while (candidate >= 0 && candidate < hashOrEndIndex) {
    // The key must directly follow '&' or '?'.
    var before = uri.charCodeAt(candidate - 1);
    if (before == charCodes.AMPERSAND || before == charCodes.QUESTION) {
      // The key must be directly followed by '=', '&', '#', or the end of
      // the string (where charCodeAt yields NaN).
      var after = uri.charCodeAt(candidate + keyLength);
      if (!after ||
          after == charCodes.EQUAL ||
          after == charCodes.AMPERSAND ||
          after == charCodes.HASH) {
        return candidate;
      }
    }

    // Not a parameter name here; resume the search past this occurrence.
    candidate = uri.indexOf(keyEncoded, candidate + keyLength + 1);
  }

  return -1;
};
866
867
/**
 * Matches a hash mark or, failing that, the end of the string. Used with
 * String.prototype.search to find where the query portion of a URI stops.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.hashOrEndRe_ = /#|$/;
874
875
/**
 * Tests whether a URI contains a query parameter with the given key.
 *
 * Performs no object instantiations.
 *
 * @param {string} uri The URI to process.  May contain a fragment
 *     identifier.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {boolean} Whether the key is present.
 */
goog.uri.utils.hasParam = function(uri, keyEncoded) {
  // Only search up to the fragment, or the end of the URI without one.
  var searchLimit = uri.search(goog.uri.utils.hashOrEndRe_);
  var keyPosition = goog.uri.utils.findParam_(uri, 0, keyEncoded, searchLimit);
  return keyPosition >= 0;
};
890
891
/**
 * Reads the value of the first occurrence of a query parameter.
 * @param {string} uri The URI to process.  May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {?string} The first value of the parameter (URI-decoded), or null
 *     if the parameter is not found.
 */
goog.uri.utils.getParamValue = function(uri, keyEncoded) {
  var searchLimit = uri.search(goog.uri.utils.hashOrEndRe_);
  var keyPosition = goog.uri.utils.findParam_(uri, 0, keyEncoded, searchLimit);
  if (keyPosition < 0) {
    return null;
  }

  // The value runs up to the next ampersand, or to the fragment / end of
  // the URI if there is none before that.
  var valueEnd = uri.indexOf('&', keyPosition);
  if (valueEnd < 0 || valueEnd > searchLimit) {
    valueEnd = searchLimit;
  }

  // Skip over the "key=" or "key&" substring.
  var valueStart = keyPosition + keyEncoded.length + 1;

  // Use substr, because it (unlike substring) will return empty string
  // if valueStart > valueEnd.
  var encodedValue = uri.substr(valueStart, valueEnd - valueStart);
  return goog.string.urlDecode(encodedValue);
};
919
920
/**
 * Gets all values of a query parameter.
 * @param {string} uri The URI to process. May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key. Case-sensitive.
 * @return {!Array.<string>} All URI-decoded values with the given key, in
 *     the order they occur. If the key is not found, this will have
 *     length 0, but never be null.
 */
goog.uri.utils.getParamValues = function(uri, keyEncoded) {
  var hashOrEndIndex = uri.search(goog.uri.utils.hashOrEndRe_);
  var position = 0;
  var foundIndex;
  var result = [];

  while ((foundIndex = goog.uri.utils.findParam_(
      uri, position, keyEncoded, hashOrEndIndex)) >= 0) {
    // Find where this parameter ends, either the '&' or the end of the
    // query parameters.
    var valueEnd = uri.indexOf('&', foundIndex);
    if (valueEnd < 0 || valueEnd > hashOrEndIndex) {
      valueEnd = hashOrEndIndex;
    }

    // Resume the next search where this parameter ends, but always strictly
    // after the match just reported. For an ordinary key the parameter's end
    // already lies past the match, so this changes nothing. It matters for
    // an empty keyEncoded: the match can be zero-width and sit directly on
    // an '&' (e.g. "?&"), in which case restarting at that same '&' would
    // report the same match forever and grow the result without bound.
    position = Math.max(valueEnd, foundIndex + 1);

    // Progress forth to the end of the "key=" or "key&" substring.
    var valueStart = foundIndex + keyEncoded.length + 1;
    // Use substr, because it (unlike substring) will return empty string
    // if valueStart > valueEnd.
    result.push(goog.string.urlDecode(uri.substr(
        valueStart, valueEnd - valueStart)));
  }

  return result;
};
953
954
/**
 * Pattern that matches a '?' or '&' sitting immediately before the end of
 * the string or before a '#'. The capture group holds whatever followed the
 * punctuation (the '#', or nothing at end of string), so replacing a match
 * with '$1' drops the dangling punctuation and keeps the hash mark.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.trailingQueryPunctuationRe_ = /[?&]($|#)/;
961
962
/**
 * Strips every occurrence of a query parameter from a URI.
 * @param {string} uri The URI to process. Must not contain a fragment.
 * @param {string} keyEncoded The key, already URI-encoded.
 * @return {string} The URI without any occurrence of the parameter.
 */
goog.uri.utils.removeParam = function(uri, keyEncoded) {
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var keptPieces = [];
  var cursor = 0;

  // Visit each occurrence of the key in turn.
  for (var keyStart =
           goog.uri.utils.findParam_(uri, cursor, keyEncoded, queryEnd);
       keyStart >= 0;
       keyStart =
           goog.uri.utils.findParam_(uri, cursor, keyEncoded, queryEnd)) {
    // Keep the text between the previous occurrence and the start of this
    // key. The '?' or '&' in front of the key stays in the kept text; a
    // separator left dangling at the end is trimmed after joining.
    keptPieces.push(uri.substring(cursor, keyStart));

    // Continue right after the '&' that closes this parameter. indexOf
    // yields -1 when no '&' follows, which makes afterAmpersand 0 (falsy),
    // so the cursor falls back to queryEnd. The cursor is also clamped to
    // queryEnd so that it never moves past the hash mark.
    var afterAmpersand = uri.indexOf('&', keyStart) + 1;
    cursor = Math.min(afterAmpersand || queryEnd, queryEnd);
  }

  // Whatever follows the last occurrence is kept unchanged.
  keptPieces.push(uri.substr(cursor));

  // Glue the pieces together and drop a trailing '?' or '&', if one is left.
  var rebuilt = keptPieces.join('');
  return rebuilt.replace(goog.uri.utils.trailingQueryPunctuationRe_, '$1');
};
994
995
/**
 * Sets a parameter to a single value: every existing definition of the key
 * is removed and one new definition is appended.
 *
 * Calling this repeatedly can be quadratic, because each call has to locate
 * the existing instances and rebuild the string; the 2kb limit on URIs keeps
 * the cost bounded. To add several parameters at once, consider appendParams
 * instead.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} keyEncoded The key, which must already be URI encoded.
 * @param {*} value The value. It is stringized and encoded, so it is assumed
 *     not to be encoded already.
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.setParam = function(uri, keyEncoded, value) {
  var uriWithoutKey = goog.uri.utils.removeParam(uri, keyEncoded);
  return goog.uri.utils.appendParam(uriWithoutKey, keyEncoded, value);
};
1014
1015
/**
 * Joins a path onto a base URI with exactly one '/' at the seam, so that the
 * junction never produces a consecutive "//". At most one trailing '/' is
 * taken off the base and at most one leading '/' off the path. Neither the
 * base URI nor the path to append may contain query or fragment
 * identifiers; this is asserted for the base URI.
 *
 * @param {string} baseUri URI to use as the base.
 * @param {string} path Path to append.
 * @return {string} Updated URI.
 */
goog.uri.utils.appendPath = function(baseUri, path) {
  goog.uri.utils.assertNoFragmentsOrQueries_(baseUri);

  // Drop one trailing '/' from the base, if present.
  var base = goog.string.endsWith(baseUri, '/') ?
      baseUri.substr(0, baseUri.length - 1) : baseUri;
  // Drop one leading '/' from the path, if present.
  var suffix = goog.string.startsWith(path, '/') ? path.substr(1) : path;

  return goog.string.buildString(base, '/', suffix);
};
1039
1040
/**
 * Swaps the path of a URI for a new one, carrying every other component
 * (scheme, user info, domain, port, query data, fragment) over unchanged.
 * @param {string} uri URI to use as the base.
 * @param {string} path New path. A leading '/' is added when it is missing.
 * @return {string} Updated URI.
 */
goog.uri.utils.setPath = function(uri, path) {
  var absolutePath = goog.string.startsWith(path, '/') ? path : '/' + path;

  // Take the URI apart, then rebuild it with the new path slotted in.
  var components = goog.uri.utils.split(uri);
  var scheme = components[goog.uri.utils.ComponentIndex.SCHEME];
  var userInfo = components[goog.uri.utils.ComponentIndex.USER_INFO];
  var domain = components[goog.uri.utils.ComponentIndex.DOMAIN];
  var port = components[goog.uri.utils.ComponentIndex.PORT];
  var queryData = components[goog.uri.utils.ComponentIndex.QUERY_DATA];
  var fragment = components[goog.uri.utils.ComponentIndex.FRAGMENT];

  return goog.uri.utils.buildFromEncodedParts(
      scheme, userInfo, domain, port, absolutePath, queryData, fragment);
};
1062
1063
/**
 * Query parameter names with a standard, supported meaning.
 * @enum {string}
 */
goog.uri.utils.StandardQueryParam = {

  /**
   * Parameter that is otherwise unused; it exists only so that a URI can be
   * made unique.
   */
  RANDOM: 'zx'
};
1073
1074
/**
 * Makes a URI unique by giving its "zx" parameter a random value.
 * @param {string} uri Any URI.
 * @return {string} That URI with the "zx" parameter added, or replaced if it
 *     was already present, holding a random string.
 */
goog.uri.utils.makeUnique = function(uri) {
  var randomValue = goog.string.getRandomString();
  return goog.uri.utils.setParam(
      uri, goog.uri.utils.StandardQueryParam.RANDOM, randomValue);
};