lib/goog/uri/utils.js

1// Copyright 2008 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
/**
 * @fileoverview Simple utilities for dealing with URI strings.
 *
 * This is intended to be a lightweight alternative to constructing goog.Uri
 * objects. Whereas goog.Uri adds several kilobytes to the binary regardless
 * of how much of its functionality you use, this is designed to be a set of
 * mostly-independent utilities so that the compiler includes only what is
 * necessary for the task. Estimated savings of porting is 5k pre-gzip and
 * 1.5k post-gzip. To ensure the savings remain, future developers should
 * avoid adding new functionality to existing functions, but instead create
 * new ones and factor out shared code.
 *
 * Many of these utilities have limited functionality, tailored to common
 * cases. The query parameter utilities assume that the parameter keys are
 * already encoded, since most keys are compile-time alphanumeric strings. The
 * query parameter mutation utilities also do not tolerate fragment identifiers.
 *
 * By design, these functions can be slower than goog.Uri equivalents.
 * Repeated calls to some of these functions may be quadratic in behavior for
 * IE, although the effect is somewhat limited given the 2kb limit.
 *
 * One advantage of the limited functionality here is that this approach is
 * less sensitive to differences in URI encodings than goog.Uri, since these
 * functions modify the strings in place, rather than decoding and
 * re-encoding.
 *
 * Uses features of RFC 3986 for parsing/formatting URIs:
 *   http://www.ietf.org/rfc/rfc3986.txt
 *
 * @author gboyer@google.com (Garrett Boyer) - The "lightened" design.
 */
46
47goog.provide('goog.uri.utils');
48goog.provide('goog.uri.utils.ComponentIndex');
49goog.provide('goog.uri.utils.QueryArray');
50goog.provide('goog.uri.utils.QueryValue');
51goog.provide('goog.uri.utils.StandardQueryParam');
52
53goog.require('goog.asserts');
54goog.require('goog.string');
55goog.require('goog.userAgent');
56
57
/**
 * Character codes inlined to avoid object allocations due to charCode.
 * Each value is the code unit of the named punctuation character, so it can
 * be compared directly against the result of String.prototype.charCodeAt.
 * @enum {number}
 * @private
 */
goog.uri.utils.CharCode_ = {
  AMPERSAND: 38,  // '&'
  EQUAL: 61,  // '='
  HASH: 35,  // '#'
  QUESTION: 63  // '?'
};
69
70
/**
 * Assembles a URI string out of components that are already URI-encoded.
 *
 * Nothing is encoded here. Any component may be left out by passing null or
 * undefined (any falsy value is skipped). The user info and the port are
 * only emitted when a domain is supplied, since they are part of the
 * authority.
 *
 * @param {?string=} opt_scheme The scheme such as 'http'.
 * @param {?string=} opt_userInfo The user name before the '@'.
 * @param {?string=} opt_domain The domain such as 'www.google.com', already
 *     URI-encoded.
 * @param {(string|number|null)=} opt_port The port number.
 * @param {?string=} opt_path The path, already URI-encoded.  If it is not
 *     empty, it must begin with a slash.
 * @param {?string=} opt_queryData The URI-encoded query data.
 * @param {?string=} opt_fragment The URI-encoded fragment identifier.
 * @return {string} The fully combined URI.
 */
goog.uri.utils.buildFromEncodedParts = function(opt_scheme, opt_userInfo,
    opt_domain, opt_port, opt_path, opt_queryData, opt_fragment) {
  var schemePart = opt_scheme ? opt_scheme + ':' : '';

  // The authority ("//user@domain:port") exists only when there is a domain.
  var authorityPart = '';
  if (opt_domain) {
    var userInfoPart = opt_userInfo ? opt_userInfo + '@' : '';
    var portPart = opt_port ? ':' + opt_port : '';
    authorityPart = '//' + userInfoPart + opt_domain + portPart;
  }

  var pathPart = opt_path || '';
  var queryPart = opt_queryData ? '?' + opt_queryData : '';
  var fragmentPart = opt_fragment ? '#' + opt_fragment : '';

  return schemePart + authorityPart + pathPart + queryPart + fragmentPart;
};
124
125
/**
 * A regular expression for breaking a URI into its component parts.
 *
 * {@link http://www.ietf.org/rfc/rfc3986.txt} says in Appendix B
 * As the "first-match-wins" algorithm is identical to the "greedy"
 * disambiguation method used by POSIX regular expressions, it is natural and
 * commonplace to use a regular expression for parsing the potential five
 * components of a URI reference.
 *
 * The following line is the regular expression for breaking-down a
 * well-formed URI reference into its components.
 *
 * <pre>
 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *  12            3  4          5       6  7        8 9
 * </pre>
 *
 * The numbers in the second line above are only to assist readability; they
 * indicate the reference points for each subexpression (i.e., each paired
 * parenthesis). We refer to the value matched for subexpression <n> as $<n>.
 * For example, matching the above expression to
 * <pre>
 *     http://www.ics.uci.edu/pub/ietf/uri/#Related
 * </pre>
 * results in the following subexpression matches:
 * <pre>
 *    $1 = http:
 *    $2 = http
 *    $3 = //www.ics.uci.edu
 *    $4 = www.ics.uci.edu
 *    $5 = /pub/ietf/uri/
 *    $6 = <undefined>
 *    $7 = <undefined>
 *    $8 = #Related
 *    $9 = Related
 * </pre>
 * where <undefined> indicates that the component is not present, as is the
 * case for the query component in the above example. Therefore, we can
 * determine the value of the five components as
 * <pre>
 *    scheme    = $2
 *    authority = $4
 *    path      = $5
 *    query     = $7
 *    fragment  = $9
 * </pre>
 *
 * The regular expression has been modified slightly to expose the
 * userInfo, domain, and port separately from the authority.
 * The modified version yields
 * <pre>
 *    $1 = http              scheme
 *    $2 = <undefined>       userInfo -\
 *    $3 = www.ics.uci.edu   domain     | authority
 *    $4 = <undefined>       port     -/
 *    $5 = /pub/ietf/uri/    path
 *    $6 = <undefined>       query without ?
 *    $7 = Related           fragment without #
 * </pre>
 * @type {!RegExp}
 * @private
 */
goog.uri.utils.splitRe_ = new RegExp([
  '^',
  '(?:',
  '([^:/?#.]+)',  // scheme - ignore special characters used by other URL
                  // parts such as :, ?, /, #, and .
  ':)?',
  '(?://',
  '(?:([^/?#]*)@)?',  // userInfo
  '([^/#?]*?)',  // domain
  '(?::([0-9]+))?',  // port
  '(?=[/#?]|$)',  // authority-terminating character
  ')?',
  '([^?#]+)?',  // path
  '(?:\\?([^#]*))?',  // query
  '(?:#(.*))?',  // fragment
  '$'
].join(''));
205
206
/**
 * The index of each URI component in the return value of goog.uri.utils.split.
 * The values line up with the capture groups of goog.uri.utils.splitRe_;
 * index 0 of the match array (the whole match) has no entry here.
 * @enum {number}
 */
goog.uri.utils.ComponentIndex = {
  SCHEME: 1,
  USER_INFO: 2,
  DOMAIN: 3,
  PORT: 4,
  PATH: 5,
  QUERY_DATA: 6,
  FRAGMENT: 7
};
220
221
/**
 * Breaks a URI into its components using goog.uri.utils.splitRe_.
 *
 * Individual components are read through the component indices; for example:
 * <pre>
 * goog.uri.utils.split(someStr)[goog.uri.utils.ComponentIndex.QUERY_DATA];
 * </pre>
 *
 * The phishing check (goog.uri.utils.phishingProtection_) runs first and may
 * throw.
 *
 * @param {string} uri The URI string to examine.
 * @return {!Array<string|undefined>} Each component still URI-encoded.
 *     Each component that is present will contain the encoded value, whereas
 *     components that are not present will be undefined or empty, depending
 *     on the browser's regular expression implementation.  Never null, since
 *     arbitrary strings may still look like path names.
 */
goog.uri.utils.split = function(uri) {
  goog.uri.utils.phishingProtection_();

  // See the @return comment -- the match is treated as never null.
  var components = uri.match(goog.uri.utils.splitRe_);
  return /** @type {!Array<string|undefined>} */ (components);
};
244
245
/**
 * Safari has a nasty bug where if you have an http URL with a username, e.g.,
 * http://evil.com%2F@google.com/
 * Safari will report that window.location.href is
 * http://evil.com/google.com/
 * so that anyone who tries to parse the domain of that URL will get
 * the wrong domain. We've seen exploits where people use this to trick
 * Safari into loading resources from evil domains.
 *
 * To work around this, we run a little "Safari phishing check", and throw
 * an exception if we see this happening.
 *
 * There is no convenient place to put this check. We apply it to
 * anyone doing URI parsing on Webkit. We're not happy about this, but
 * it fixes the problem.
 *
 * This should be removed once Safari fixes their bug.
 *
 * Exploit reported by Masato Kinugawa.
 *
 * The flag starts out as goog.userAgent.WEBKIT.
 * goog.uri.utils.phishingProtection_ clears it before running the check and
 * sets it back to true only when the check fails.
 *
 * @type {boolean}
 * @private
 */
goog.uri.utils.needsPhishingProtection_ = goog.userAgent.WEBKIT;
270
271
/**
 * Checks whether the page URL looks like a phishing attempt: the domain
 * parsed out of location.href must equal location.hostname.
 *
 * Does nothing unless goog.uri.utils.needsPhishingProtection_ is set. On a
 * mismatch the flag is re-armed and a message-less Error is thrown; otherwise
 * the flag stays cleared, so the check is not repeated.
 * @private
 */
goog.uri.utils.phishingProtection_ = function() {
  if (!goog.uri.utils.needsPhishingProtection_) {
    return;
  }

  // Clear the flag up front: getDomain() below goes through split(), which
  // calls back into this function, and we must not recurse.
  goog.uri.utils.needsPhishingProtection_ = false;

  // Use quoted access, just in case the user isn't using location externs.
  var pageLocation = goog.global['location'];
  var pageHref = pageLocation && pageLocation['href'];
  if (!pageHref) {
    return;
  }

  var parsedDomain = goog.uri.utils.getDomain(pageHref);
  if (parsedDomain && parsedDomain != pageLocation['hostname']) {
    // Phishing attack: keep protection on so later calls fail as well.
    goog.uri.utils.needsPhishingProtection_ = true;
    throw Error();
  }
};
296
297
/**
 * URI-decodes a component, passing falsy input (null, undefined, empty
 * string) straight through untouched.
 *
 * @param {?string} uri A possibly null string.
 * @param {boolean=} opt_preserveReserved If true, percent-encoding of RFC-3986
 *     reserved characters will not be removed (decodeURI is used instead of
 *     decodeURIComponent).
 * @return {?string} The string URI-decoded, or null if uri is null.
 * @private
 */
goog.uri.utils.decodeIfPossible_ = function(uri, opt_preserveReserved) {
  if (uri) {
    var decode = opt_preserveReserved ? decodeURI : decodeURIComponent;
    return decode(uri);
  }

  // Nothing to decode; hand back the falsy input unchanged.
  return uri;
};
312
313
/**
 * Looks up one URI component by its index in the split result.
 *
 * It is preferred to use the getPathEncoded() variety of functions ahead,
 * since they are more readable.
 *
 * @param {goog.uri.utils.ComponentIndex} componentIndex The component index.
 * @param {string} uri The URI to examine.
 * @return {?string} The still-encoded component, or null if the component
 *     is not present.
 * @private
 */
goog.uri.utils.getComponentByIndex_ = function(componentIndex, uri) {
  var component = goog.uri.utils.split(uri)[componentIndex];
  // Normalize undefined, null, and empty string to null.
  return component ? component : null;
};
330
331
/**
 * Extracts the scheme of a URI.
 * @param {string} uri The URI to examine.
 * @return {?string} The protocol or scheme, or null if none.  Does not
 *     include trailing colons or slashes.
 */
goog.uri.utils.getScheme = function(uri) {
  var schemeIndex = goog.uri.utils.ComponentIndex.SCHEME;
  return goog.uri.utils.getComponentByIndex_(schemeIndex, uri);
};
341
342
/**
 * Gets the effective scheme for the URL.  If the URL is relative then the
 * scheme is derived from the page's location.
 *
 * When the URI has no scheme and no location is available (for example
 * outside a browser or worker), the empty string is returned.
 *
 * @param {string} uri The URI to examine.
 * @return {string} The protocol or scheme, always lower case.
 */
goog.uri.utils.getEffectiveScheme = function(uri) {
  var scheme = goog.uri.utils.getScheme(uri);
  // Look `self` up through goog.global instead of referencing it as a bare
  // identifier: a bare `self` throws a ReferenceError on hosts that do not
  // define it, whereas a missing property is simply undefined.
  if (!scheme && goog.global.self && goog.global.self.location) {
    var protocol = goog.global.self.location.protocol;
    // location.protocol carries a trailing colon ("http:"); strip it.
    scheme = protocol.substr(0, protocol.length - 1);
  }
  // NOTE: When called from a web worker in Firefox 3.5, location maybe null.
  // All other browsers with web workers support self.location from the worker.
  return scheme ? scheme.toLowerCase() : '';
};
359
360
/**
 * Extracts the user info of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The user name still encoded, or null if none.
 */
goog.uri.utils.getUserInfoEncoded = function(uri) {
  var userInfoIndex = goog.uri.utils.ComponentIndex.USER_INFO;
  return goog.uri.utils.getComponentByIndex_(userInfoIndex, uri);
};
369
370
/**
 * Extracts and URI-decodes the user info of a URI.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded user info, or null if none.
 */
goog.uri.utils.getUserInfo = function(uri) {
  var encodedUserInfo = goog.uri.utils.getUserInfoEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedUserInfo);
};
379
380
/**
 * Extracts the domain of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The domain name still encoded, or null if none.
 */
goog.uri.utils.getDomainEncoded = function(uri) {
  var domainIndex = goog.uri.utils.ComponentIndex.DOMAIN;
  return goog.uri.utils.getComponentByIndex_(domainIndex, uri);
};
389
390
/**
 * Extracts and decodes the domain of a URI. Percent-encoded reserved
 * characters are left encoded.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded domain, or null if none.
 */
goog.uri.utils.getDomain = function(uri) {
  var encodedDomain = goog.uri.utils.getDomainEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(
      encodedDomain, true /* opt_preserveReserved */);
};
399
400
/**
 * Extracts the port of a URI as a number.
 * @param {string} uri The URI to examine.
 * @return {?number} The port number, or null if none.
 */
goog.uri.utils.getPort = function(uri) {
  var encodedPort = goog.uri.utils.getComponentByIndex_(
      goog.uri.utils.ComponentIndex.PORT, uri);
  // Coerce to a number. A missing port (null) coerces to 0 and a non-numeric
  // one to NaN; both are falsy and therefore reported as null. A literal
  // port of zero is reported as null too, which isn't a relevant port number.
  var port = Number(encodedPort);
  return port ? port : null;
};
413
414
/**
 * Extracts the path of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The path still encoded, or null if none. Includes the
 *     leading slash, if any.
 */
goog.uri.utils.getPathEncoded = function(uri) {
  var pathIndex = goog.uri.utils.ComponentIndex.PATH;
  return goog.uri.utils.getComponentByIndex_(pathIndex, uri);
};
424
425
/**
 * Extracts and decodes the path of a URI. Percent-encoded reserved
 * characters are left encoded.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded path, or null if none.  Includes the leading
 *     slash, if any.
 */
goog.uri.utils.getPath = function(uri) {
  var encodedPath = goog.uri.utils.getPathEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(
      encodedPath, true /* opt_preserveReserved */);
};
435
436
/**
 * Extracts the query string of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The query data still encoded, or null if none.  Does not
 *     include the question mark itself.
 */
goog.uri.utils.getQueryData = function(uri) {
  var queryIndex = goog.uri.utils.ComponentIndex.QUERY_DATA;
  return goog.uri.utils.getComponentByIndex_(queryIndex, uri);
};
446
447
/**
 * Extracts the fragment of a URI without decoding it. Works by locating the
 * first hash mark rather than by running the full split.
 * @param {string} uri The URI to examine.
 * @return {?string} The fragment identifier, or null if none.  Does not
 *     include the hash mark itself.
 */
goog.uri.utils.getFragmentEncoded = function(uri) {
  // The hash mark may not appear in any other part of the URL.
  var hashPosition = uri.indexOf('#');
  if (hashPosition < 0) {
    return null;
  }
  return uri.slice(hashPosition + 1);
};
458
459
/**
 * Replaces the fragment of a URI. Any existing fragment is dropped first; a
 * falsy new fragment simply removes it.
 * @param {string} uri The URI to examine.
 * @param {?string} fragment The encoded fragment identifier, or null if none.
 *     Does not include the hash mark itself.
 * @return {string} The URI with the fragment set.
 */
goog.uri.utils.setFragmentEncoded = function(uri, fragment) {
  var withoutFragment = goog.uri.utils.removeFragment(uri);
  if (fragment) {
    return withoutFragment + '#' + fragment;
  }
  return withoutFragment;
};
469
470
/**
 * Extracts and URI-decodes the fragment of a URI.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded fragment identifier, or null if none.  Does
 *     not include the hash mark.
 */
goog.uri.utils.getFragment = function(uri) {
  var encodedFragment = goog.uri.utils.getFragmentEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedFragment);
};
480
481
/**
 * Extracts everything up to the port of the URI: scheme, user info, domain
 * and port, re-assembled without path, query or fragment.
 * @param {string} uri The URI string.
 * @return {string} Everything up to and including the port.
 */
goog.uri.utils.getHost = function(uri) {
  var Index = goog.uri.utils.ComponentIndex;
  var components = goog.uri.utils.split(uri);
  return goog.uri.utils.buildFromEncodedParts(
      components[Index.SCHEME],
      components[Index.USER_INFO],
      components[Index.DOMAIN],
      components[Index.PORT]);
};
495
496
/**
 * Extracts the path of the URL and everything after.
 * @param {string} uri The URI string.
 * @return {string} The URI, starting at the path and including the query
 *     parameters and fragment identifier.
 */
goog.uri.utils.getPathAndAfter = function(uri) {
  var Index = goog.uri.utils.ComponentIndex;
  var components = goog.uri.utils.split(uri);
  // Scheme, user info, domain and port are deliberately left out.
  return goog.uri.utils.buildFromEncodedParts(
      null, null, null, null,
      components[Index.PATH],
      components[Index.QUERY_DATA],
      components[Index.FRAGMENT]);
};
510
511
/**
 * Gets the URI with the fragment identifier removed.
 * @param {string} uri The URI to examine.
 * @return {string} Everything preceding the hash mark, or the whole URI when
 *     it has no hash mark.
 */
goog.uri.utils.removeFragment = function(uri) {
  // The hash mark may not appear in any other part of the URL.
  var hashPosition = uri.indexOf('#');
  if (hashPosition < 0) {
    return uri;
  }
  return uri.slice(0, hashPosition);
};
522
523
/**
 * Ensures that two URI's have the exact same domain, scheme, and port.
 *
 * Unlike the version in goog.Uri, this checks protocol, and therefore is
 * suitable for checking against the browser's same-origin policy.
 *
 * The still-encoded components are compared as they appear in each URI.
 *
 * @param {string} uri1 The first URI.
 * @param {string} uri2 The second URI.
 * @return {boolean} Whether they have the same scheme, domain and port.
 */
goog.uri.utils.haveSameDomain = function(uri1, uri2) {
  var Index = goog.uri.utils.ComponentIndex;
  var first = goog.uri.utils.split(uri1);
  var second = goog.uri.utils.split(uri2);

  if (first[Index.DOMAIN] != second[Index.DOMAIN]) {
    return false;
  }
  if (first[Index.SCHEME] != second[Index.SCHEME]) {
    return false;
  }
  return first[Index.PORT] == second[Index.PORT];
};
544
545
/**
 * Asserts that there are no fragment or query identifiers, only in uncompiled
 * mode (the check is skipped entirely when goog.DEBUG is false).
 * @param {string} uri The URI to examine.
 * @private
 */
goog.uri.utils.assertNoFragmentsOrQueries_ = function(uri) {
  if (!goog.DEBUG) {
    return;
  }

  // NOTE: would use goog.asserts here, but jscompiler doesn't know that
  // indexOf has no side effects.
  var hasFragment = uri.indexOf('#') >= 0;
  var hasQuery = uri.indexOf('?') >= 0;
  if (hasFragment || hasQuery) {
    throw Error('goog.uri.utils: Fragment or query identifiers are not ' +
        'supported: [' + uri + ']');
  }
};
560
561
/**
 * Supported query parameter values by the parameter serializing utilities.
 *
 * If a value is null or undefined, the key-value pair is skipped, as an easy
 * way to omit parameters conditionally.  Non-array parameters are converted
 * to a string and URI encoded.  Array values are expanded into multiple
 * &key=value pairs, with each element stringized and URI-encoded.
 *
 * @typedef {*}
 */
goog.uri.utils.QueryValue;
573
574
/**
 * An array representing a set of query parameters with alternating keys
 * and values.
 *
 * Keys are assumed to be URI encoded already and live at even indices.  See
 * goog.uri.utils.QueryValue for details on how parameter values are encoded.
 *
 * Example:
 * <pre>
 * var data = [
 *   // Simple param: ?name=BobBarker
 *   'name', 'BobBarker',
 *   // Conditional param -- may be omitted entirely.
 *   'specialDietaryNeeds', hasDietaryNeeds() ? getDietaryNeeds() : null,
 *   // Multi-valued param: &house=LosAngeles&house=NewYork&house=null
 *   'house', ['LosAngeles', 'NewYork', null]
 * ];
 * </pre>
 *
 * @typedef {!Array<string|goog.uri.utils.QueryValue>}
 */
goog.uri.utils.QueryArray;
597
598
/**
 * Joins a buffer holding a base URI followed by query data, fixing up the
 * punctuation between the two. The buffer is modified in place.
 *
 * Internal implementation utility, performing very few object allocations.
 *
 * @param {!Array<string|undefined>} buffer A string buffer.  The first element
 *     must be the base URI, and may have a fragment identifier.  If the array
 *     contains more than one element, the second element must be an ampersand,
 *     and may be overwritten, depending on the base URI.  Undefined elements
 *     are treated as empty-string.
 * @return {string} The concatenated URI and query data.
 * @private
 */
goog.uri.utils.appendQueryData_ = function(buffer) {
  if (!buffer[1]) {
    // No query parameter was added; nothing to fix up.
    return buffer.join('');
  }

  // At least one query parameter was added.  The separator in slot 1 is
  // currently an ampersand and may need replacing, and any fragment must be
  // moved out of the way.
  var base = /** @type {string} */ (buffer[0]);
  var fragmentStart = base.indexOf('#');
  if (fragmentStart >= 0) {
    // Move the fragment off the base URI to the very end of the buffer.
    buffer.push(base.substr(fragmentStart));
    base = base.substr(0, fragmentStart);
    buffer[0] = base;
  }

  var queryStart = base.indexOf('?');
  if (queryStart < 0) {
    // No question mark yet, so the first separator must be one.
    buffer[1] = '?';
  } else if (queryStart == base.length - 1) {
    // The base already ends in a question mark, so no separator is needed.
    buffer[1] = undefined;
  }

  return buffer.join('');
};
637
638
/**
 * Appends key=value pairs to an array, supporting multi-valued objects.
 *
 * Each pair is pushed as four elements: '&', the key, '=' (or '' when the
 * value is the empty string) and the URL-encoded value. A top-level null or
 * undefined value appends nothing.
 *
 * @param {string} key The key prefix.
 * @param {goog.uri.utils.QueryValue} value The value to serialize.
 * @param {!Array<string>} pairs The array to which the 'key=value' strings
 *     should be appended.
 * @private
 */
goog.uri.utils.appendKeyValuePairs_ = function(key, value, pairs) {
  if (goog.isArray(value)) {
    // Convince the compiler it's an array.
    goog.asserts.assertArray(value);
    for (var i = 0; i < value.length; i++) {
      // Stringify each element explicitly so the recursive call takes the
      // scalar branch: null and undefined are written as literal 'null' and
      // 'undefined', and nested arrays are encoded in the default way rather
      // than expanded.
      var element = String(value[i]);
      goog.uri.utils.appendKeyValuePairs_(key, element, pairs);
    }
    return;
  }

  if (value == null) {
    // Skip a top-level null or undefined entirely.
    return;
  }

  // For the empty string, skip the equal sign, to be consistent with
  // UriBuilder.java.  Zero is encoded into the URL as a literal string.
  var separator = value === '' ? '' : '=';
  pairs.push('&', key, separator, goog.string.urlEncode(value));
};
668
669
/**
 * Builds a buffer of query data from a sequence of alternating keys and values.
 *
 * @param {!Array<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {!goog.uri.utils.QueryArray|!Arguments} keysAndValues An array with
 *     alternating keys and values -- see the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {!Array<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBuffer_ = function(
    buffer, keysAndValues, opt_startIndex) {
  var start = opt_startIndex || 0;
  var pairedLength = Math.max(keysAndValues.length - start, 0);
  goog.asserts.assert(pairedLength % 2 == 0,
      'goog.uri.utils: Key/value lists must be even in length.');

  // Keys sit at start, start + 2, ...; each value follows its key.
  for (var keyIndex = start; keyIndex < keysAndValues.length; keyIndex += 2) {
    var key = keysAndValues[keyIndex];
    var value = keysAndValues[keyIndex + 1];
    goog.uri.utils.appendKeyValuePairs_(key, value, buffer);
  }

  return buffer;
};
693
694
/**
 * Builds a query data string from a sequence of alternating keys and values.
 * A key whose value is the empty string is emitted bare, without an '='.
 *
 * @param {goog.uri.utils.QueryArray} keysAndValues Alternating keys and
 *     values.  See the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryData = function(keysAndValues, opt_startIndex) {
  var pieces = goog.uri.utils.buildQueryDataBuffer_(
      [], keysAndValues, opt_startIndex);
  // The first piece, if any, is the leading ampersand; leave it out.
  return pieces.slice(1).join('');
};
710
711
/**
 * Builds a buffer of query data from a map.
 *
 * Every property enumerated by for-in is serialized, in enumeration order.
 *
 * @param {!Array<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {!Object<string, goog.uri.utils.QueryValue>} map An object where keys
 *     are URI-encoded parameter keys, and the values conform to the contract
 *     specified in the goog.uri.utils.QueryValue typedef.
 * @return {!Array<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBufferFromMap_ = function(buffer, map) {
  for (var paramKey in map) {
    var paramValue = map[paramKey];
    goog.uri.utils.appendKeyValuePairs_(paramKey, paramValue, buffer);
  }

  return buffer;
};
730
731
/**
 * Builds a query data string from a map.
 * A key whose value is the empty string is emitted bare, without an '='.
 *
 * @param {!Object<string, goog.uri.utils.QueryValue>} map An object where keys
 *     are URI-encoded parameter keys, and the values are arbitrary types
 *     or arrays.  Keys with a null value are dropped.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryDataFromMap = function(map) {
  var pieces = goog.uri.utils.buildQueryDataBufferFromMap_([], map);
  // The first piece, if any, is the leading ampersand; leave it out.
  return pieces.slice(1).join('');
};
746
747
/**
 * Appends URI parameters to an existing URI.
 *
 * The variable arguments may contain alternating keys and values.  Keys are
 * assumed to be already URI encoded.  The values should not be URI-encoded,
 * and will instead be encoded by this function.
 * <pre>
 * appendParams('http://www.foo.com?existing=true',
 *     'key1', 'value1',
 *     'key2', 'value?willBeEncoded',
 *     'key3', ['valueA', 'valueB', 'valueC'],
 *     'key4', null);
 * result: 'http://www.foo.com?existing=true&' +
 *     'key1=value1&' +
 *     'key2=value%3FwillBeEncoded&' +
 *     'key3=valueA&key3=valueB&key3=valueC'
 * </pre>
 *
 * A single call to this function will not exhibit quadratic behavior in IE,
 * whereas multiple repeated calls may, although the effect is limited by
 * fact that URL's generally can't exceed 2kb.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {...(goog.uri.utils.QueryArray|string|goog.uri.utils.QueryValue)} var_args
 *     An array or argument list conforming to goog.uri.utils.QueryArray.
 * @return {string} The URI with all query parameters added.
 */
goog.uri.utils.appendParams = function(uri, var_args) {
  var buffer = [uri];
  if (arguments.length == 2) {
    // Exactly one extra argument: it is the key/value array itself.
    goog.uri.utils.buildQueryDataBuffer_(buffer, arguments[1], 0);
  } else {
    // Keys and values were passed inline, after the uri argument.
    goog.uri.utils.buildQueryDataBuffer_(buffer, arguments, 1);
  }
  return goog.uri.utils.appendQueryData_(buffer);
};
781
782
/**
 * Appends query parameters from a map.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {!Object<goog.uri.utils.QueryValue>} map An object where keys are
 *     URI-encoded parameter keys, and the values are arbitrary types or arrays.
 *     Keys with a null value are dropped.
 * @return {string} The URI with the parameters from the map added.
 */
goog.uri.utils.appendParamsFromMap = function(uri, map) {
  var buffer = goog.uri.utils.buildQueryDataBufferFromMap_([uri], map);
  return goog.uri.utils.appendQueryData_(buffer);
};
796
797
/**
 * Appends a single URI parameter.
 *
 * Repeated calls to this can exhibit quadratic behavior in IE6 due to the
 * way string append works, though it should be limited given the 2kb limit.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} key The key, which must already be URI encoded.
 * @param {*=} opt_value The value, which will be stringized and encoded
 *     (assumed not already to be encoded).  If omitted, undefined, or null, the
 *     key will be added as a valueless parameter.
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.appendParam = function(uri, key, opt_value) {
  // Slot 1 holds the separator, which appendQueryData_ may rewrite.
  var buffer = goog.isDefAndNotNull(opt_value) ?
      [uri, '&', key, '=', goog.string.urlEncode(opt_value)] :
      [uri, '&', key];
  return goog.uri.utils.appendQueryData_(buffer);
};
818
819
/**
 * Finds the next instance of a query parameter with the specified name.
 *
 * Does not instantiate any objects.
 *
 * @param {string} uri The URI to search.  May contain a fragment identifier
 *     if opt_hashIndex is specified.
 * @param {number} startIndex The index to begin searching for the key at.  A
 *     match may be found even if this is one character after the ampersand.
 * @param {string} keyEncoded The URI-encoded key.
 * @param {number} hashOrEndIndex Index to stop looking at.  If a hash
 *     mark is present, it should be its index, otherwise it should be the
 *     length of the string.
 * @return {number} The position of the first character in the key's name,
 *     immediately after either a question mark or an ampersand, or -1 if the
 *     key is not found.
 * @private
 */
goog.uri.utils.findParam_ = function(
    uri, startIndex, keyEncoded, hashOrEndIndex) {
  var CharCode = goog.uri.utils.CharCode_;
  var keyLength = keyEncoded.length;

  // Search for the key text itself and post-filter on the surrounding
  // punctuation, rather than expensively building a regexp.
  var candidate = uri.indexOf(keyEncoded, startIndex);
  while (candidate >= 0 && candidate < hashOrEndIndex) {
    // The key must directly follow '&' or '?'.
    var before = uri.charCodeAt(candidate - 1);
    if (before == CharCode.AMPERSAND || before == CharCode.QUESTION) {
      // ...and be directly followed by '=', '&', '#', or the end of the
      // string (charCodeAt yields NaN there).
      var after = uri.charCodeAt(candidate + keyLength);
      if (!after ||
          after == CharCode.EQUAL ||
          after == CharCode.AMPERSAND ||
          after == CharCode.HASH) {
        return candidate;
      }
    }
    candidate = uri.indexOf(keyEncoded, candidate + keyLength + 1);
  }

  return -1;
};
865
866
/**
 * Regular expression for finding a hash mark or end of string.
 * Passed to String.prototype.search to obtain the index at which the query
 * portion of a URI stops.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.hashOrEndRe_ = /#|$/;
873
874
/**
 * Determines if the URI contains a specific key.
 *
 * Performs no object instantiations.
 *
 * @param {string} uri The URI to process.  May contain a fragment
 *     identifier.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {boolean} Whether the key is present.
 */
goog.uri.utils.hasParam = function(uri, keyEncoded) {
  // Only look in front of the fragment, if there is one.
  var searchEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var keyStart = goog.uri.utils.findParam_(uri, 0, keyEncoded, searchEnd);
  return keyStart >= 0;
};
889
890
/**
 * Gets the first value of a query parameter.
 * @param {string} uri The URI to process.  May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {?string} The first value of the parameter (URI-decoded), or null
 *     if the parameter is not found.
 */
goog.uri.utils.getParamValue = function(uri, keyEncoded) {
  var searchEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var keyStart = goog.uri.utils.findParam_(uri, 0, keyEncoded, searchEnd);
  if (keyStart < 0) {
    return null;
  }

  // The value runs up to the next '&', or to the end of the query data.
  var valueEnd = uri.indexOf('&', keyStart);
  if (valueEnd < 0 || valueEnd > searchEnd) {
    valueEnd = searchEnd;
  }

  // Step over the "key=" or "key&" substring.
  var valueStart = keyStart + keyEncoded.length + 1;
  // Use substr, because it (unlike substring) will return empty string
  // if valueStart > valueEnd.
  var encodedValue = uri.substr(valueStart, valueEnd - valueStart);
  return goog.string.urlDecode(encodedValue);
};
918
919
/**
 * Collects the values of every occurrence of a query parameter.
 * @param {string} uri The URI to process. May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key. Case-sensitive.
 * @return {!Array<string>} All values with the given key, each passed through
 *     goog.string.urlDecode, in order of appearance. If the key is not found,
 *     this will have length 0, but never be null.
 */
goog.uri.utils.getParamValues = function(uri, keyEncoded) {
  // Index of the first '#', or uri.length when there is no fragment.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var values = [];

  var keyStart = goog.uri.utils.findParam_(uri, 0, keyEncoded, queryEnd);
  while (keyStart >= 0) {
    // This occurrence ends at the next '&', or at the end of the query when
    // no '&' remains before it.
    var valueEnd = uri.indexOf('&', keyStart);
    if (valueEnd < 0 || valueEnd > queryEnd) {
      valueEnd = queryEnd;
    }

    // Skip over the key and the single character after it ("key=" or "key&").
    var valueStart = keyStart + keyEncoded.length + 1;

    // substr is deliberate: for a bare key the length is negative, and substr
    // (unlike substring) then yields the empty string.
    values.push(
        goog.string.urlDecode(uri.substr(valueStart, valueEnd - valueStart)));

    // Resume the search from where this occurrence ended.
    keyStart = goog.uri.utils.findParam_(uri, valueEnd, keyEncoded, queryEnd);
  }

  return values;
};
952
953
/**
 * Pattern matching a '?' or '&' that is immediately followed by the end of
 * the string or by a '#'. The first capture group holds whatever followed the
 * punctuation (the '#', or nothing), so replacing a match with '$1' drops only
 * the dangling '?' or '&'.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.trailingQueryPunctuationRe_ = /[?&]($|#)/;
960
961
/**
 * Strips every occurrence of a query parameter from a URI.
 * @param {string} uri The URI to process. Must not contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.
 * @return {string} The URI with all instances of the parameter removed.
 */
goog.uri.utils.removeParam = function(uri, keyEncoded) {
  // Index of the first '#', or uri.length when there is none; the search for
  // the key never goes past this point.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var kept = [];
  var cursor = 0;
  var keyStart = goog.uri.utils.findParam_(uri, cursor, keyEncoded, queryEnd);

  while (keyStart >= 0) {
    // Keep everything from the cursor up to the start of the key. This chunk
    // ends with the '?' or '&' that introduced the parameter.
    kept.push(uri.substring(cursor, keyStart));

    // Move to just after the parameter's terminating '&'. When there is no
    // further '&' (indexOf gives -1, so the sum is 0), or it lies beyond the
    // query, stop at the end of the query instead.
    var afterAmp = uri.indexOf('&', keyStart) + 1;
    cursor = Math.min(afterAmp || queryEnd, queryEnd);

    keyStart = goog.uri.utils.findParam_(uri, cursor, keyEncoded, queryEnd);
  }

  // Whatever follows the last removed occurrence is kept verbatim.
  kept.push(uri.substr(cursor));

  // A '?' or '&' left dangling at the end of the query is dropped.
  var stitched = kept.join('');
  return stitched.replace(goog.uri.utils.trailingQueryPunctuationRe_, '$1');
};
993
994
/**
 * Sets a query parameter to a single value, discarding any definitions of the
 * same key that the URI already had.
 *
 * Implemented as a removal followed by an append, so each call rescans and
 * rebuilds the string; repeated calls can therefore exhibit quadratic
 * behavior, though it should be limited given the 2kb limit. Consider using
 * appendParams to append multiple parameters in bulk.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} keyEncoded The key, which must already be URI encoded.
 * @param {*} value The value, which will be stringized and encoded (assumed
 *     not already to be encoded).
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.setParam = function(uri, keyEncoded, value) {
  var withoutKey = goog.uri.utils.removeParam(uri, keyEncoded);
  return goog.uri.utils.appendParam(withoutKey, keyEncoded, value);
};
1013
1014
/**
 * Joins a path onto a base URI with exactly one '/' at the seam: one trailing
 * slash on the base and one leading slash on the path are dropped before the
 * two are joined. The baseUri passed in must not contain query or fragment
 * identifiers (it is checked with assertNoFragmentsOrQueries_). The path to
 * append may not contain query or fragment identifiers either; that is not
 * checked here.
 *
 * @param {string} baseUri URI to use as the base.
 * @param {string} path Path to append.
 * @return {string} Updated URI.
 */
goog.uri.utils.appendPath = function(baseUri, path) {
  goog.uri.utils.assertNoFragmentsOrQueries_(baseUri);

  // Base without a single trailing '/', if it had one.
  var head = goog.string.endsWith(baseUri, '/') ?
      baseUri.substr(0, baseUri.length - 1) :
      baseUri;
  // Path without a single leading '/', if it had one.
  var tail = goog.string.startsWith(path, '/') ? path.substr(1) : path;

  return goog.string.buildString(head, '/', tail);
};
1038
1039
/**
 * Swaps the path component of a URI for a new one, leaving the scheme, user
 * info, domain, port, query data and fragment as they were split out of the
 * original. A leading '/' is added to the new path if it lacks one.
 * @param {string} uri URI to use as the base.
 * @param {string} path New path.
 * @return {string} Updated URI.
 */
goog.uri.utils.setPath = function(uri, path) {
  var ComponentIndex = goog.uri.utils.ComponentIndex;

  // Add any missing '/'.
  var rootedPath = goog.string.startsWith(path, '/') ? path : '/' + path;

  var pieces = goog.uri.utils.split(uri);
  return goog.uri.utils.buildFromEncodedParts(
      pieces[ComponentIndex.SCHEME],
      pieces[ComponentIndex.USER_INFO],
      pieces[ComponentIndex.DOMAIN],
      pieces[ComponentIndex.PORT],
      rootedPath,
      pieces[ComponentIndex.QUERY_DATA],
      pieces[ComponentIndex.FRAGMENT]);
};
1061
1062
/**
 * Query parameter keys with a standard meaning in this module.
 * @enum {string}
 */
goog.uri.utils.StandardQueryParam = {
  /** Otherwise-unused parameter whose value is randomized by makeUnique. */
  RANDOM: 'zx'
};
1072
1073
/**
 * Gives a URI a fresh random "zx" query parameter, replacing any "zx"
 * parameter it already had.
 * @param {string} uri Any URI.
 * @return {string} That URI with the "zx" parameter added or replaced to
 *     contain a random string.
 */
goog.uri.utils.makeUnique = function(uri) {
  var randomValue = goog.string.getRandomString();
  return goog.uri.utils.setParam(
      uri, goog.uri.utils.StandardQueryParam.RANDOM, randomValue);
};