lib/goog/uri/utils.js

1// Copyright 2008 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/**
16 * @fileoverview Simple utilities for dealing with URI strings.
17 *
18 * This is intended to be a lightweight alternative to constructing goog.Uri
19 * objects. Whereas goog.Uri adds several kilobytes to the binary regardless
20 * of how much of its functionality you use, this is designed to be a set of
21 * mostly-independent utilities so that the compiler includes only what is
22 * necessary for the task. Estimated savings of porting is 5k pre-gzip and
23 * 1.5k post-gzip. To ensure the savings remain, future developers should
24 * avoid adding new functionality to existing functions, but instead create
25 * new ones and factor out shared code.
26 *
27 * Many of these utilities have limited functionality, tailored to common
28 * cases. The query parameter utilities assume that the parameter keys are
29 * already encoded, since most keys are compile-time alphanumeric strings. The
30 * query parameter mutation utilities also do not tolerate fragment identifiers.
31 *
32 * By design, these functions can be slower than goog.Uri equivalents.
33 * Repeated calls to some of these functions may be quadratic in behavior for IE,
34 * although the effect is somewhat limited given the 2kb limit.
35 *
36 * One advantage of the limited functionality here is that this approach is
37 * less sensitive to differences in URI encodings than goog.Uri, since these
38 * functions modify the strings in place, rather than decoding and
39 * re-encoding.
40 *
41 * Uses features of RFC 3986 for parsing/formatting URIs:
42 * http://www.ietf.org/rfc/rfc3986.txt
43 *
44 * @author gboyer@google.com (Garrett Boyer) - The "lightened" design.
45 * @author msamuel@google.com (Mike Samuel) - Domain knowledge and regexes.
46 */
47
48goog.provide('goog.uri.utils');
49goog.provide('goog.uri.utils.ComponentIndex');
50goog.provide('goog.uri.utils.QueryArray');
51goog.provide('goog.uri.utils.QueryValue');
52goog.provide('goog.uri.utils.StandardQueryParam');
53
54goog.require('goog.asserts');
55goog.require('goog.string');
56goog.require('goog.userAgent');
57
58
/**
 * Numeric character codes of the punctuation that delimits query parameters
 * and fragments. They are spelled out as plain numbers so that the result of
 * String.prototype.charCodeAt can be compared against them directly.
 * @enum {number}
 * @private
 */
goog.uri.utils.CharCode_ = {
  AMPERSAND: 38,  // '&'
  EQUAL: 61,      // '='
  HASH: 35,       // '#'
  QUESTION: 63    // '?'
};
70
71
/**
 * Assembles a URI string out of parts that are already URI-encoded.
 *
 * Nothing is encoded here. A part that is null, undefined, or otherwise falsy
 * (for example the empty string or a port of zero) is left out together with
 * its delimiter. The user info and the port are only emitted when a domain is
 * supplied.
 *
 * @param {?string=} opt_scheme The scheme such as 'http'.
 * @param {?string=} opt_userInfo The user name before the '@'.
 * @param {?string=} opt_domain The domain such as 'www.google.com', already
 *     URI-encoded.
 * @param {(string|number|null)=} opt_port The port number.
 * @param {?string=} opt_path The path, already URI-encoded.  If it is not
 *     empty, it must begin with a slash.
 * @param {?string=} opt_queryData The URI-encoded query data.
 * @param {?string=} opt_fragment The URI-encoded fragment identifier.
 * @return {string} The fully combined URI.
 */
goog.uri.utils.buildFromEncodedParts = function(opt_scheme, opt_userInfo,
    opt_domain, opt_port, opt_path, opt_queryData, opt_fragment) {
  // The authority section ('//user@domain:port') exists only with a domain.
  var authority = '';
  if (opt_domain) {
    authority = '//' +
        (opt_userInfo ? opt_userInfo + '@' : '') +
        opt_domain +
        (opt_port ? ':' + opt_port : '');
  }

  return (opt_scheme ? opt_scheme + ':' : '') +
      authority +
      (opt_path ? opt_path : '') +
      (opt_queryData ? '?' + opt_queryData : '') +
      (opt_fragment ? '#' + opt_fragment : '');
};
125
126
/**
 * A regular expression for breaking a URI into its component parts.
 *
 * {@link http://www.ietf.org/rfc/rfc3986.txt} says in Appendix B
 * As the "first-match-wins" algorithm is identical to the "greedy"
 * disambiguation method used by POSIX regular expressions, it is natural and
 * commonplace to use a regular expression for parsing the potential five
 * components of a URI reference.
 *
 * The following line is the regular expression for breaking-down a
 * well-formed URI reference into its components.
 *
 * <pre>
 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *  12            3  4          5       6  7        8 9
 * </pre>
 *
 * The numbers in the second line above are only to assist readability; they
 * indicate the reference points for each subexpression (i.e., each paired
 * parenthesis). We refer to the value matched for subexpression <n> as $<n>.
 * For example, matching the above expression to
 * <pre>
 *     http://www.ics.uci.edu/pub/ietf/uri/#Related
 * </pre>
 * results in the following subexpression matches:
 * <pre>
 *    $1 = http:
 *    $2 = http
 *    $3 = //www.ics.uci.edu
 *    $4 = www.ics.uci.edu
 *    $5 = /pub/ietf/uri/
 *    $6 = <undefined>
 *    $7 = <undefined>
 *    $8 = #Related
 *    $9 = Related
 * </pre>
 * where <undefined> indicates that the component is not present, as is the
 * case for the query component in the above example. Therefore, we can
 * determine the value of the five components as
 * <pre>
 *    scheme    = $2
 *    authority = $4
 *    path      = $5
 *    query     = $7
 *    fragment  = $9
 * </pre>
 *
 * The regular expression has been modified slightly to expose the
 * userInfo, domain, and port separately from the authority.
 * The modified version yields
 * <pre>
 *    $1 = http              scheme
 *    $2 = <undefined>       userInfo -\
 *    $3 = www.ics.uci.edu   domain     | authority
 *    $4 = <undefined>       port     -/
 *    $5 = /pub/ietf/uri/    path
 *    $6 = <undefined>       query without ?
 *    $7 = Related           fragment without #
 * </pre>
 *
 * The fragment group also differs from the RFC expression: it uses [\s\S]
 * instead of '.', because '.' does not match line terminators. With '.', a
 * string that has a line terminator after the '#' would fail to match at all
 * and String.prototype.match would return null.
 *
 * @type {!RegExp}
 * @private
 */
goog.uri.utils.splitRe_ = new RegExp(
    '^' +
    '(?:' +
      '([^:/?#.]+)' +                     // scheme - ignore special characters
                                          // used by other URL parts such as :,
                                          // ?, /, #, and .
    ':)?' +
    '(?://' +
      '(?:([^/?#]*)@)?' +                 // userInfo
      '([^/#?]*?)' +                      // domain
      '(?::([0-9]+))?' +                  // port
      '(?=[/#?]|$)' +                     // authority-terminating character
    ')?' +
    '([^?#]+)?' +                         // path
    '(?:\\?([^#]*))?' +                   // query
    '(?:#([\\s\\S]*))?' +                 // fragment - may contain anything,
                                          // including line terminators
    '$');
206
207
/**
 * Positions of the URI components inside the array returned by
 * goog.uri.utils.split. Numbering starts at 1 because index 0 of a regular
 * expression match array holds the whole matched string.
 * @enum {number}
 */
goog.uri.utils.ComponentIndex = {
  SCHEME: 1,      // scheme, without the trailing ':'
  USER_INFO: 2,   // text before the '@' of the authority
  DOMAIN: 3,      // domain part of the authority
  PORT: 4,        // digits after the ':' of the authority
  PATH: 5,        // path, including a leading slash if any
  QUERY_DATA: 6,  // query, without the leading '?'
  FRAGMENT: 7     // fragment, without the leading '#'
};
221
222
/**
 * Splits a URI into its component parts.
 *
 * Each component can be accessed via the component indices; for example:
 * <pre>
 * goog.uri.utils.split(someStr)[goog.uri.utils.ComponentIndex.QUERY_DATA];
 * </pre>
 *
 * @param {string} uri The URI string to examine.
 * @return {!Array.<string|undefined>} Each component still URI-encoded.
 *     Each component that is present will contain the encoded value, whereas
 *     components that are not present will be undefined or empty, depending
 *     on the browser's regular expression implementation.  The result is
 *     treated as non-null because every component in the expression is
 *     optional, so arbitrary strings are expected to match as path names.
 */
goog.uri.utils.split = function(uri) {
  // Run the location sanity check before parsing; it throws on a mismatch.
  goog.uri.utils.phishingProtection_();

  var components = uri.match(goog.uri.utils.splitRe_);
  return /** @type {!Array.<string|undefined>} */ (components);
};
245
246
/**
 * Whether the location sanity check in goog.uri.utils.phishingProtection_
 * still has to run. Initialized to true only on WebKit.
 *
 * Background: Safari has a bug where an http URL with a username, e.g.,
 *     http://evil.com%2F@google.com/
 * is reported through window.location.href as
 *     http://evil.com/google.com/
 * so that anyone who parses the domain out of that URL gets the wrong
 * domain. Exploits have been seen where this is used to trick Safari into
 * loading resources from evil domains.
 *
 * As a workaround, a "Safari phishing check" is run that throws an exception
 * when this situation is detected. There is no convenient place for the
 * check, so it is applied to anyone doing URI parsing on WebKit.
 *
 * This should be removed once Safari fixes their bug.
 *
 * Exploit reported by Masato Kinugawa.
 *
 * @type {boolean}
 * @private
 */
goog.uri.utils.needsPhishingProtection_ = goog.userAgent.WEBKIT;
271
272
/**
 * Checks whether the page location looks spoofed and throws if it does.
 *
 * The domain parsed out of location.href is compared with location.hostname.
 * After a check that finds no mismatch the flag stays cleared, so the check
 * is not repeated. When a mismatch is found the flag is set again, so later
 * calls throw as well.
 * @throws {Error} If the parsed domain differs from location.hostname.
 * @private
 */
goog.uri.utils.phishingProtection_ = function() {
  if (!goog.uri.utils.needsPhishingProtection_) {
    return;
  }

  // Clear the flag first: getDomain() below goes through split(), which calls
  // back into this function, and must not recurse.
  goog.uri.utils.needsPhishingProtection_ = false;

  // Use quoted access, just in case the user isn't using location externs.
  var pageLocation = goog.global['location'];
  var pageHref = pageLocation && pageLocation['href'];
  if (!pageHref) {
    return;
  }

  var parsedDomain = goog.uri.utils.getDomain(pageHref);
  if (parsedDomain && parsedDomain != pageLocation['hostname']) {
    // Phishing attack: re-arm the check so that every later call fails too.
    goog.uri.utils.needsPhishingProtection_ = true;
    throw Error();
  }
};
297
298
/**
 * Decodes a URI component unless there is nothing to decode.
 *
 * A falsy input (null or the empty string) is returned unchanged. Errors
 * raised by decodeURIComponent for malformed escape sequences are not caught
 * here.
 * @param {?string} uri A possibly null string.
 * @return {?string} The string URI-decoded, or null if uri is null.
 * @private
 */
goog.uri.utils.decodeIfPossible_ = function(uri) {
  if (!uri) {
    return uri;
  }
  return decodeURIComponent(uri);
};
307
308
/**
 * Looks up a single URI component by its index in the split result.
 *
 * The named accessors further down (getPathEncoded() and friends) are more
 * readable and should be preferred.
 *
 * @param {goog.uri.utils.ComponentIndex} componentIndex The component index.
 * @param {string} uri The URI to examine.
 * @return {?string} The still-encoded component, or null if the component
 *    is not present.
 * @private
 */
goog.uri.utils.getComponentByIndex_ = function(componentIndex, uri) {
  var component = goog.uri.utils.split(uri)[componentIndex];
  // Undefined, null, and the empty string are all reported as null.
  return component ? component : null;
};
325
326
/**
 * Extracts the scheme of a URI.
 * @param {string} uri The URI to examine.
 * @return {?string} The protocol or scheme, or null if none.  Does not
 *     include trailing colons or slashes.
 */
goog.uri.utils.getScheme = function(uri) {
  var schemeIndex = goog.uri.utils.ComponentIndex.SCHEME;
  return goog.uri.utils.getComponentByIndex_(schemeIndex, uri);
};
336
337
/**
 * Gets the effective scheme for the URL.  If the URL is relative then the
 * scheme is derived from the location of the page (or worker), when one is
 * available.
 * @param {string} uri The URI to examine.
 * @return {string} The protocol or scheme, always lower case.  Empty string
 *     if the URI has no scheme and no location is available.
 */
goog.uri.utils.getEffectiveScheme = function(uri) {
  var scheme = goog.uri.utils.getScheme(uri);
  if (!scheme) {
    // Look 'self' up through goog.global instead of using the bare
    // identifier: a bare reference throws a ReferenceError in environments
    // that define no such global.
    var globalSelf = goog.global['self'];
    // NOTE: When called from a web worker in Firefox 3.5, location may be
    // null. All other browsers with web workers support self.location from
    // the worker.
    var selfLocation = globalSelf && globalSelf.location;
    if (selfLocation) {
      var protocol = selfLocation.protocol;
      // Drop the trailing colon of the protocol, e.g. 'https:' -> 'https'.
      scheme = protocol.substr(0, protocol.length - 1);
    }
  }
  return scheme ? scheme.toLowerCase() : '';
};
354
355
/**
 * Extracts the user info of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The user name still encoded, or null if none.
 */
goog.uri.utils.getUserInfoEncoded = function(uri) {
  var userInfoIndex = goog.uri.utils.ComponentIndex.USER_INFO;
  return goog.uri.utils.getComponentByIndex_(userInfoIndex, uri);
};
364
365
/**
 * Extracts the user info of a URI and URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded user info, or null if none.
 */
goog.uri.utils.getUserInfo = function(uri) {
  var encodedUserInfo = goog.uri.utils.getUserInfoEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedUserInfo);
};
374
375
/**
 * Extracts the domain of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The domain name still encoded, or null if none.
 */
goog.uri.utils.getDomainEncoded = function(uri) {
  var domainIndex = goog.uri.utils.ComponentIndex.DOMAIN;
  return goog.uri.utils.getComponentByIndex_(domainIndex, uri);
};
384
385
/**
 * Extracts the domain of a URI and URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded domain, or null if none.
 */
goog.uri.utils.getDomain = function(uri) {
  var encodedDomain = goog.uri.utils.getDomainEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedDomain);
};
393
394
/**
 * Extracts the port of a URI as a number.
 * @param {string} uri The URI to examine.
 * @return {?number} The port number, or null if none.
 */
goog.uri.utils.getPort = function(uri) {
  var portText = goog.uri.utils.getComponentByIndex_(
      goog.uri.utils.ComponentIndex.PORT, uri);
  // Number(null) is 0 and a non-numeric string gives NaN; both are falsy and
  // are reported as null. A literal port of zero is reported as null as well,
  // which is fine since zero isn't a relevant port number.
  var port = Number(portText);
  return port ? port : null;
};
407
408
/**
 * Extracts the path of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The path still encoded, or null if none. Includes the
 *     leading slash, if any.
 */
goog.uri.utils.getPathEncoded = function(uri) {
  var pathIndex = goog.uri.utils.ComponentIndex.PATH;
  return goog.uri.utils.getComponentByIndex_(pathIndex, uri);
};
418
419
/**
 * Extracts the path of a URI and URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded path, or null if none.  Includes the leading
 *     slash, if any.
 */
goog.uri.utils.getPath = function(uri) {
  var encodedPath = goog.uri.utils.getPathEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedPath);
};
428
429
/**
 * Extracts the query data of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The query data still encoded, or null if none.  Does not
 *     include the question mark itself.
 */
goog.uri.utils.getQueryData = function(uri) {
  var queryIndex = goog.uri.utils.ComponentIndex.QUERY_DATA;
  return goog.uri.utils.getComponentByIndex_(queryIndex, uri);
};
439
440
/**
 * Extracts the fragment identifier of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The fragment identifier, or null if none.  Does not
 *     include the hash mark itself.  If the hash mark is the last character
 *     of the URI, the result is the empty string.
 */
goog.uri.utils.getFragmentEncoded = function(uri) {
  // The hash mark may not appear in any other part of the URL, so the first
  // one found starts the fragment.
  var hashPosition = uri.indexOf('#');
  if (hashPosition < 0) {
    return null;
  }
  return uri.substr(hashPosition + 1);
};
451
452
/**
 * Replaces the fragment identifier of a URI. Any existing fragment is
 * removed first; a null or empty fragment leaves the URI without one.
 * @param {string} uri The URI to examine.
 * @param {?string} fragment The encoded fragment identifier, or null if none.
 *     Does not include the hash mark itself.
 * @return {string} The URI with the fragment set.
 */
goog.uri.utils.setFragmentEncoded = function(uri, fragment) {
  var uriWithoutFragment = goog.uri.utils.removeFragment(uri);
  if (!fragment) {
    return uriWithoutFragment;
  }
  return uriWithoutFragment + '#' + fragment;
};
462
463
/**
 * Extracts the fragment identifier of a URI and URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded fragment identifier, or null if none.  Does
 *     not include the hash mark.
 */
goog.uri.utils.getFragment = function(uri) {
  var encodedFragment = goog.uri.utils.getFragmentEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedFragment);
};
473
474
/**
 * Extracts everything up to the port of the URI: scheme, user info, domain
 * and port, reassembled from the still-encoded components.
 * @param {string} uri The URI string.
 * @return {string} Everything up to and including the port.
 */
goog.uri.utils.getHost = function(uri) {
  var parts = goog.uri.utils.split(uri);
  var scheme = parts[goog.uri.utils.ComponentIndex.SCHEME];
  var userInfo = parts[goog.uri.utils.ComponentIndex.USER_INFO];
  var domain = parts[goog.uri.utils.ComponentIndex.DOMAIN];
  var port = parts[goog.uri.utils.ComponentIndex.PORT];
  return goog.uri.utils.buildFromEncodedParts(scheme, userInfo, domain, port);
};
488
489
/**
 * Extracts the path of the URL and everything after it, reassembled from the
 * still-encoded components.
 * @param {string} uri The URI string.
 * @return {string} The URI, starting at the path and including the query
 *     parameters and fragment identifier.
 */
goog.uri.utils.getPathAndAfter = function(uri) {
  var parts = goog.uri.utils.split(uri);
  var path = parts[goog.uri.utils.ComponentIndex.PATH];
  var queryData = parts[goog.uri.utils.ComponentIndex.QUERY_DATA];
  var fragment = parts[goog.uri.utils.ComponentIndex.FRAGMENT];
  // Scheme, user info, domain and port are deliberately left out.
  return goog.uri.utils.buildFromEncodedParts(
      null, null, null, null, path, queryData, fragment);
};
503
504
/**
 * Gets the URI with the fragment identifier removed. A URI without a hash
 * mark is returned unchanged.
 * @param {string} uri The URI to examine.
 * @return {string} Everything preceding the hash mark.
 */
goog.uri.utils.removeFragment = function(uri) {
  // The hash mark may not appear in any other part of the URL, so the first
  // one found starts the fragment.
  var hashPosition = uri.indexOf('#');
  if (hashPosition < 0) {
    return uri;
  }
  return uri.substr(0, hashPosition);
};
515
516
/**
 * Ensures that two URI's have the exact same domain, scheme, and port.
 *
 * Unlike the version in goog.Uri, this checks protocol, and therefore is
 * suitable for checking against the browser's same-origin policy.
 *
 * The still-encoded components are compared as they appear in each URI; no
 * normalization is applied before the comparison.
 *
 * @param {string} uri1 The first URI.
 * @param {string} uri2 The second URI.
 * @return {boolean} Whether they have the same domain and port.
 */
goog.uri.utils.haveSameDomain = function(uri1, uri2) {
  var first = goog.uri.utils.split(uri1);
  var second = goog.uri.utils.split(uri2);

  if (first[goog.uri.utils.ComponentIndex.DOMAIN] !=
      second[goog.uri.utils.ComponentIndex.DOMAIN]) {
    return false;
  }
  if (first[goog.uri.utils.ComponentIndex.SCHEME] !=
      second[goog.uri.utils.ComponentIndex.SCHEME]) {
    return false;
  }
  return first[goog.uri.utils.ComponentIndex.PORT] ==
      second[goog.uri.utils.ComponentIndex.PORT];
};
537
538
/**
 * Asserts that there are no fragment or query identifiers, only in uncompiled
 * mode (that is, only when goog.DEBUG is set).
 * @param {string} uri The URI to examine.
 * @throws {Error} In debug mode, if the URI contains a '#' or a '?'.
 * @private
 */
goog.uri.utils.assertNoFragmentsOrQueries_ = function(uri) {
  // NOTE: would use goog.asserts here, but jscompiler doesn't know that
  // indexOf has no side effects.
  if (!goog.DEBUG) {
    return;
  }
  if (uri.indexOf('#') < 0 && uri.indexOf('?') < 0) {
    return;
  }
  throw Error('goog.uri.utils: Fragment or query identifiers are not ' +
              'supported: [' + uri + ']');
};
553
554
/**
 * A value accepted by the query parameter serializing utilities.
 *
 * A value that is null or undefined causes its key-value pair to be skipped,
 * which is an easy way to omit a parameter conditionally.  A non-array value
 * is converted to a string and URI-encoded.  An array value is expanded into
 * multiple &key=value pairs, one per element, with each element stringized
 * and URI-encoded.
 *
 * @typedef {*}
 */
goog.uri.utils.QueryValue;
566
567
/**
 * A flat array that represents a set of query parameters as alternating keys
 * and values.
 *
 * Keys live at even indices and are assumed to be URI-encoded already.  See
 * goog.uri.utils.QueryValue for how the values at odd indices are encoded.
 *
 * Example:
 * <pre>
 * var data = [
 *   // Simple param: ?name=BobBarker
 *   'name', 'BobBarker',
 *   // Conditional param -- may be omitted entirely.
 *   'specialDietaryNeeds', hasDietaryNeeds() ? getDietaryNeeds() : null,
 *   // Multi-valued param: &house=LosAngeles&house=NewYork&house=null
 *   'house', ['LosAngeles', 'NewYork', null]
 * ];
 * </pre>
 *
 * @typedef {!Array.<string|goog.uri.utils.QueryValue>}
 */
goog.uri.utils.QueryArray;
590
591
/**
 * Joins a base URI and query data held in a string buffer, fixing up the
 * delimiter between them.
 *
 * Internal implementation utility, performing very few object allocations.
 * The buffer is modified in place.
 *
 * @param {!Array.<string|undefined>} buffer A string buffer.  The first element
 *     must be the base URI, and may have a fragment identifier.  If the array
 *     contains more than one element, the second element must be an ampersand,
 *     and may be overwritten, depending on the base URI.  Undefined elements
 *     are treated as empty-string.
 * @return {string} The concatenated URI and query data.
 * @private
 */
goog.uri.utils.appendQueryData_ = function(buffer) {
  // Without a second element no query parameter was added, so there is no
  // punctuation to fix up.
  if (!buffer[1]) {
    return buffer.join('');
  }

  var base = /** @type {string} */ (buffer[0]);

  // A fragment has to stay last: detach it from the base URI and re-append it
  // behind the query data.
  var fragmentStart = base.indexOf('#');
  if (fragmentStart >= 0) {
    buffer.push(base.substr(fragmentStart));
    base = base.substr(0, fragmentStart);
    buffer[0] = base;
  }

  // Choose the delimiter between the base URI and the first new parameter.
  var queryStart = base.indexOf('?');
  if (queryStart < 0) {
    // No existing query: the ampersand becomes a question mark.
    buffer[1] = '?';
  } else if (queryStart == base.length - 1) {
    // The base already ends in '?': no additional delimiter is needed.
    buffer[1] = undefined;
  }
  // Otherwise the ampersand is kept to separate old and new parameters.

  return buffer.join('');
};
630
631
/**
 * Appends the pieces of one or more '&key=value' pairs to an array,
 * supporting multi-valued (array) values.
 * @param {string} key The key prefix.
 * @param {goog.uri.utils.QueryValue} value The value to serialize.
 * @param {!Array.<string>} pairs The array to which the 'key=value' strings
 *     should be appended.
 * @private
 */
goog.uri.utils.appendKeyValuePairs_ = function(key, value, pairs) {
  if (!goog.isArray(value)) {
    // A top-level null or undefined is skipped entirely.
    if (value != null) {
      // An empty string is written without the equal sign, to be consistent
      // with UriBuilder.java. Zero is not special-cased and is encoded into
      // the URL as a literal string.
      var separator = value === '' ? '' : '=';
      pairs.push('&', key, separator, goog.string.urlEncode(value));
    }
    return;
  }

  // Convince the compiler it's an array.
  goog.asserts.assertArray(value);
  for (var i = 0; i < value.length; i++) {
    // Each element is stringized before recursing. That bypasses the null and
    // array handling above, so null and undefined elements are written as
    // literal 'null' and 'undefined', and nested arrays are encoded in the
    // default way instead of being expanded.
    goog.uri.utils.appendKeyValuePairs_(key, String(value[i]), pairs);
  }
};
661
662
/**
 * Fills a buffer with query data taken from a sequence of alternating keys
 * and values.
 *
 * @param {!Array.<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {goog.uri.utils.QueryArray|Arguments} keysAndValues An array with
 *     alternating keys and values -- see the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {!Array.<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBuffer_ = function(
    buffer, keysAndValues, opt_startIndex) {
  var start = opt_startIndex || 0;

  // Everything from the start offset onwards must pair up as key, value.
  goog.asserts.assert(
      Math.max(keysAndValues.length - start, 0) % 2 == 0,
      'goog.uri.utils: Key/value lists must be even in length.');

  for (var i = start; i < keysAndValues.length; i += 2) {
    var key = keysAndValues[i];
    var value = keysAndValues[i + 1];
    goog.uri.utils.appendKeyValuePairs_(key, value, buffer);
  }

  return buffer;
};
686
687
/**
 * Builds a query data string from a sequence of alternating keys and values.
 * A value that is the empty string is written as a bare key without an
 * equal sign.
 *
 * @param {goog.uri.utils.QueryArray} keysAndValues Alternating keys and
 *     values.  See the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryData = function(keysAndValues, opt_startIndex) {
  var pieces = goog.uri.utils.buildQueryDataBuffer_(
      [], keysAndValues, opt_startIndex);
  // The first piece is the leading ampersand; blank it out.
  pieces[0] = '';
  return pieces.join('');
};
703
704
/**
 * Fills a buffer with query data taken from a map.
 *
 * The map is walked with for-in, so enumerable properties inherited through
 * the prototype chain are serialized as well.
 *
 * @param {!Array.<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {Object.<goog.uri.utils.QueryValue>} map An object where keys are
 *     URI-encoded parameter keys, and the values conform to the contract
 *     specified in the goog.uri.utils.QueryValue typedef.
 * @return {!Array.<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBufferFromMap_ = function(buffer, map) {
  for (var paramKey in map) {
    var paramValue = map[paramKey];
    goog.uri.utils.appendKeyValuePairs_(paramKey, paramValue, buffer);
  }

  return buffer;
};
723
724
/**
 * Builds a query data string from a map.
 * A value that is the empty string is written as a bare key without an
 * equal sign.
 *
 * @param {Object} map An object where keys are URI-encoded parameter keys,
 *     and the values are arbitrary types or arrays.  Keys with a null value
 *     are dropped.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryDataFromMap = function(map) {
  var pieces = goog.uri.utils.buildQueryDataBufferFromMap_([], map);
  // The first piece is the leading ampersand; blank it out.
  pieces[0] = '';
  return pieces.join('');
};
739
740
/**
 * Appends URI parameters to an existing URI.
 *
 * The variable arguments may contain alternating keys and values.  Keys are
 * assumed to be already URI encoded.  The values should not be URI-encoded,
 * and will instead be encoded by this function.
 * <pre>
 * appendParams('http://www.foo.com?existing=true',
 *     'key1', 'value1',
 *     'key2', 'value?willBeEncoded',
 *     'key3', ['valueA', 'valueB', 'valueC'],
 *     'key4', null);
 * result: 'http://www.foo.com?existing=true&' +
 *     'key1=value1&' +
 *     'key2=value%3FwillBeEncoded&' +
 *     'key3=valueA&key3=valueB&key3=valueC'
 * </pre>
 *
 * A single call to this function will not exhibit quadratic behavior in IE,
 * whereas multiple repeated calls may, although the effect is limited by
 * fact that URL's generally can't exceed 2kb.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {...(goog.uri.utils.QueryArray|string|goog.uri.utils.QueryValue)} var_args
 *     An array or argument list conforming to goog.uri.utils.QueryArray.
 * @return {string} The URI with all query parameters added.
 */
goog.uri.utils.appendParams = function(uri, var_args) {
  var buffer;
  if (arguments.length == 2) {
    // Exactly one extra argument: it is taken to be the whole key/value array.
    buffer = goog.uri.utils.buildQueryDataBuffer_([uri], arguments[1], 0);
  } else {
    // Otherwise the keys and values are the arguments following the URI.
    buffer = goog.uri.utils.buildQueryDataBuffer_([uri], arguments, 1);
  }
  return goog.uri.utils.appendQueryData_(buffer);
};
774
775
/**
 * Appends query parameters from a map to an existing URI.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {Object} map An object where keys are URI-encoded parameter keys,
 *     and the values are arbitrary types or arrays.  Keys with a null value
 *     are dropped.
 * @return {string} The URI with the parameters from the map appended.
 */
goog.uri.utils.appendParamsFromMap = function(uri, map) {
  var buffer = goog.uri.utils.buildQueryDataBufferFromMap_([uri], map);
  return goog.uri.utils.appendQueryData_(buffer);
};
789
790
/**
 * Appends a single URI parameter.
 *
 * Repeated calls to this can exhibit quadratic behavior in IE6 due to the
 * way string append works, though it should be limited given the 2kb limit.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} key The key, which must already be URI encoded.
 * @param {*=} opt_value The value, which will be stringized and encoded
 *     (assumed not already to be encoded).  If omitted, undefined, or null, the
 *     key will be added as a valueless parameter.
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.appendParam = function(uri, key, opt_value) {
  // The ampersand is a placeholder that appendQueryData_ may replace.
  var buffer = goog.isDefAndNotNull(opt_value) ?
      [uri, '&', key, '=', goog.string.urlEncode(opt_value)] :
      [uri, '&', key];
  return goog.uri.utils.appendQueryData_(buffer);
};
811
812
/**
 * Finds the next instance of a query parameter with the specified name.
 *
 * Does not instantiate any objects.
 *
 * @param {string} uri The URI to search.  May contain a fragment identifier
 *     if opt_hashIndex is specified.
 * @param {number} startIndex The index to begin searching for the key at.  A
 *     match may be found even if this is one character after the ampersand.
 * @param {string} keyEncoded The URI-encoded key.
 * @param {number} hashOrEndIndex Index to stop looking at.  If a hash
 *     mark is present, it should be its index, otherwise it should be the
 *     length of the string.
 * @return {number} The position of the first character in the key's name,
 *     immediately after either a question mark or an ampersand, or -1 if the
 *     key was not found before hashOrEndIndex.
 * @private
 */
goog.uri.utils.findParam_ = function(
    uri, startIndex, keyEncoded, hashOrEndIndex) {
  var keyLength = keyEncoded.length;

  // Search for the key itself and post-filter for surrounding punctuation,
  // rather than expensively building a regexp.
  var candidate = uri.indexOf(keyEncoded, startIndex);
  while (candidate >= 0 && candidate < hashOrEndIndex) {
    // The key has to start right after a '&' or '?'.
    var charBefore = uri.charCodeAt(candidate - 1);
    if (charBefore == goog.uri.utils.CharCode_.AMPERSAND ||
        charBefore == goog.uri.utils.CharCode_.QUESTION) {
      // The key has to end at '=', '&', '#', or the end of the string (where
      // charCodeAt gives NaN).
      var charAfter = uri.charCodeAt(candidate + keyLength);
      if (!charAfter ||
          charAfter == goog.uri.utils.CharCode_.EQUAL ||
          charAfter == goog.uri.utils.CharCode_.AMPERSAND ||
          charAfter == goog.uri.utils.CharCode_.HASH) {
        return candidate;
      }
    }
    // Not a real parameter boundary; resume the search past this occurrence.
    candidate = uri.indexOf(keyEncoded, candidate + keyLength + 1);
  }

  return -1;
};
858
859
/**
 * Regular expression that matches at the first hash mark or, when there is
 * none, at the end of the string. Used with String.prototype.search to find
 * the index where the query data of a URI stops.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.hashOrEndRe_ = /#|$/;
866
867
/**
 * Determines if the URI contains a specific query parameter key.
 *
 * Performs no object instantiations.
 *
 * @param {string} uri The URI to process.  May contain a fragment
 *     identifier.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {boolean} Whether the key is present.
 */
goog.uri.utils.hasParam = function(uri, keyEncoded) {
  // Only look in front of the fragment, if there is one.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  return goog.uri.utils.findParam_(uri, 0, keyEncoded, queryEnd) >= 0;
};
882
883
/**
 * Gets the first value of a query parameter.
 * @param {string} uri The URI to process.  May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {?string} The first value of the parameter (URI-decoded), or null
 *     if the parameter is not found.  A parameter without a value gives the
 *     empty string.
 */
goog.uri.utils.getParamValue = function(uri, keyEncoded) {
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var keyStart = goog.uri.utils.findParam_(uri, 0, keyEncoded, queryEnd);
  if (keyStart < 0) {
    return null;
  }

  // The value runs up to the next '&' or, failing that, to the end of the
  // query data.
  var valueEnd = uri.indexOf('&', keyStart);
  if (valueEnd < 0 || valueEnd > queryEnd) {
    valueEnd = queryEnd;
  }

  // Skip over the "key=" or "key&" substring.
  var valueStart = keyStart + keyEncoded.length + 1;
  // Use substr, because it (unlike substring) will return empty string
  // if valueStart > valueEnd.
  return goog.string.urlDecode(uri.substr(valueStart, valueEnd - valueStart));
};
911
912
/**
 * Collects the value of every occurrence of a query parameter.
 * @param {string} uri The URI to inspect. A fragment identifier is allowed.
 * @param {string} keyEncoded The URI-encoded key. Case-sensitive.
 * @return {!Array.<string>} The values of all matching parameters, in the
 *     order they occur, each passed through goog.string.urlDecode. Empty
 *     (never null) when the key does not occur.
 */
goog.uri.utils.getParamValues = function(uri, keyEncoded) {
  // Index of the '#', or the URI length when there is no fragment.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var values = [];
  var cursor = 0;

  while (true) {
    var keyStart = goog.uri.utils.findParam_(
        uri, cursor, keyEncoded, queryEnd);
    if (keyStart < 0) {
      break;
    }

    // This parameter ends at the next '&' or, failing that, at the end of
    // the query data. The following search resumes from the same spot.
    var nextAmpersand = uri.indexOf('&', keyStart);
    cursor = (nextAmpersand < 0 || nextAmpersand > queryEnd) ?
        queryEnd : nextAmpersand;

    // Step over the key and the single character after it ('=' or '&').
    var valueStart = keyStart + keyEncoded.length + 1;

    // slice() gives '' when valueStart is past cursor (a key with no value),
    // where substring() would swap the two indices.
    values.push(goog.string.urlDecode(uri.slice(valueStart, cursor)));
  }

  return values;
};
945
946
/**
 * Pattern that matches a '?' or '&' sitting directly in front of a '#' or the
 * end of the string. Whatever follows the punctuation ('#' or nothing) is
 * captured as group 1, so a '$1' replacement drops only the '?' or '&'.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.trailingQueryPunctuationRe_ = new RegExp('[?&]($|#)');
953
954
/**
 * Strips every occurrence of a query parameter from a URI.
 * @param {string} uri The URI to process. Must not contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.
 * @return {string} The URI with all instances of the parameter removed.
 */
goog.uri.utils.removeParam = function(uri, keyEncoded) {
  // Index of the '#', or the URI length when there is no '#'.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var pieces = [];
  var cursor = 0;

  while (true) {
    var keyStart = goog.uri.utils.findParam_(
        uri, cursor, keyEncoded, queryEnd);
    if (keyStart < 0) {
      break;
    }

    // Keep everything between the previous cut and the start of this key.
    // That includes the '?' or '&' in front of the key; a dangling one is
    // cleaned up after the pieces are joined.
    pieces.push(uri.substring(cursor, keyStart));

    // Resume right after the '&' that ends this parameter. Without such an
    // '&', or if it lies beyond the query data, resume at the end of the
    // query data so that a '#' is never skipped.
    var nextAmpersand = uri.indexOf('&', keyStart);
    cursor = nextAmpersand < 0 ?
        queryEnd : Math.min(nextAmpersand + 1, queryEnd);
  }

  // Whatever follows the last removed parameter is kept verbatim.
  pieces.push(uri.substring(cursor));

  // Drop a '?' or '&' left dangling at the end of the query data.
  var joined = pieces.join('');
  return joined.replace(goog.uri.utils.trailingQueryPunctuationRe_, '$1');
};
986
987
/**
 * Ensures a parameter is defined exactly once: all existing definitions of
 * the key are removed and a single new definition is appended.
 *
 * Each call rescans and rebuilds the URI string, so many repeated calls can
 * show quadratic behavior, though this should be limited given the 2kb
 * limit. Consider using appendParams to add multiple parameters in bulk.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} keyEncoded The key, which must already be URI encoded.
 * @param {*} value The value, which will be stringized and encoded (assumed
 *     not already to be encoded).
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.setParam = function(uri, keyEncoded, value) {
  var withoutKey = goog.uri.utils.removeParam(uri, keyEncoded);
  return goog.uri.utils.appendParam(withoutKey, keyEncoded, value);
};
1006
1007
/**
 * Joins a base URI and a path with exactly one '/' at the seam, so that a
 * base ending in '/' and a path starting with '/' do not produce "//".
 * At most one slash is trimmed from each side. The base URI must not contain
 * query or fragment identifiers (this is asserted); the path to append may
 * not contain them either.
 *
 * @param {string} baseUri URI to use as the base.
 * @param {string} path Path to append.
 * @return {string} Updated URI.
 */
goog.uri.utils.appendPath = function(baseUri, path) {
  goog.uri.utils.assertNoFragmentsOrQueries_(baseUri);

  // Base without a trailing '/'.
  var head = goog.string.endsWith(baseUri, '/') ?
      baseUri.slice(0, -1) : baseUri;
  // Path without a leading '/'.
  var tail = goog.string.startsWith(path, '/') ? path.slice(1) : path;

  return goog.string.buildString(head, '/', tail);
};
1031
1032
/**
 * Swaps the path component of a URI for a new one, carrying over the scheme,
 * user info, domain, port, query data and fragment unchanged.
 * @param {string} uri URI to use as the base.
 * @param {string} path New path. A leading '/' is added if it is missing.
 * @return {string} Updated URI.
 */
goog.uri.utils.setPath = function(uri, path) {
  var absolutePath = goog.string.startsWith(path, '/') ? path : '/' + path;
  var pieces = goog.uri.utils.split(uri);
  var Index = goog.uri.utils.ComponentIndex;

  return goog.uri.utils.buildFromEncodedParts(
      pieces[Index.SCHEME],
      pieces[Index.USER_INFO],
      pieces[Index.DOMAIN],
      pieces[Index.PORT],
      absolutePath,
      pieces[Index.QUERY_DATA],
      pieces[Index.FRAGMENT]);
};
1054
1055
/**
 * Names of the query parameters that these utilities know about.
 * @enum {string}
 */
goog.uri.utils.StandardQueryParam = {
  /**
   * Parameter whose value is otherwise unused; it exists only so that a
   * random value can make a URI unique.
   */
  RANDOM: 'zx'
};
1065
1066
/**
 * Makes a URI unique by giving its "zx" parameter a random value. Any
 * existing "zx" definitions are replaced.
 * @param {string} uri Any URI.
 * @return {string} That URI with the "zx" parameter added or replaced to
 *     contain a random string.
 */
goog.uri.utils.makeUnique = function(uri) {
  var randomKey = goog.uri.utils.StandardQueryParam.RANDOM;
  var randomValue = goog.string.getRandomString();
  return goog.uri.utils.setParam(uri, randomKey, randomValue);
};