1 | // Copyright 2008 The Closure Library Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS-IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | /** |
16 | * @fileoverview Simple utilities for dealing with URI strings. |
17 | * |
18 | * This is intended to be a lightweight alternative to constructing goog.Uri |
19 | * objects. Whereas goog.Uri adds several kilobytes to the binary regardless |
20 | * of how much of its functionality you use, this is designed to be a set of |
21 | * mostly-independent utilities so that the compiler includes only what is |
22 | * necessary for the task. Estimated savings of porting is 5k pre-gzip and |
23 | * 1.5k post-gzip. To ensure the savings remain, future developers should |
24 | * avoid adding new functionality to existing functions, but instead create |
25 | * new ones and factor out shared code. |
26 | * |
27 | * Many of these utilities have limited functionality, tailored to common |
28 | * cases. The query parameter utilities assume that the parameter keys are |
29 | * already encoded, since most keys are compile-time alphanumeric strings. The |
30 | * query parameter mutation utilities also do not tolerate fragment identifiers. |
31 | * |
32 | * By design, these functions can be slower than goog.Uri equivalents. |
 * Repeated calls to some of these functions may be quadratic in behavior for
 * IE, although the effect is somewhat limited given the 2kb limit.
35 | * |
36 | * One advantage of the limited functionality here is that this approach is |
37 | * less sensitive to differences in URI encodings than goog.Uri, since these |
38 | * functions modify the strings in place, rather than decoding and |
39 | * re-encoding. |
40 | * |
41 | * Uses features of RFC 3986 for parsing/formatting URIs: |
42 | * http://www.ietf.org/rfc/rfc3986.txt |
43 | * |
44 | * @author gboyer@google.com (Garrett Boyer) - The "lightened" design. |
45 | * @author msamuel@google.com (Mike Samuel) - Domain knowledge and regexes. |
46 | */ |
47 | |
48 | goog.provide('goog.uri.utils'); |
49 | goog.provide('goog.uri.utils.ComponentIndex'); |
50 | goog.provide('goog.uri.utils.QueryArray'); |
51 | goog.provide('goog.uri.utils.QueryValue'); |
52 | goog.provide('goog.uri.utils.StandardQueryParam'); |
53 | |
54 | goog.require('goog.asserts'); |
55 | goog.require('goog.string'); |
56 | goog.require('goog.userAgent'); |
57 | |
58 | |
/**
 * Character codes of the URI punctuation marks that the query parameter
 * search compares against the results of String.prototype.charCodeAt.
 * The codes are inlined as numbers to avoid object allocations due to
 * charCode.
 * @enum {number}
 * @private
 */
goog.uri.utils.CharCode_ = {
  // '&' -- separates query parameters.
  AMPERSAND: 38,
  // '=' -- separates a parameter key from its value.
  EQUAL: 61,
  // '#' -- starts the fragment identifier.
  HASH: 35,
  // '?' -- starts the query data.
  QUESTION: 63
};
70 | |
71 | |
/**
 * Assembles a URI string out of components that are already URI-encoded.
 *
 * Nothing is encoded here; the parts are only glued together with the
 * appropriate punctuation. Any component that is falsy (null, undefined, an
 * empty string, or the number 0 for the port) is left out together with its
 * punctuation. The user info and the port are only emitted when a domain is
 * given, because they are part of the '//' authority section.
 *
 * @param {?string=} opt_scheme The scheme such as 'http'.
 * @param {?string=} opt_userInfo The user name before the '@'.
 * @param {?string=} opt_domain The domain such as 'www.google.com', already
 *     URI-encoded.
 * @param {(string|number|null)=} opt_port The port number.
 * @param {?string=} opt_path The path, already URI-encoded.  If it is not
 *     empty, it must begin with a slash.
 * @param {?string=} opt_queryData The URI-encoded query data.
 * @param {?string=} opt_fragment The URI-encoded fragment identifier.
 * @return {string} The fully combined URI.
 */
goog.uri.utils.buildFromEncodedParts = function(opt_scheme, opt_userInfo,
    opt_domain, opt_port, opt_path, opt_queryData, opt_fragment) {
  var schemePart = opt_scheme ? opt_scheme + ':' : '';

  // The authority ('//user@domain:port') exists only when there is a domain.
  var authorityPart = '';
  if (opt_domain) {
    authorityPart = '//' +
        (opt_userInfo ? opt_userInfo + '@' : '') +
        opt_domain +
        (opt_port ? ':' + opt_port : '');
  }

  var pathPart = opt_path ? opt_path : '';
  var queryPart = opt_queryData ? '?' + opt_queryData : '';
  var fragmentPart = opt_fragment ? '#' + opt_fragment : '';

  return schemePart + authorityPart + pathPart + queryPart + fragmentPart;
};
125 | |
126 | |
/**
 * A regular expression for breaking a URI into its component parts.
 *
 * {@link http://www.ietf.org/rfc/rfc3986.txt} says in Appendix B
 * As the "first-match-wins" algorithm is identical to the "greedy"
 * disambiguation method used by POSIX regular expressions, it is natural and
 * commonplace to use a regular expression for parsing the potential five
 * components of a URI reference.
 *
 * The following line is the regular expression for breaking-down a
 * well-formed URI reference into its components.
 *
 * <pre>
 *      ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *       12            3  4          5       6  7        8 9
 * </pre>
 *
 * The numbers in the second line above are only to assist readability; they
 * indicate the reference points for each subexpression (i.e., each paired
 * parenthesis). We refer to the value matched for subexpression <n> as $<n>.
 * For example, matching the above expression to
 * <pre>
 *     http://www.ics.uci.edu/pub/ietf/uri/#Related
 * </pre>
 * results in the following subexpression matches:
 * <pre>
 *    $1 = http:
 *    $2 = http
 *    $3 = //www.ics.uci.edu
 *    $4 = www.ics.uci.edu
 *    $5 = /pub/ietf/uri/
 *    $6 = <undefined>
 *    $7 = <undefined>
 *    $8 = #Related
 *    $9 = Related
 * </pre>
 * where <undefined> indicates that the component is not present, as is the
 * case for the query component in the above example. Therefore, we can
 * determine the value of the five components as
 * <pre>
 *    scheme    = $2
 *    authority = $4
 *    path      = $5
 *    query     = $7
 *    fragment  = $9
 * </pre>
 *
 * The regular expression has been modified slightly to expose the
 * userInfo, domain, and port separately from the authority.
 * The modified version yields
 * <pre>
 *    $1 = http              scheme
 *    $2 = <undefined>       userInfo -\
 *    $3 = www.ics.uci.edu   domain     | authority
 *    $4 = <undefined>       port     -/
 *    $5 = /pub/ietf/uri/    path
 *    $6 = <undefined>       query without ?
 *    $7 = Related           fragment without #
 * </pre>
 *
 * Two further deviations from the RFC expression are deliberate:
 * <ul>
 * <li>A backslash is not allowed inside the userInfo or the domain, and it
 *     terminates the authority just like a slash does. Browsers treat a
 *     backslash in that position as a path separator, so accepting it as
 *     part of the authority would report a different domain than the one
 *     the browser actually contacts.
 * <li>The fragment is matched with [\s\S]* instead of .* because the dot
 *     does not match line terminators. With .* a string such as 'a#b\nc'
 *     did not match at all, so String.prototype.match returned null. Every
 *     component of this expression is optional and the path, query and
 *     fragment classes together accept any character, so the expression
 *     now matches every string.
 * </ul>
 * @type {!RegExp}
 * @private
 */
goog.uri.utils.splitRe_ = new RegExp(
    '^' +
    '(?:' +
      '([^:/?#.]+)' +            // scheme - ignore special characters
                                 // used by other URL parts such as :,
                                 // ?, /, #, and .
    ':)?' +
    '(?://' +
      '(?:([^\\\\/?#]*)@)?' +    // userInfo - may not contain a backslash
      '([^\\\\/#?]*?)' +         // domain - may not contain a backslash
      '(?::([0-9]+))?' +         // port
      '(?=[\\\\/#?]|$)' +        // authority-terminating character
    ')?' +
    '([^?#]+)?' +                // path
    '(?:\\?([^#]*))?' +          // query
    '(?:#([\\s\\S]*))?' +        // fragment - [\s\S] also matches newlines
    '$');
206 | |
207 | |
/**
 * The index of each URI component in the return value of goog.uri.utils.split.
 *
 * The values are the capturing-group numbers of goog.uri.utils.splitRe_.
 * Index 0 of the match array (the whole matched string) has no entry here.
 * @enum {number}
 */
goog.uri.utils.ComponentIndex = {
  // The scheme, without the trailing colon.
  SCHEME: 1,
  // The user info, without the trailing '@'.
  USER_INFO: 2,
  // The domain.
  DOMAIN: 3,
  // The port digits, without the leading colon.
  PORT: 4,
  // The path.
  PATH: 5,
  // The query data, without the leading '?'.
  QUERY_DATA: 6,
  // The fragment, without the leading '#'.
  FRAGMENT: 7
};
221 | |
222 | |
/**
 * Breaks a URI string into its components.
 *
 * The result is the match array of goog.uri.utils.splitRe_, so individual
 * components are read with the component indices; for example:
 * <pre>
 * goog.uri.utils.split(someStr)[goog.uri.utils.ComponentIndex.QUERY_DATA];
 * </pre>
 *
 * As a side effect, every call first runs
 * goog.uri.utils.phishingProtection_, which may throw.
 *
 * @param {string} uri The URI string to examine.
 * @return {!Array.<string|undefined>} Each component still URI-encoded.
 *     Each component that is present will contain the encoded value, whereas
 *     components that are not present will be undefined or empty, depending
 *     on the browser's regular expression implementation.  The result is
 *     cast to non-null on the expectation that the expression accepts
 *     arbitrary strings, since they may still look like path names.
 */
goog.uri.utils.split = function(uri) {
  // Must run before any parsing result is handed out.
  goog.uri.utils.phishingProtection_();

  var components = uri.match(goog.uri.utils.splitRe_);

  // See @return comment -- treated as never null.
  return /** @type {!Array.<string|undefined>} */ (components);
};
245 | |
246 | |
/**
 * Whether the phishing check in goog.uri.utils.phishingProtection_ still has
 * to run. Initialized to true only on WebKit.
 *
 * Safari has a nasty bug where if you have an http URL with a username, e.g.,
 * http://evil.com%2F@google.com/
 * Safari will report that window.location.href is
 * http://evil.com/google.com/
 * so that anyone who tries to parse the domain of that URL will get
 * the wrong domain. We've seen exploits where people use this to trick
 * Safari into loading resources from evil domains.
 *
 * To work around this, we run a little "Safari phishing check", and throw
 * an exception if we see this happening.
 *
 * There is no convenient place to put this check. We apply it to
 * anyone doing URI parsing on Webkit. We're not happy about this, but
 * it fixes the problem.
 *
 * This should be removed once Safari fixes their bug.
 *
 * Exploit reported by Masato Kinugawa.
 *
 * @type {boolean}
 * @private
 */
goog.uri.utils.needsPhishingProtection_ = goog.userAgent.WEBKIT;
271 | |
272 | |
/**
 * Checks whether the user is being phished, by comparing the domain parsed
 * out of location.href with location.hostname.
 *
 * The check only runs while goog.uri.utils.needsPhishingProtection_ is true.
 * It switches the flag off before doing any work, so a check that passes is
 * never repeated. When a mismatch is found the flag is switched back on
 * before throwing, so that every later URI parse throws as well.
 *
 * Does nothing when there is no global location object or its href is empty.
 *
 * @throws {Error} If the domain parsed from location.href is non-empty and
 *     differs from location.hostname.
 * @private
 */
goog.uri.utils.phishingProtection_ = function() {
  if (!goog.uri.utils.needsPhishingProtection_) {
    return;
  }

  // Turn protection off, so that we don't recurse: getDomain() below goes
  // through goog.uri.utils.split(), which calls this function again.
  goog.uri.utils.needsPhishingProtection_ = false;

  // Use quoted access, just in case the user isn't using location externs.
  var location = goog.global['location'];
  if (!location) {
    return;
  }

  var href = location['href'];
  if (!href) {
    return;
  }

  var domain = goog.uri.utils.getDomain(href);
  if (domain && domain != location['hostname']) {
    // Phishing attack. Re-arm the check so that later parses fail too.
    goog.uri.utils.needsPhishingProtection_ = true;
    throw Error('goog.uri.utils: The domain parsed from location.href does ' +
        'not match location.hostname; possible phishing attack.');
  }
};
297 | |
298 | |
/**
 * URI-decodes a string, passing falsy input (null or the empty string)
 * through unchanged.
 *
 * The decoding itself is delegated to the built-in decodeURI or
 * decodeURIComponent; whatever those functions throw for malformed escape
 * sequences is not caught here.
 *
 * @param {?string} uri A possibly null string.
 * @param {boolean=} opt_preserveReserved If true, percent-encoding of RFC-3986
 *     reserved characters will not be removed (decodeURI is used instead of
 *     decodeURIComponent).
 * @return {?string} The string URI-decoded, or null if uri is null.
 * @private
 */
goog.uri.utils.decodeIfPossible_ = function(uri, opt_preserveReserved) {
  if (uri) {
    var decode = opt_preserveReserved ? decodeURI : decodeURIComponent;
    return decode(uri);
  }

  // Nothing to decode.
  return uri;
};
313 | |
314 | |
/**
 * Reads a single component out of a URI, selected by its component index.
 *
 * The named accessors that follow (getScheme, getPathEncoded, and so on) are
 * more readable and should be preferred over calling this directly.
 *
 * @param {goog.uri.utils.ComponentIndex} componentIndex The component index.
 * @param {string} uri The URI to examine.
 * @return {?string} The still-encoded component, or null if the component
 *     is not present.
 * @private
 */
goog.uri.utils.getComponentByIndex_ = function(componentIndex, uri) {
  var component = goog.uri.utils.split(uri)[componentIndex];

  // Absent components show up as undefined or as an empty string, depending
  // on the regular expression implementation; report all of them as null.
  return component ? component : null;
};
331 | |
332 | |
/**
 * Reads the scheme of a URI.
 * @param {string} uri The URI to examine.
 * @return {?string} The protocol or scheme, or null if none.  Does not
 *     include trailing colons or slashes.
 */
goog.uri.utils.getScheme = function(uri) {
  var index = goog.uri.utils.ComponentIndex.SCHEME;
  return goog.uri.utils.getComponentByIndex_(index, uri);
};
342 | |
343 | |
/**
 * Gets the effective scheme for the URL.  If the URL is relative then the
 * scheme is derived from the page's location.
 *
 * When the URI has no scheme and no location is available (no global
 * 'self', or a 'self' without a location), the empty string is returned.
 *
 * @param {string} uri The URI to examine.
 * @return {string} The protocol or scheme, always lower case.
 */
goog.uri.utils.getEffectiveScheme = function(uri) {
  var scheme = goog.uri.utils.getScheme(uri);

  // Guard with typeof: referencing an undeclared 'self' (any environment
  // that is neither a window nor a worker) would throw a ReferenceError.
  // NOTE: When called from a web worker in Firefox 3.5, location may be null.
  // All other browsers with web workers support self.location from the worker.
  if (!scheme && typeof self !== 'undefined' && self.location) {
    var protocol = self.location.protocol;
    // location.protocol carries a trailing colon, e.g. 'https:'; drop it.
    scheme = protocol.slice(0, -1);
  }

  return scheme ? scheme.toLowerCase() : '';
};
360 | |
361 | |
/**
 * Reads the user info of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The user name still encoded, or null if none.
 */
goog.uri.utils.getUserInfoEncoded = function(uri) {
  var index = goog.uri.utils.ComponentIndex.USER_INFO;
  return goog.uri.utils.getComponentByIndex_(index, uri);
};
370 | |
371 | |
/**
 * Reads the user info of a URI and fully URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded user info, or null if none.
 */
goog.uri.utils.getUserInfo = function(uri) {
  var encodedUserInfo = goog.uri.utils.getUserInfoEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedUserInfo);
};
380 | |
381 | |
/**
 * Reads the domain of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The domain name still encoded, or null if none.
 */
goog.uri.utils.getDomainEncoded = function(uri) {
  var index = goog.uri.utils.ComponentIndex.DOMAIN;
  return goog.uri.utils.getComponentByIndex_(index, uri);
};
390 | |
391 | |
/**
 * Reads the domain of a URI and URI-decodes it, leaving the percent-encoding
 * of reserved characters in place.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded domain, or null if none.
 */
goog.uri.utils.getDomain = function(uri) {
  var encodedDomain = goog.uri.utils.getDomainEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(
      encodedDomain, true /* opt_preserveReserved */);
};
400 | |
401 | |
/**
 * Reads the port of a URI as a number.
 * @param {string} uri The URI to examine.
 * @return {?number} The port number, or null if none.
 */
goog.uri.utils.getPort = function(uri) {
  var portText = goog.uri.utils.getComponentByIndex_(
      goog.uri.utils.ComponentIndex.PORT, uri);

  // Coerce to a number.  A missing port (null) coerces to zero and anything
  // non-numeric coerces to NaN; both are falsy and are reported as null.
  // This also maps an explicit port of zero to null, which isn't a relevant
  // port number.
  var port = Number(portText);
  return port ? port : null;
};
414 | |
415 | |
/**
 * Reads the path of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The path still encoded, or null if none. Includes the
 *     leading slash, if any.
 */
goog.uri.utils.getPathEncoded = function(uri) {
  var index = goog.uri.utils.ComponentIndex.PATH;
  return goog.uri.utils.getComponentByIndex_(index, uri);
};
425 | |
426 | |
/**
 * Reads the path of a URI and URI-decodes it, leaving the percent-encoding
 * of reserved characters in place.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded path, or null if none.  Includes the leading
 *     slash, if any.
 */
goog.uri.utils.getPath = function(uri) {
  var encodedPath = goog.uri.utils.getPathEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(
      encodedPath, true /* opt_preserveReserved */);
};
436 | |
437 | |
/**
 * Reads the query data of a URI without decoding it.
 * @param {string} uri The URI to examine.
 * @return {?string} The query data still encoded, or null if none.  Does not
 *     include the question mark itself.
 */
goog.uri.utils.getQueryData = function(uri) {
  var index = goog.uri.utils.ComponentIndex.QUERY_DATA;
  return goog.uri.utils.getComponentByIndex_(index, uri);
};
447 | |
448 | |
/**
 * Reads the fragment identifier of a URI without decoding it.
 *
 * Works directly on the string instead of going through
 * goog.uri.utils.split.
 *
 * @param {string} uri The URI to examine.
 * @return {?string} The fragment identifier, or null if the URI contains no
 *     hash mark.  Does not include the hash mark itself, so it is the empty
 *     string when the hash mark is the last character.
 */
goog.uri.utils.getFragmentEncoded = function(uri) {
  // The hash mark may not appear in any other part of the URL, so the first
  // one found starts the fragment.
  var hashIndex = uri.indexOf('#');
  if (hashIndex < 0) {
    return null;
  }
  return uri.slice(hashIndex + 1);
};
459 | |
460 | |
/**
 * Replaces the fragment identifier of a URI, or removes it when the new
 * fragment is null or empty.
 * @param {string} uri The URI to examine.
 * @param {?string} fragment The encoded fragment identifier, or null if none.
 *     Does not include the hash mark itself.
 * @return {string} The URI with the fragment set.
 */
goog.uri.utils.setFragmentEncoded = function(uri, fragment) {
  var withoutFragment = goog.uri.utils.removeFragment(uri);
  if (fragment) {
    return withoutFragment + '#' + fragment;
  }
  return withoutFragment;
};
470 | |
471 | |
/**
 * Reads the fragment identifier of a URI and fully URI-decodes it.
 * @param {string} uri The URI to examine.
 * @return {?string} The decoded fragment identifier, or null if none.  Does
 *     not include the hash mark.
 */
goog.uri.utils.getFragment = function(uri) {
  var encodedFragment = goog.uri.utils.getFragmentEncoded(uri);
  return goog.uri.utils.decodeIfPossible_(encodedFragment);
};
481 | |
482 | |
/**
 * Extracts everything up to the port of the URI, i.e. the scheme, user info,
 * domain and port, re-assembled with goog.uri.utils.buildFromEncodedParts.
 * @param {string} uri The URI string.
 * @return {string} Everything up to and including the port.
 */
goog.uri.utils.getHost = function(uri) {
  var ComponentIndex = goog.uri.utils.ComponentIndex;
  var components = goog.uri.utils.split(uri);

  var scheme = components[ComponentIndex.SCHEME];
  var userInfo = components[ComponentIndex.USER_INFO];
  var domain = components[ComponentIndex.DOMAIN];
  var port = components[ComponentIndex.PORT];

  return goog.uri.utils.buildFromEncodedParts(scheme, userInfo, domain, port);
};
496 | |
497 | |
/**
 * Extracts the path of the URL and everything after, i.e. the path, query
 * data and fragment, re-assembled with goog.uri.utils.buildFromEncodedParts.
 * @param {string} uri The URI string.
 * @return {string} The URI, starting at the path and including the query
 *     parameters and fragment identifier.
 */
goog.uri.utils.getPathAndAfter = function(uri) {
  var ComponentIndex = goog.uri.utils.ComponentIndex;
  var components = goog.uri.utils.split(uri);

  var path = components[ComponentIndex.PATH];
  var queryData = components[ComponentIndex.QUERY_DATA];
  var fragment = components[ComponentIndex.FRAGMENT];

  // Scheme, user info, domain and port are deliberately left out.
  return goog.uri.utils.buildFromEncodedParts(
      null, null, null, null, path, queryData, fragment);
};
511 | |
512 | |
/**
 * Gets the URI with the fragment identifier removed.
 * @param {string} uri The URI to examine.
 * @return {string} Everything preceding the hash mark, or the whole URI if
 *     it contains no hash mark.
 */
goog.uri.utils.removeFragment = function(uri) {
  // The hash mark may not appear in any other part of the URL, so the first
  // one found starts the fragment.
  var hashIndex = uri.indexOf('#');
  if (hashIndex < 0) {
    return uri;
  }
  return uri.slice(0, hashIndex);
};
523 | |
524 | |
/**
 * Tests whether two URIs have the exact same domain, scheme, and port.
 *
 * Unlike the version in goog.Uri, this checks protocol, and therefore is
 * suitable for checking against the browser's same-origin policy.
 *
 * The still-encoded components are compared as they appear in the strings;
 * no case folding or default-port handling is applied.
 *
 * @param {string} uri1 The first URI.
 * @param {string} uri2 The second URI.
 * @return {boolean} Whether they have the same scheme, domain and port.
 */
goog.uri.utils.haveSameDomain = function(uri1, uri2) {
  var first = goog.uri.utils.split(uri1);
  var second = goog.uri.utils.split(uri2);

  var comparedIndices = [
    goog.uri.utils.ComponentIndex.DOMAIN,
    goog.uri.utils.ComponentIndex.SCHEME,
    goog.uri.utils.ComponentIndex.PORT
  ];

  for (var i = 0; i < comparedIndices.length; i++) {
    var index = comparedIndices[i];
    if (first[index] != second[index]) {
      return false;
    }
  }
  return true;
};
545 | |
546 | |
/**
 * Throws if the URI contains a '#' or a '?', but only when goog.DEBUG is
 * set; otherwise does nothing.
 * @param {string} uri The URI to examine.
 * @private
 */
goog.uri.utils.assertNoFragmentsOrQueries_ = function(uri) {
  // NOTE: would use goog.asserts here, but jscompiler doesn't know that
  // indexOf has no side effects.
  if (goog.DEBUG) {
    var hasQuery = uri.indexOf('?') >= 0;
    var hasFragment = uri.indexOf('#') >= 0;
    if (hasFragment || hasQuery) {
      throw Error('goog.uri.utils: Fragment or query identifiers are not ' +
                  'supported: [' + uri + ']');
    }
  }
};
561 | |
562 | |
/**
 * Supported query parameter values by the parameter serializing utilities.
 *
 * If a value is null or undefined, the key-value pair is skipped, as an easy
 * way to omit parameters conditionally.  Non-array parameters are converted
 * to a string and URI encoded.  Array values are expanded into multiple
 * &key=value pairs, with each element stringized and URI-encoded.  Elements
 * inside an array are never skipped: a null or undefined element is written
 * as the literal text 'null' or 'undefined'.
 *
 * @typedef {*}
 */
goog.uri.utils.QueryValue;
574 | |
575 | |
/**
 * An array representing a set of query parameters with alternating keys
 * and values.
 *
 * Keys are assumed to be URI encoded already and live at even indices; the
 * value belonging to a key lives at the odd index right after it.  See
 * goog.uri.utils.QueryValue for details on how parameter values are encoded.
 *
 * Example:
 * <pre>
 * var data = [
 *   // Simple param: ?name=BobBarker
 *   'name', 'BobBarker',
 *   // Conditional param -- may be omitted entirely.
 *   'specialDietaryNeeds', hasDietaryNeeds() ? getDietaryNeeds() : null,
 *   // Multi-valued param: &house=LosAngeles&house=NewYork&house=null
 *   'house', ['LosAngeles', 'NewYork', null]
 * ];
 * </pre>
 *
 * @typedef {!Array.<string|goog.uri.utils.QueryValue>}
 */
goog.uri.utils.QueryArray;
598 | |
599 | |
/**
 * Joins a base URI and freshly built query data that are held in a string
 * buffer, fixing up the punctuation between them.
 *
 * Internal implementation utility, performing very few object allocations.
 * The buffer is modified in place: the fragment of the base URI, if any, is
 * moved to the end of the buffer, and the second element is rewritten to
 * '?' when the base URI has no question mark, or cleared when the base URI
 * already ends in one.
 *
 * @param {!Array.<string|undefined>} buffer A string buffer.  The first element
 *     must be the base URI, and may have a fragment identifier.  If the array
 *     contains more than one element, the second element must be an ampersand,
 *     and may be overwritten, depending on the base URI.  Undefined elements
 *     are treated as empty-string.
 * @return {string} The concatenated URI and query data.
 * @private
 */
goog.uri.utils.appendQueryData_ = function(buffer) {
  if (!buffer[1]) {
    // No query parameter was added, so there is no punctuation to fix.
    return buffer.join('');
  }

  var base = /** @type {string} */ (buffer[0]);

  // Query data must precede the fragment, so move the fragment off the base
  // part of the URI to the very end of the buffer.
  var fragmentStart = base.indexOf('#');
  if (fragmentStart >= 0) {
    buffer.push(base.substr(fragmentStart));
    base = base.substr(0, fragmentStart);
    buffer[0] = base;
  }

  // The second element is currently an ampersand; pick the right delimiter.
  var queryStart = base.indexOf('?');
  if (queryStart < 0) {
    // No existing query data: the new data starts it.
    buffer[1] = '?';
  } else if (queryStart == base.length - 1) {
    // The base already ends in '?': no additional delimiter is needed.
    buffer[1] = undefined;
  }

  return buffer.join('');
};
638 | |
639 | |
/**
 * Appends the '&key=value' pieces for one parameter to an array, expanding
 * an array value into one pair per element.
 *
 * A top-level null or undefined value appends nothing.  An empty-string
 * value appends the key without an equal sign.
 *
 * @param {string} key The key prefix.
 * @param {goog.uri.utils.QueryValue} value The value to serialize.
 * @param {!Array.<string>} pairs The array to which the 'key=value' strings
 *     should be appended.
 * @private
 */
goog.uri.utils.appendKeyValuePairs_ = function(key, value, pairs) {
  if (!goog.isArray(value)) {
    // Skip a top-level null or undefined entirely.
    if (value != null) {
      // Zero gets encoded into the url as a literal string.  For the empty
      // string, skip the equal sign, to be consistent with UriBuilder.java.
      var separator = value === '' ? '' : '=';
      pairs.push('&', key, separator, goog.string.urlEncode(value));
    }
    return;
  }

  // Convince the compiler it's an array.
  goog.asserts.assertArray(value);
  for (var elementIndex = 0; elementIndex < value.length; elementIndex++) {
    // Stringify each element before recursing.  That bypasses the null and
    // array handling above, so null and undefined elements are written as
    // the literal text 'null' and 'undefined', and nested arrays are encoded
    // in the default way instead of being expanded.
    var elementText = String(value[elementIndex]);
    goog.uri.utils.appendKeyValuePairs_(key, elementText, pairs);
  }
};
669 | |
670 | |
/**
 * Appends query data for a sequence of alternating keys and values to a
 * string buffer.
 *
 * @param {!Array.<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {goog.uri.utils.QueryArray|Arguments} keysAndValues An array with
 *     alternating keys and values -- see the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {!Array.<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBuffer_ = function(
    buffer, keysAndValues, opt_startIndex) {
  var startIndex = opt_startIndex || 0;

  // Every key after the start offset needs a value to go with it.
  var remaining = Math.max(keysAndValues.length - startIndex, 0);
  goog.asserts.assert(remaining % 2 == 0,
      'goog.uri.utils: Key/value lists must be even in length.');

  var index = startIndex;
  while (index < keysAndValues.length) {
    goog.uri.utils.appendKeyValuePairs_(
        keysAndValues[index], keysAndValues[index + 1], buffer);
    index += 2;
  }

  return buffer;
};
694 | |
695 | |
/**
 * Builds a query data string from a sequence of alternating keys and values.
 * A value that is the empty string is written as a bare key with no equal
 * sign, e.g. 'a&b=2'.
 *
 * @param {goog.uri.utils.QueryArray} keysAndValues Alternating keys and
 *     values.  See the typedef.
 * @param {number=} opt_startIndex A start offset into the array, defaults to 0.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryData = function(keysAndValues, opt_startIndex) {
  var pieces = [];
  goog.uri.utils.buildQueryDataBuffer_(pieces, keysAndValues, opt_startIndex);

  // The first piece is the leading ampersand; blank it out.
  pieces[0] = '';
  return pieces.join('');
};
711 | |
712 | |
/**
 * Appends query data for every enumerable key of a map to a string buffer.
 *
 * @param {!Array.<string|undefined>} buffer A string buffer to append to.  The
 *     first element appended will be an '&', and may be replaced by the caller.
 * @param {Object.<goog.uri.utils.QueryValue>} map An object where keys are
 *     URI-encoded parameter keys, and the values conform to the contract
 *     specified in the goog.uri.utils.QueryValue typedef.
 * @return {!Array.<string|undefined>} The buffer argument.
 * @private
 */
goog.uri.utils.buildQueryDataBufferFromMap_ = function(buffer, map) {
  // Parameters are emitted in for-in enumeration order.
  for (var paramKey in map) {
    var paramValue = map[paramKey];
    goog.uri.utils.appendKeyValuePairs_(paramKey, paramValue, buffer);
  }

  return buffer;
};
731 | |
732 | |
/**
 * Builds a query data string from a map.
 * A value that is the empty string is written as a bare key with no equal
 * sign, e.g. 'a&b=2'.
 *
 * @param {Object} map An object where keys are URI-encoded parameter keys,
 *     and the values are arbitrary types or arrays.  Keys with a null value
 *     are dropped.
 * @return {string} The encoded query string, in the form 'a=1&b=2'.
 */
goog.uri.utils.buildQueryDataFromMap = function(map) {
  var pieces = [];
  goog.uri.utils.buildQueryDataBufferFromMap_(pieces, map);

  // The first piece is the leading ampersand; blank it out.
  pieces[0] = '';
  return pieces.join('');
};
747 | |
748 | |
/**
 * Appends URI parameters to an existing URI.
 *
 * The parameters are given either as one array of alternating keys and
 * values (when exactly two arguments are passed), or as alternating keys
 * and values directly in the argument list.  Keys are assumed to be already
 * URI encoded.  The values should not be URI-encoded, and will instead be
 * encoded by this function.
 * <pre>
 * appendParams('http://www.foo.com?existing=true',
 *     'key1', 'value1',
 *     'key2', 'value?willBeEncoded',
 *     'key3', ['valueA', 'valueB', 'valueC'],
 *     'key4', null);
 * result: 'http://www.foo.com?existing=true&' +
 *     'key1=value1&' +
 *     'key2=value%3FwillBeEncoded&' +
 *     'key3=valueA&key3=valueB&key3=valueC'
 * </pre>
 *
 * A single call to this function will not exhibit quadratic behavior in IE,
 * whereas multiple repeated calls may, although the effect is limited by
 * fact that URL's generally can't exceed 2kb.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {...(goog.uri.utils.QueryArray|string|goog.uri.utils.QueryValue)} var_args
 *     An array or argument list conforming to goog.uri.utils.QueryArray.
 * @return {string} The URI with all query parameters added.
 */
goog.uri.utils.appendParams = function(uri, var_args) {
  var buffer = [uri];

  if (arguments.length == 2) {
    // A single extra argument is taken to be the key/value array itself.
    goog.uri.utils.buildQueryDataBuffer_(buffer, arguments[1], 0);
  } else {
    // Otherwise the keys and values follow the URI in the argument list.
    goog.uri.utils.buildQueryDataBuffer_(buffer, arguments, 1);
  }

  return goog.uri.utils.appendQueryData_(buffer);
};
782 | |
783 | |
/**
 * Appends query parameters from a map to an existing URI.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {Object} map An object where keys are URI-encoded parameter keys,
 *     and the values are arbitrary types or arrays.  Keys with a null value
 *     are dropped.
 * @return {string} The URI with the query parameters added.
 */
goog.uri.utils.appendParamsFromMap = function(uri, map) {
  var buffer = [uri];
  goog.uri.utils.buildQueryDataBufferFromMap_(buffer, map);
  return goog.uri.utils.appendQueryData_(buffer);
};
797 | |
798 | |
/**
 * Appends a single URI parameter.
 *
 * Repeated calls to this can exhibit quadratic behavior in IE6 due to the
 * way string append works, though it should be limited given the 2kb limit.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} key The key, which must already be URI encoded.
 * @param {*=} opt_value The value, which will be stringized and encoded
 *     (assumed not already to be encoded).  If omitted, undefined, or null, the
 *     key will be added as a valueless parameter.
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.appendParam = function(uri, key, opt_value) {
  var hasValue = goog.isDefAndNotNull(opt_value);

  // The ampersand is a placeholder; appendQueryData_ swaps in the delimiter
  // that fits the base URI.
  var pieces = hasValue ?
      [uri, '&', key, '=', goog.string.urlEncode(opt_value)] :
      [uri, '&', key];

  return goog.uri.utils.appendQueryData_(pieces);
};
819 | |
820 | |
/**
 * Finds the next instance of a query parameter with the specified name.
 *
 * Works purely on string indexes and character codes.
 *
 * @param {string} uri The URI to search.  May contain a fragment identifier
 *     as long as hashOrEndIndex points at its hash mark.
 * @param {number} startIndex The index to begin searching for the key at.  A
 *     match may be found even if this is one character after the ampersand.
 * @param {string} keyEncoded The URI-encoded key.
 * @param {number} hashOrEndIndex Index to stop looking at.  If a hash
 *     mark is present, it should be its index, otherwise it should be the
 *     length of the string.
 * @return {number} The position of the first character in the key's name,
 *     immediately after either a question mark or an ampersand, or -1 if the
 *     key is not found before hashOrEndIndex.
 * @private
 */
goog.uri.utils.findParam_ = function(
    uri, startIndex, keyEncoded, hashOrEndIndex) {
  var keyLength = keyEncoded.length;

  // Look for the raw key text and then check the surrounding punctuation,
  // rather than expensively building a regexp.
  for (var candidate = uri.indexOf(keyEncoded, startIndex);
       candidate >= 0 && candidate < hashOrEndIndex;
       candidate = uri.indexOf(keyEncoded, candidate + keyLength + 1)) {
    // A parameter name must come directly after '&' or '?'.
    var before = uri.charCodeAt(candidate - 1);
    var startsParameter = before == goog.uri.utils.CharCode_.AMPERSAND ||
        before == goog.uri.utils.CharCode_.QUESTION;
    if (!startsParameter) {
      continue;
    }

    // The name must be followed by '=', '&', '#', or nothing at all
    // (charCodeAt yields NaN past the end of the string, which is falsy).
    var after = uri.charCodeAt(candidate + keyLength);
    var endsName = !after ||
        after == goog.uri.utils.CharCode_.EQUAL ||
        after == goog.uri.utils.CharCode_.AMPERSAND ||
        after == goog.uri.utils.CharCode_.HASH;
    if (endsName) {
      return candidate;
    }
  }

  return -1;
};
866 | |
867 | |
/**
 * Regular expression for finding a hash mark or end of string.
 *
 * Matches the first '#' in a string, or the empty position at the end of the
 * string when there is no '#'.  The query parameter utilities pass it to
 * String.prototype.search to get the index at which to stop scanning.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.hashOrEndRe_ = /#|$/;
874 | |
875 | |
/**
 * Tells whether a URI's query data contains a parameter with the given key.
 *
 * @param {string} uri The URI to process.  May contain a fragment
 *     identifier.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {boolean} Whether the key is present.
 */
goog.uri.utils.hasParam = function(uri, keyEncoded) {
  // Index of the first '#', or the length of the URI when there is none.
  var searchLimit = uri.search(goog.uri.utils.hashOrEndRe_);
  var keyIndex = goog.uri.utils.findParam_(uri, 0, keyEncoded, searchLimit);
  return keyIndex >= 0;
};
890 | |
891 | |
/**
 * Gets the first value of a query parameter.
 * @param {string} uri The URI to process.  May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {?string} The first value of the parameter (URI-decoded), or null
 *     if the parameter is not found.
 */
goog.uri.utils.getParamValue = function(uri, keyEncoded) {
  // Index of the first '#', or the length of the URI when there is none.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var keyStart = goog.uri.utils.findParam_(uri, 0, keyEncoded, queryEnd);
  if (keyStart < 0) {
    return null;
  }

  // The value runs up to the next '&', or to the end of the query data when
  // no '&' occurs before it.
  var valueEnd = uri.indexOf('&', keyStart);
  if (valueEnd < 0 || valueEnd > queryEnd) {
    valueEnd = queryEnd;
  }

  // Skip past the "key=" or "key&" substring.
  var valueStart = keyStart + keyEncoded.length + 1;

  // Use substr, because it (unlike substring) will return empty string
  // if valueStart > valueEnd.
  return goog.string.urlDecode(uri.substr(valueStart, valueEnd - valueStart));
};
919 | |
920 | |
/**
 * Gets all values of a query parameter.
 * @param {string} uri The URI to process.  May contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.  Case-sensitive.
 * @return {!Array.<string>} All URI-decoded values with the given key.
 *     If the key is not found, this will have length 0, but never be null.
 */
goog.uri.utils.getParamValues = function(uri, keyEncoded) {
  // Index of the first '#', or the length of the URI when there is none.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var values = [];
  var searchFrom = 0;

  while (true) {
    var keyStart = goog.uri.utils.findParam_(
        uri, searchFrom, keyEncoded, queryEnd);
    if (keyStart < 0) {
      break;
    }

    // This parameter ends at the next '&', or at the end of the query data.
    var valueEnd = uri.indexOf('&', keyStart);
    if (valueEnd < 0 || valueEnd > queryEnd) {
      valueEnd = queryEnd;
    }

    // Skip past the "key=" or "key&" substring.
    var valueStart = keyStart + keyEncoded.length + 1;

    // Use substr, because it (unlike substring) will return empty string
    // if valueStart > valueEnd.
    values.push(goog.string.urlDecode(
        uri.substr(valueStart, valueEnd - valueStart)));

    // Resume the search where this parameter ended.
    searchFrom = valueEnd;
  }

  return values;
};
953 | |
954 | |
/**
 * Regexp to find trailing question marks and ampersands.
 *
 * Matches a single '?' or '&' that is immediately followed by the end of the
 * string or by a '#'.  The capture group holds what follows the punctuation
 * (either nothing or the '#'), so replacing a match with '$1' drops only the
 * punctuation character.
 * @type {RegExp}
 * @private
 */
goog.uri.utils.trailingQueryPunctuationRe_ = /[?&]($|#)/;
961 | |
962 | |
/**
 * Removes all instances of a query parameter.
 * @param {string} uri The URI to process.  Must not contain a fragment.
 * @param {string} keyEncoded The URI-encoded key.
 * @return {string} The URI with all instances of the parameter removed.
 */
goog.uri.utils.removeParam = function(uri, keyEncoded) {
  // Index of the first '#', or the length of the URI when there is none.
  var queryEnd = uri.search(goog.uri.utils.hashOrEndRe_);
  var keptPieces = [];
  var copyFrom = 0;

  while (true) {
    var keyStart = goog.uri.utils.findParam_(
        uri, copyFrom, keyEncoded, queryEnd);
    if (keyStart < 0) {
      break;
    }

    // Keep everything before the key, including the '?' or '&' in front of
    // it; punctuation left dangling at the end is trimmed below.
    keptPieces.push(uri.substring(copyFrom, keyStart));

    // Resume immediately after the next '&'.  If there is none, go to the end
    // of the query data.  Never move past queryEnd, so that a hash mark is
    // not skipped.
    var ampersandIndex = uri.indexOf('&', keyStart);
    var resumeAt = ampersandIndex < 0 ? queryEnd : ampersandIndex + 1;
    copyFrom = resumeAt > queryEnd ? queryEnd : resumeAt;
  }

  // Append everything that is remaining.
  keptPieces.push(uri.substr(copyFrom));

  // Join the pieces, and remove trailing punctuation that remains.
  var joined = keptPieces.join('');
  return joined.replace(goog.uri.utils.trailingQueryPunctuationRe_, '$1');
};
994 | |
995 | |
/**
 * Replaces all existing definitions of a parameter with a single definition.
 *
 * Repeated calls to this can exhibit quadratic behavior due to the need to
 * find existing instances and reconstruct the string, though it should be
 * limited given the 2kb limit.  Consider using appendParams to append multiple
 * parameters in bulk.
 *
 * @param {string} uri The original URI, which may already have query data.
 * @param {string} keyEncoded The key, which must already be URI encoded.
 * @param {*} value The value, which will be stringized and encoded (assumed
 *     not already to be encoded).
 * @return {string} The URI with the query parameter added.
 */
goog.uri.utils.setParam = function(uri, keyEncoded, value) {
  // Remove every existing occurrence first, then append the single new one.
  var uriWithoutKey = goog.uri.utils.removeParam(uri, keyEncoded);
  return goog.uri.utils.appendParam(uriWithoutKey, keyEncoded, value);
};
1014 | |
1015 | |
/**
 * Joins a base URI and a path with exactly one "/" between them, so that the
 * seam never becomes "//".  The baseUri passed in must not contain query or
 * fragment identifiers.  The path to append may not contain query or
 * fragment identifiers.
 *
 * @param {string} baseUri URI to use as the base.
 * @param {string} path Path to append.
 * @return {string} Updated URI.
 */
goog.uri.utils.appendPath = function(baseUri, path) {
  goog.uri.utils.assertNoFragmentsOrQueries_(baseUri);

  // Drop one trailing '/' from the base, if present.
  var trimmedBase = goog.string.endsWith(baseUri, '/') ?
      baseUri.substr(0, baseUri.length - 1) :
      baseUri;
  // Drop one leading '/' from the path, if present.
  var trimmedPath = goog.string.startsWith(path, '/') ?
      path.substr(1) :
      path;

  return goog.string.buildString(trimmedBase, '/', trimmedPath);
};
1039 | |
1040 | |
/**
 * Replaces the path of a URI, keeping all of its other components.
 * @param {string} uri URI to use as the base.
 * @param {string} path New path.  A leading '/' is added if it is missing.
 * @return {string} Updated URI.
 */
goog.uri.utils.setPath = function(uri, path) {
  // Add any missing '/'.
  var absolutePath = goog.string.startsWith(path, '/') ? path : '/' + path;

  var components = goog.uri.utils.split(uri);
  var Index = goog.uri.utils.ComponentIndex;

  // Rebuild from the original components, swapping in only the new path.
  return goog.uri.utils.buildFromEncodedParts(
      components[Index.SCHEME],
      components[Index.USER_INFO],
      components[Index.DOMAIN],
      components[Index.PORT],
      absolutePath,
      components[Index.QUERY_DATA],
      components[Index.FRAGMENT]);
};
1062 | |
1063 | |
/**
 * Standard supported query parameters.
 *
 * Each value is the parameter key as it appears in the query string.
 * @enum {string}
 */
goog.uri.utils.StandardQueryParam = {

  /**
   * Unused parameter for unique-ifying.  goog.uri.utils.makeUnique sets this
   * key to a random string.
   */
  RANDOM: 'zx'
};
1073 | |
1074 | |
/**
 * Sets the zx parameter of a URI to a random value.
 * @param {string} uri Any URI.
 * @return {string} That URI with the "zx" parameter added or replaced to
 *     contain a random string.
 */
goog.uri.utils.makeUnique = function(uri) {
  var randomKey = goog.uri.utils.StandardQueryParam.RANDOM;
  var randomValue = goog.string.getRandomString();
  // setParam removes any existing "zx" parameter before appending the new one.
  return goog.uri.utils.setParam(uri, randomKey, randomValue);
};