lib/goog/string/string.js

1// Copyright 2006 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/**
16 * @fileoverview Utilities for string manipulation.
17 * @author arv@google.com (Erik Arvidsson)
18 */
19
20
21/**
22 * Namespace for string utilities
23 */
24goog.provide('goog.string');
25goog.provide('goog.string.Unicode');
26
27
/**
 * @define {boolean} When true, HTML escaping additionally encodes the
 * lowercase letter "e". Because that letter occurs so often, its encoded form
 * makes double-escaped output easy to detect.
 */
goog.define(
    'goog.string.DETECT_DOUBLE_ESCAPING',
    false);
33
34
/**
 * @define {boolean} When true, HTML unescaping never uses the DOM, even when
 * a document is available.
 */
goog.define(
    'goog.string.FORCE_NON_DOM_HTML_UNESCAPING',
    false);
39
40
/**
 * Named Unicode characters used by the string utilities.
 * @enum {string}
 */
goog.string.Unicode = {
  // Non-breaking space (U+00A0).
  NBSP: '\xa0'
};
48
49
/**
 * Tests whether a string begins with a given prefix.
 * @param {string} str The string to inspect.
 * @param {string} prefix The text expected at the start of {@code str}.
 * @return {boolean} True if {@code str} begins with {@code prefix}.
 */
goog.string.startsWith = function(str, prefix) {
  // Searching backwards from index 0 can only match at index 0, so the rest
  // of the string is not examined.
  var matchIndex = str.lastIndexOf(prefix, 0);
  return matchIndex == 0;
};
59
60
/**
 * Tests whether a string finishes with a given suffix.
 * @param {string} str The string to inspect.
 * @param {string} suffix The text expected at the end of {@code str}.
 * @return {boolean} True if {@code str} ends with {@code suffix}.
 */
goog.string.endsWith = function(str, suffix) {
  // Position at which the suffix would have to start.
  var offset = str.length - suffix.length;
  if (!(offset >= 0)) {
    // The suffix is longer than the string itself.
    return false;
  }
  return str.indexOf(suffix, offset) == offset;
};
71
72
/**
 * Tests whether a string begins with a given prefix, ignoring case.
 * @param {string} str The string to inspect.
 * @param {string} prefix The text expected at the start of {@code str}.
 * @return {boolean} True if {@code str} begins with {@code prefix} (ignoring
 *     case).
 */
goog.string.caseInsensitiveStartsWith = function(str, prefix) {
  // Only the leading slice of the same length as the prefix can match.
  var head = str.substr(0, prefix.length);
  return goog.string.caseInsensitiveCompare(prefix, head) == 0;
};
84
85
/**
 * Tests whether a string finishes with a given suffix, ignoring case.
 * @param {string} str The string to inspect.
 * @param {string} suffix The text expected at the end of {@code str}.
 * @return {boolean} True if {@code str} ends with {@code suffix} (ignoring
 *     case).
 */
goog.string.caseInsensitiveEndsWith = function(str, suffix) {
  var suffixLength = suffix.length;
  // Trailing slice of the same length as the suffix.
  var tail = str.substr(str.length - suffixLength, suffixLength);
  return goog.string.caseInsensitiveCompare(suffix, tail) == 0;
};
97
98
/**
 * Tests two strings for equality, ignoring case.
 * @param {string} str1 First string to compare.
 * @param {string} str2 Second string to compare.
 * @return {boolean} True if the lower-cased forms of {@code str1} and
 *     {@code str2} are equal.
 */
goog.string.caseInsensitiveEquals = function(str1, str2) {
  var lower1 = str1.toLowerCase();
  var lower2 = str2.toLowerCase();
  return lower1 == lower2;
};
109
110
/**
 * Performs simple python-style positional substitution.
 * subs("foo%s hot%s", "bar", "dog") becomes "foobar hotdog".
 *
 * Placeholders are filled left to right. Placeholders without a matching
 * argument are left as a literal '%s'; surplus arguments are ignored.
 *
 * @param {string} str The pattern containing {@code %s} placeholders.
 * @param {...*} var_args The values to substitute into the pattern.
 * @return {string} A copy of {@code str} in which each occurrence of
 *     {@code %s} has been replaced by an argument from {@code var_args}.
 */
goog.string.subs = function(str, var_args) {
  var segments = str.split('%s');
  var values = Array.prototype.slice.call(arguments, 1);

  // A value can only be inserted between two segments, so the number of
  // substitutions is bounded by both the values and the gaps available.
  var substitutions = Math.min(values.length, segments.length - 1);

  var result = '';
  for (var i = 0; i < substitutions; i++) {
    result += segments[i] + values[i];
  }

  // Re-join whatever is left, restoring the unused '%s' placeholders.
  return result + segments.slice(substitutions).join('%s');
};
133
134
/**
 * Collapses every run of whitespace (spaces, non-breaking spaces, new lines
 * and tabs) into one space and removes leading and trailing whitespace.
 * @param {string} str Input string.
 * @return {string} A copy of {@code str} with collapsed whitespace.
 */
goog.string.collapseWhitespace = function(str) {
  // IE leaves the non-breaking space (0xa0) out of its \s character class,
  // contrary to section 7.2 of the ECMAScript spec, so it is listed
  // explicitly to get the same result in every browser.
  var collapsed = str.replace(/[\s\xa0]+/g, ' ');
  // After collapsing, at most one space remains at either end.
  return collapsed.replace(/^\s+|\s+$/g, '');
};
147
148
/**
 * Tests whether a string is empty or made up solely of whitespace.
 * @param {string} str The string to check.
 * @return {boolean} Whether {@code str} is empty or whitespace only.
 */
goog.string.isEmptyOrWhitespace = function(str) {
  // A length == 0 pre-check is deliberately omitted: it was measured to be
  // slower in all browsers (about the same in Opera).
  // IE leaves the non-breaking space (0xa0) out of its \s character class,
  // contrary to section 7.2 of the ECMAScript spec, so it is listed
  // explicitly to get the same result in every browser.
  var blankPattern = /^[\s\xa0]*$/;
  return blankPattern.test(str);
};
162
163
/**
 * Tests whether a string has no characters at all.
 * @param {string} str The string to check.
 * @return {boolean} Whether {@code str} is empty.
 */
goog.string.isEmptyString = function(str) {
  var charCount = str.length;
  return charCount == 0;
};
172
173
/**
 * Tests whether a string is empty or made up solely of whitespace. This is
 * the same function object as {@link goog.string.isEmptyOrWhitespace}.
 *
 * TODO(user): Deprecate this when clients have been switched over to
 * goog.string.isEmptyOrWhitespace.
 *
 * @param {string} str The string to check.
 * @return {boolean} Whether {@code str} is empty or whitespace only.
 */
goog.string.isEmpty = goog.string.isEmptyOrWhitespace;
184
185
/**
 * Tests whether a value is null, undefined, empty or whitespace only. The
 * value is first passed through goog.string.makeSafe and the result is then
 * checked with goog.string.isEmptyOrWhitespace.
 * @param {*} str The value to check.
 * @return {boolean} Whether {@code str} is null, undefined, empty, or
 *     whitespace only.
 * @deprecated Use goog.string.isEmptyOrWhitespace(goog.string.makeSafe(str))
 *     instead.
 */
goog.string.isEmptyOrWhitespaceSafe = function(str) {
  var safeStr = goog.string.makeSafe(str);
  return goog.string.isEmptyOrWhitespace(safeStr);
};
197
198
/**
 * Tests whether a value is null, undefined, empty or whitespace only. This is
 * the same function object as {@link goog.string.isEmptyOrWhitespaceSafe}.
 *
 * TODO(user): Deprecate this when clients have been switched over to
 * goog.string.isEmptyOrWhitespaceSafe.
 *
 * @param {*} str The value to check.
 * @return {boolean} Whether {@code str} is null, undefined, empty, or
 *     whitespace only.
 */
goog.string.isEmptySafe = goog.string.isEmptyOrWhitespaceSafe;
210
211
/**
 * Tests whether a string consists only of breaking whitespace: tab, line
 * feed, carriage return and space. An empty string also passes.
 * @param {string} str The string to check.
 * @return {boolean} Whether the string is all breaking whitespace.
 */
goog.string.isBreakingWhitespace = function(str) {
  // Matches any character that is NOT breaking whitespace.
  var nonBreakingPattern = /[^\t\n\r ]/;
  return !nonBreakingPattern.test(str);
};
220
221
/**
 * Tests whether a string contains nothing but the letters a-z and A-Z. An
 * empty string also passes.
 * @param {string} str The string to check.
 * @return {boolean} True if {@code str} consists entirely of letters.
 */
goog.string.isAlpha = function(str) {
  // Matches any character that is NOT an ASCII letter.
  var nonLetterPattern = /[^a-zA-Z]/;
  return !nonLetterPattern.test(str);
};
230
231
/**
 * Tests whether a value contains nothing but the digits 0-9. An empty string
 * also passes.
 * @param {*} str The value to check. If it is not a string, the regular
 *     expression test converts it to one.
 * @return {boolean} True if {@code str} is numeric.
 */
goog.string.isNumeric = function(str) {
  // Matches any character that is NOT a decimal digit.
  var nonDigitPattern = /[^0-9]/;
  return !nonDigitPattern.test(str);
};
241
242
/**
 * Tests whether a string contains nothing but the letters a-z, A-Z and the
 * digits 0-9. An empty string also passes.
 * @param {string} str The string to check.
 * @return {boolean} True if {@code str} is alphanumeric.
 */
goog.string.isAlphaNumeric = function(str) {
  // Matches any character that is neither an ASCII letter nor a digit.
  var nonAlphaNumericPattern = /[^a-zA-Z0-9]/;
  return !nonAlphaNumericPattern.test(str);
};
251
252
/**
 * Tests whether a character is the plain space character (U+0020).
 * @param {string} ch Character to check.
 * @return {boolean} True if {@code ch} is a space.
 */
goog.string.isSpace = function(ch) {
  return ' ' == ch;
};
261
262
/**
 * Checks if a character is a valid unicode character, which here means a
 * single UTF-16 code unit in one of two ranges: printable ASCII
 * (U+0020 to U+007E) or U+0080 to U+FFFD.
 *
 * The single-character requirement applies to both ranges. Previously,
 * operator precedence made it guard only the ASCII range, so a multi-character
 * string starting with a character >= U+0080 was wrongly accepted.
 *
 * @param {string} ch Character to check.
 * @return {boolean} True if {@code ch} is a valid unicode character.
 */
goog.string.isUnicodeChar = function(ch) {
  if (ch.length != 1) {
    return false;
  }
  return (ch >= ' ' && ch <= '~') || (ch >= '\u0080' && ch <= '\uFFFD');
};
272
273
/**
 * Replaces newlines in a string with spaces. A run of consecutive newlines
 * (\r\n, \r or \n in any mix) becomes a single space.
 * @param {string} str The string from which to strip newlines.
 * @return {string} A copy of {@code str} stripped of newlines.
 */
goog.string.stripNewlines = function(str) {
  var newlineRuns = /(\r\n|\r|\n)+/g;
  return str.replace(newlineRuns, ' ');
};
283
284
/**
 * Converts Windows and Mac newlines to unix style: each \r\n or \r becomes a
 * single \n.
 * @param {string} str The string in which to canonicalize newlines.
 * @return {string} A copy of {@code str} with canonicalized newlines.
 */
goog.string.canonicalizeNewlines = function(str) {
  var anyNewline = /(\r\n|\r|\n)/g;
  return str.replace(anyNewline, '\n');
};
293
294
/**
 * Normalizes whitespace by turning every individual whitespace character,
 * including the non-breaking space, into a plain space. Runs are not
 * collapsed.
 * @param {string} str The string in which to normalize whitespace.
 * @return {string} A copy of {@code str} with all whitespace normalized.
 */
goog.string.normalizeWhitespace = function(str) {
  var anyWhitespaceChar = /\xa0|\s/g;
  return str.replace(anyWhitespaceChar, ' ');
};
304
305
/**
 * Normalizes spaces by collapsing each run of spaces and tabs into a single
 * space. Each non-breaking space is replaced with a space.
 * @param {string} str The string in which to normalize spaces.
 * @return {string} A copy of {@code str} with all consecutive spaces and tabs
 *     replaced with a single space.
 */
goog.string.normalizeSpaces = function(str) {
  var spaceRuns = /\xa0|[ \t]+/g;
  return str.replace(spaceRuns, ' ');
};
316
317
/**
 * Strips breaking spaces (tab, carriage return, line feed, space) from both
 * ends of a string and collapses each interior run of them into one space.
 * The original and the result strings render the same way in HTML.
 * @param {string} str A string in which to collapse spaces.
 * @return {string} Copy of the string with normalized breaking spaces.
 */
goog.string.collapseBreakingSpaces = function(str) {
  var collapsed = str.replace(/[\t\r\n ]+/g, ' ');
  return collapsed.replace(/^[\t\r\n ]+|[\t\r\n ]+$/g, '');
};
329
330
/**
 * Removes whitespace from both ends of a string. The implementation is chosen
 * once, when this file is evaluated: the native String.prototype.trim is used
 * when goog.TRUSTED_SITE is truthy and the native method exists; otherwise a
 * regular-expression fallback is used.
 * @param {string} str The string to trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trim = (goog.TRUSTED_SITE && String.prototype.trim) ?
    function(str) {
      return str.trim();
    } :
    function(str) {
      // IE leaves the non-breaking space (0xa0) out of its \s character
      // class, contrary to section 7.2 of the ECMAScript spec, so it is
      // listed explicitly to get the same result in every browser.
      var edgeWhitespace = /^[\s\xa0]+|[\s\xa0]+$/g;
      return str.replace(edgeWhitespace, '');
    };
347
348
/**
 * Removes whitespace from the start of a string.
 * @param {string} str The string to left trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trimLeft = function(str) {
  // IE leaves the non-breaking space (0xa0) out of its \s character class,
  // contrary to section 7.2 of the ECMAScript spec, so it is listed
  // explicitly to get the same result in every browser.
  var leadingWhitespace = /^[\s\xa0]+/;
  return str.replace(leadingWhitespace, '');
};
360
361
/**
 * Removes whitespace from the end of a string.
 * @param {string} str The string to right trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trimRight = function(str) {
  // IE leaves the non-breaking space (0xa0) out of its \s character class,
  // contrary to section 7.2 of the ECMAScript spec, so it is listed
  // explicitly to get the same result in every browser.
  var trailingWhitespace = /[\s\xa0]+$/;
  return str.replace(trailingWhitespace, '');
};
373
374
/**
 * Comparator that orders two strings without regard to case. Both arguments
 * are converted with String() and lower-cased before being compared.
 * -1 = str1 less than str2
 *  0 = str1 equals str2
 *  1 = str1 greater than str2
 *
 * @param {string} str1 The string to compare.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} The comparator result, as described above.
 */
goog.string.caseInsensitiveCompare = function(str1, str2) {
  var lower1 = String(str1).toLowerCase();
  var lower2 = String(str2).toLowerCase();

  if (lower1 == lower2) {
    return 0;
  }
  return lower1 < lower2 ? -1 : 1;
};
397
398
/**
 * Compares two strings, treating their numeric substrings as numbers.
 *
 * Both strings are lower-cased and split into tokens with
 * {@code tokenizerRegExp}; tokens are then compared pairwise from the left.
 *
 * @param {string} str1 First string.
 * @param {string} str2 Second string.
 * @param {!RegExp} tokenizerRegExp Splits a string into substrings of
 *     non-negative integers, non-numeric characters and optionally fractional
 *     numbers starting with a decimal point.
 * @return {number} Negative if str1 < str2, 0 if str1 == str2, positive if
 *     str1 > str2.
 * @private
 */
goog.string.numberAwareCompare_ = function(str1, str2, tokenizerRegExp) {
  if (str1 == str2) {
    return 0;
  }
  // A falsy (e.g. empty) string sorts before everything else.
  if (!str1) {
    return -1;
  }
  if (!str2) {
    return 1;
  }

  // Splitting the whole string up front with match turns out to be faster for
  // most inputs than using RegExp.exec or walking the characters one by one.
  var leftTokens = str1.toLowerCase().match(tokenizerRegExp);
  var rightTokens = str2.toLowerCase().match(tokenizerRegExp);

  var sharedCount = Math.min(leftTokens.length, rightTokens.length);

  for (var idx = 0; idx < sharedCount; idx++) {
    var leftToken = leftTokens[idx];
    var rightToken = rightTokens[idx];

    if (leftToken == rightToken) {
      continue;
    }

    // The first differing pair decides the result. Only when both tokens
    // parse as integers is a numeric comparison used; decimal tokens are
    // ordered as strings (e.g., '.09' < '.1').
    var leftInt = parseInt(leftToken, 10);
    if (!isNaN(leftInt)) {
      var rightInt = parseInt(rightToken, 10);
      var difference = leftInt - rightInt;
      if (!isNaN(rightInt) && difference) {
        return difference;
      }
    }
    return leftToken < rightToken ? -1 : 1;
  }

  // All shared tokens match: the string with fewer tokens sorts first.
  if (leftTokens.length != rightTokens.length) {
    return leftTokens.length - rightTokens.length;
  }

  // The strings differ only by case (exact equality was handled at the top),
  // so fall back to the default string comparison to stabilize the sort.
  return str1 < str2 ? -1 : 1;
};
458
459
/**
 * Comparator that orders strings containing non-negative integers the way a
 * person would expect: 'File 2.jpg' sorts before 'File 10.jpg', and
 * 'Version 1.9' before 'Version 1.10'. The comparison is mostly
 * case-insensitive, though strings that are identical except for case are
 * sorted with the upper-case strings before lower-case.
 *
 * This comparison function is up to 50x slower than either the default or the
 * case-insensitive compare. It should not be used in time-critical code, but
 * should be fast enough to sort several hundred short strings (like filenames)
 * with a reasonable delay.
 *
 * @param {string} str1 The string to compare in a numerically sensitive way.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than
 *     0 if str1 > str2.
 */
goog.string.intAwareCompare = function(str1, str2) {
  // Tokens are either a run of digits or a run of non-digits.
  var integerTokenizer = /\d+|\D+/g;
  return goog.string.numberAwareCompare_(str1, str2, integerTokenizer);
};
480
481
/**
 * Comparator that orders strings containing non-negative integer and
 * fractional numbers the way a person would expect: 'File 2.jpg' sorts before
 * 'File 10.jpg', and '3.14' before '3.2'. Equivalent to
 * {@link goog.string.intAwareCompare} apart from the way it interprets dots.
 *
 * @param {string} str1 The string to compare in a numerically sensitive way.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than
 *     0 if str1 > str2.
 */
goog.string.floatAwareCompare = function(str1, str2) {
  // Tokens are a run of digits, a dot followed by digits, or a run of
  // non-digits.
  var decimalTokenizer = /\d+|\.\d+|\D+/g;
  return goog.string.numberAwareCompare_(str1, str2, decimalTokenizer);
};
497
498
/**
 * Alias for {@link goog.string.floatAwareCompare}; both names refer to the
 * same function object.
 *
 * @param {string} str1 The string to compare in a numerically sensitive way.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than
 *     0 if str1 > str2.
 */
goog.string.numerateCompare = goog.string.floatAwareCompare;
507
508
/**
 * URL-encodes a value after converting it to a string.
 * @param {*} str The value to url-encode.
 * @return {string} An encoded copy of {@code str} that is safe for urls.
 *     Note that '#', ':', and other characters used to delimit portions
 *     of URLs *will* be encoded.
 */
goog.string.urlEncode = function(str) {
  var text = String(str);
  return encodeURIComponent(text);
};
519
520
/**
 * URL-decodes a string. Every '+' is turned into a space first, because
 * decodeURIComponent does not perform that conversion itself.
 * @param {string} str The string to url decode.
 * @return {string} The decoded {@code str}.
 */
goog.string.urlDecode = function(str) {
  var withSpaces = str.replace(/\+/g, ' ');
  return decodeURIComponent(withSpaces);
};
530
531
/**
 * Replaces each newline (\r\n, \r or \n) with a <br> tag, or with <br /> when
 * XML-compatible output is requested.
 * @param {string} str The string in which to convert newlines.
 * @param {boolean=} opt_xml Whether to use XML compatible tags.
 * @return {string} A copy of {@code str} with converted newlines.
 */
goog.string.newLineToBr = function(str, opt_xml) {
  var breakTag = opt_xml ? '<br />' : '<br>';
  return str.replace(/(\r\n|\r|\n)/g, breakTag);
};
541
542
/**
 * Escapes '&', '<', '>', the double quote '"', the single quote '\'' and the
 * NUL character, so that a string can be included in an HTML tag attribute
 * value within double or single quotes.
 *
 * Escaping '>' is not required for the HTML or XML to be valid; it is done
 * for consistency with other implementations.
 *
 * With goog.string.DETECT_DOUBLE_ESCAPING, the lowercase letter "e" is
 * escaped as well.
 *
 * NOTE(user):
 * HtmlEscape is often called while generating large blocks of HTML. Keeping
 * the regular expressions and strings as statics is an optimization that can
 * more than halve the time IE spends in this function for large apps, since
 * strings and regexes both contribute to GC allocations.
 *
 * Checking for a character before replacing it adds function calls, but is
 * faster in the average case: the average input does not need all 4
 * characters escaped, and indexOf() is much cheaper than replace(). The worst
 * case is slightly slower, which is why opt_isLikelyToContainHtmlChars exists
 * for inputs where all 4 HTML entities are very likely to be present.
 *
 * Some benchmarks (times tended to fluctuate +-0.05ms):
 *                                     FireFox                     IE6
 * (no chars / average (mix of cases) / all 4 chars)
 * no checks                     0.13 / 0.22 / 0.22         0.23 / 0.53 / 0.80
 * indexOf                       0.08 / 0.17 / 0.26         0.22 / 0.54 / 0.84
 * indexOf + re test             0.07 / 0.17 / 0.28         0.19 / 0.50 / 0.85
 *
 * Skipping unnecessary replace calls also reduces object allocations, so the
 * gap between the methods would grow with the size of the application.
 *
 * @param {string} str string to be escaped.
 * @param {boolean=} opt_isLikelyToContainHtmlChars When true, every
 *     replacement runs unconditionally, without first checking whether the
 *     character is present. Use it when each of the characters is expected to
 *     appear often; leave it false when few HTML characters are expected.
 * @return {string} An escaped copy of {@code str}.
 */
goog.string.htmlEscape = function(str, opt_isLikelyToContainHtmlChars) {
  if (!opt_isLikelyToContainHtmlChars) {
    // Quick exit when there is nothing to replace; in the worst case this
    // test makes barely a difference to the time taken.
    if (!goog.string.ALL_RE_.test(str)) {
      return str;
    }

    // str.indexOf is faster than regex.test for these single-character
    // presence checks.
    if (str.indexOf('&') != -1) {
      str = str.replace(goog.string.AMP_RE_, '&amp;');
    }
    if (str.indexOf('<') != -1) {
      str = str.replace(goog.string.LT_RE_, '&lt;');
    }
    if (str.indexOf('>') != -1) {
      str = str.replace(goog.string.GT_RE_, '&gt;');
    }
    if (str.indexOf('"') != -1) {
      str = str.replace(goog.string.QUOT_RE_, '&quot;');
    }
    if (str.indexOf('\'') != -1) {
      str = str.replace(goog.string.SINGLE_QUOTE_RE_, '&#39;');
    }
    if (str.indexOf('\x00') != -1) {
      str = str.replace(goog.string.NULL_RE_, '&#0;');
    }
    if (goog.string.DETECT_DOUBLE_ESCAPING && str.indexOf('e') != -1) {
      str = str.replace(goog.string.E_RE_, '&#101;');
    }
    return str;
  }

  // The caller expects most characters to be present: replace unconditionally.
  // The ampersand goes first so the entities produced below are not
  // themselves re-escaped.
  str = str.replace(goog.string.AMP_RE_, '&amp;');
  str = str.replace(goog.string.LT_RE_, '&lt;');
  str = str.replace(goog.string.GT_RE_, '&gt;');
  str = str.replace(goog.string.QUOT_RE_, '&quot;');
  str = str.replace(goog.string.SINGLE_QUOTE_RE_, '&#39;');
  str = str.replace(goog.string.NULL_RE_, '&#0;');
  if (goog.string.DETECT_DOUBLE_ESCAPING) {
    str = str.replace(goog.string.E_RE_, '&#101;');
  }
  return str;
};
631
632
/**
 * Matches every ampersand; used when escaping.
 * @const {!RegExp}
 * @private
 */
goog.string.AMP_RE_ = /&/g;


/**
 * Matches every less-than sign; used when escaping.
 * @const {!RegExp}
 * @private
 */
goog.string.LT_RE_ = /</g;


/**
 * Matches every greater-than sign; used when escaping.
 * @const {!RegExp}
 * @private
 */
goog.string.GT_RE_ = />/g;


/**
 * Matches every double quote; used when escaping.
 * @const {!RegExp}
 * @private
 */
goog.string.QUOT_RE_ = /"/g;


/**
 * Matches every single quote; used when escaping.
 * @const {!RegExp}
 * @private
 */
goog.string.SINGLE_QUOTE_RE_ = /'/g;


/**
 * Matches every NUL character; used when escaping.
 * @const {!RegExp}
 * @private
 */
goog.string.NULL_RE_ = /\x00/g;


/**
 * Matches every lowercase letter "e"; used when escaping with
 * goog.string.DETECT_DOUBLE_ESCAPING enabled.
 * @const {!RegExp}
 * @private
 */
goog.string.E_RE_ = /e/g;


/**
 * Matches any single character that needs to be escaped. The letter "e" is
 * included only when goog.string.DETECT_DOUBLE_ESCAPING is enabled. The
 * pattern is not global, so test() keeps no state between calls.
 * @const {!RegExp}
 * @private
 */
goog.string.ALL_RE_ = (goog.string.DETECT_DOUBLE_ESCAPING ?
    /[\x00&<>"'e]/ :
    /[\x00&<>"']/);
697
698
/**
 * Unescapes an HTML string. Strings without an ampersand are returned
 * unchanged.
 *
 * @param {string} str The string to unescape.
 * @return {string} An unescaped copy of {@code str}.
 */
goog.string.unescapeEntities = function(str) {
  if (!goog.string.contains(str, '&')) {
    return str;
  }
  // The DOM is used only when one exists and non-DOM unescaping has not been
  // explicitly requested.
  var canUseDom = !goog.string.FORCE_NON_DOM_HTML_UNESCAPING &&
      'document' in goog.global;
  // Without a DOM, fall back on pure XML entities.
  return canUseDom ?
      goog.string.unescapeEntitiesUsingDom_(str) :
      goog.string.unescapePureXmlEntities_(str);
};
719
720
/**
 * Unescapes an HTML string using the provided document. Strings without an
 * ampersand are returned unchanged.
 *
 * @param {string} str The string to unescape.
 * @param {!Document} document A document to use for unescaping the string.
 * @return {string} An unescaped copy of {@code str}.
 */
goog.string.unescapeEntitiesWithDocument = function(str, document) {
  var hasEntityCandidate = goog.string.contains(str, '&');
  return hasEntityCandidate ?
      goog.string.unescapeEntitiesUsingDom_(str, document) :
      str;
};
734
735
/**
 * Unescapes an HTML string using a DOM to resolve non-XML, non-numeric
 * entities. This function is XSS-safe and whitespace-preserving.
 *
 * Numeric entities are decoded without the DOM. Code points above U+FFFF (up
 * to U+10FFFF) are emitted as a UTF-16 surrogate pair, because
 * String.fromCharCode alone truncates its argument to 16 bits and would
 * produce the wrong character.
 *
 * @private
 * @param {string} str The string to unescape.
 * @param {Document=} opt_document An optional document to use for creating
 *     elements. If this is not specified then the default window.document
 *     will be used.
 * @return {string} The unescaped {@code str} string.
 */
goog.string.unescapeEntitiesUsingDom_ = function(str, opt_document) {
  // Per-call cache of resolved entities, seeded with the common XML ones.
  /** @type {!Object<string, string>} */
  var seen = {'&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"'};
  var div;
  if (opt_document) {
    div = opt_document.createElement('div');
  } else {
    div = goog.global.document.createElement('div');
  }
  // Match as many valid entity characters as possible. If the actual entity
  // happens to be shorter, it will still work as innerHTML will return the
  // trailing characters unchanged. Since the entity characters do not include
  // open angle bracket, there is no chance of XSS from the innerHTML use.
  // Since no whitespace is passed to innerHTML, whitespace is preserved.
  return str.replace(goog.string.HTML_ENTITY_PATTERN_, function(s, entity) {
    // Check for cached entity.
    var value = seen[s];
    if (value) {
      return value;
    }
    // Check for numeric entity.
    if (entity.charAt(0) == '#') {
      // Prefix with 0 so that hex entities (e.g. &#x10) parse as hex numbers.
      var n = Number('0' + entity.substr(1));
      if (!isNaN(n)) {
        if (n > 0xFFFF && n <= 0x10FFFF) {
          // Supplementary-plane code point: encode as a surrogate pair.
          var offset = n - 0x10000;
          value = String.fromCharCode(
              0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
        } else {
          value = String.fromCharCode(n);
        }
      }
    }
    // Fall back to innerHTML otherwise.
    if (!value) {
      // Append a non-entity character to avoid a bug in Webkit that parses
      // an invalid entity at the end of innerHTML text as the empty string.
      div.innerHTML = s + ' ';
      // Then remove the trailing character from the result.
      value = div.firstChild.nodeValue.slice(0, -1);
    }
    // Cache and return.
    return seen[s] = value;
  });
};
786
787
/**
 * Unescapes XML entities without using a DOM. Handles the named entities
 * amp, lt, gt and quot, plus numeric entities (decimal and hexadecimal).
 * Anything else is left untouched.
 *
 * Numeric code points above U+FFFF (up to U+10FFFF) are emitted as a UTF-16
 * surrogate pair, because String.fromCharCode alone truncates its argument to
 * 16 bits and would produce the wrong character.
 *
 * @private
 * @param {string} str The string to unescape.
 * @return {string} An unescaped copy of {@code str}.
 */
goog.string.unescapePureXmlEntities_ = function(str) {
  return str.replace(/&([^;]+);/g, function(s, entity) {
    switch (entity) {
      case 'amp':
        return '&';
      case 'lt':
        return '<';
      case 'gt':
        return '>';
      case 'quot':
        return '"';
      default:
        if (entity.charAt(0) == '#') {
          // Prefix with 0 so that hex entities (e.g. &#x10) parse as hex.
          var n = Number('0' + entity.substr(1));
          if (!isNaN(n)) {
            if (n > 0xFFFF && n <= 0x10FFFF) {
              // Supplementary-plane code point: encode as a surrogate pair.
              var offset = n - 0x10000;
              return String.fromCharCode(
                  0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
            }
            return String.fromCharCode(n);
          }
        }
        // For invalid entities we just return the entity
        return s;
    }
  });
};
818
819
/**
 * Matches an HTML entity: an ampersand, then one or more characters that are
 * not ';', whitespace, '<' or '&' (captured as group 1), then an optional
 * semicolon.
 * See also HTML5: Tokenization / Tokenizing character references.
 * @private
 * @type {!RegExp}
 */
goog.string.HTML_ENTITY_PATTERN_ = /&([^;\s<&]+);?/g;
827
828
/**
 * Escapes whitespace so that spatial formatting is preserved. Each space is
 * followed by character entity #160 (used because it is safer for xml), and
 * newlines are converted with goog.string.newLineToBr.
 * @param {string} str The string in which to escape whitespace.
 * @param {boolean=} opt_xml Whether to use XML compatible tags.
 * @return {string} An escaped copy of {@code str}.
 */
goog.string.whitespaceEscape = function(str, opt_xml) {
  // This doesn't use goog.string.preserveSpaces for backwards compatibility.
  var spacesEscaped = str.replace(/ /g, ' &#160;');
  return goog.string.newLineToBr(spacesEscaped, opt_xml);
};
840
841
/**
 * Preserves spaces that would otherwise be collapsed in HTML by replacing
 * them with non-breaking space Unicode characters. The pattern targets a
 * space that directly follows the start of the string, a newline or another
 * space.
 * @param {string} str The string in which to preserve whitespace.
 * @return {string} A copy of {@code str} with preserved whitespace.
 */
goog.string.preserveSpaces = function(str) {
  // '$1' re-emits the character matched before the space.
  var replacement = '$1' + goog.string.Unicode.NBSP;
  return str.replace(/(^|[\n ]) /g, replacement);
};
851
852
/**
 * Strips a matching pair of quote characters from around a string. The second
 * argument lists the characters to treat as quotes. It may be a single
 * character or a string of several, in which case each one is tried in turn.
 * The quotes are removed only when the same character appears at both ends.
 * For example:
 *
 * <pre>
 * goog.string.stripQuotes('"abc"', '"`') --> 'abc'
 * goog.string.stripQuotes('`abc`', '"`') --> 'abc'
 * </pre>
 *
 * @param {string} str The string to strip.
 * @param {string} quoteChars The quote characters to strip.
 * @return {string} A copy of {@code str} without the quotes.
 */
goog.string.stripQuotes = function(str, quoteChars) {
  var candidateCount = quoteChars.length;
  for (var idx = 0; idx < candidateCount; idx++) {
    var candidate =
        candidateCount == 1 ? quoteChars : quoteChars.charAt(idx);
    var opensWith = str.charAt(0) == candidate;
    if (opensWith && str.charAt(str.length - 1) == candidate) {
      return str.substring(1, str.length - 1);
    }
  }
  // No candidate wraps the string; return it untouched.
  return str;
};
878
879
/**
 * Shortens a string to a maximum length, appending '...' when it had to be
 * cut. The ellipsis counts towards the length, so a maximum length of 10 and
 * the string 'Hello World!' produces 'Hello W...'.
 *
 * When escaped characters are protected, the string is unescaped before being
 * measured and cut, and HTML-escaped again afterwards.
 *
 * @param {string} str The string to truncate.
 * @param {number} chars Max number of characters.
 * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped
 *     characters from being cut off in the middle.
 * @return {string} The truncated {@code str} string.
 */
goog.string.truncate = function(str, chars, opt_protectEscapedCharacters) {
  var text = opt_protectEscapedCharacters ?
      goog.string.unescapeEntities(str) :
      str;

  if (text.length > chars) {
    // Reserve three characters for the ellipsis.
    text = text.substring(0, chars - 3) + '...';
  }

  return opt_protectEscapedCharacters ? goog.string.htmlEscape(text) : text;
};
905
906
/**
 * Shortens a string by cutting out its middle and inserting "..." there,
 * favoring the beginning of the string. {@code chars} is the number of
 * characters kept from the original; the ellipsis is added on top of that.
 *
 * When escaped characters are protected, the string is unescaped before being
 * measured and cut, and HTML-escaped again afterwards.
 *
 * @param {string} str The string to truncate the middle of.
 * @param {number} chars Max number of characters.
 * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped
 *     characters from being cutoff in the middle.
 * @param {number=} opt_trailingChars Optional number of trailing characters to
 *     leave at the end of the string, instead of truncating as close to the
 *     middle as possible. Values larger than {@code chars} are clamped to
 *     {@code chars}.
 * @return {string} A truncated copy of {@code str}.
 */
goog.string.truncateMiddle = function(str, chars,
    opt_protectEscapedCharacters, opt_trailingChars) {
  var text = opt_protectEscapedCharacters ?
      goog.string.unescapeEntities(str) :
      str;

  if (text.length > chars) {
    if (opt_trailingChars) {
      // Keep a caller-chosen number of characters at the end.
      var tailLength = opt_trailingChars > chars ? chars : opt_trailingChars;
      var tailStart = text.length - tailLength;
      var headLength = chars - tailLength;
      text = text.substring(0, headLength) + '...' +
          text.substring(tailStart);
    } else {
      // Split evenly; when chars is odd the extra character goes to the
      // beginning of the string.
      var halfLength = Math.floor(chars / 2);
      var tailBegin = text.length - halfLength;
      var headEnd = halfLength + chars % 2;
      text = text.substring(0, headEnd) + '...' + text.substring(tailBegin);
    }
  }

  return opt_protectEscapedCharacters ? goog.string.htmlEscape(text) : text;
};
946
947
/**
 * Special chars that need to be escaped for goog.string.quote. Each key is a
 * raw character and each value is the JS source text that represents it
 * inside a double-quoted string literal.
 * @private {!Object<string, string>}
 */
goog.string.specialEscapeChars_ = {
  '\0': '\\0',
  '\b': '\\b',
  '\f': '\\f',
  '\n': '\\n',
  '\r': '\\r',
  '\t': '\\t',
  '\x0B': '\\x0B', // '\v' is not supported in JScript
  '"': '\\"',
  '\\': '\\\\',
  // To support the use case of embedding quoted strings inside of <script>
  // tags, we have to make sure "<!--", "<script", and "</script" do not
  // appear in the resulting string. The specific strings that must be escaped
  // are documented at:
  // http://www.w3.org/TR/html51/semantics.html#restrictions-for-contents-of-script-elements
  //
  // The value must be the six characters \u003C, hence the doubled backslash
  // in this source. A single-backslash literal such as '\x3c' evaluates to
  // "<" itself and would leave the character unescaped. The \u form is used
  // rather than \x because it is also a valid JSON string escape.
  '<': '\\u003C'
};
969
970
/**
 * Lookup table used internally by goog.string.escapeChar, mapping a character
 * to its escaped form. It starts out with the single quote only;
 * goog.string.escapeChar adds an entry for every character it escapes.
 * @private {!Object<string, string>}
 */
goog.string.jsEscapeCache_ = {
  "'": "\\'"
};
978
979
/**
 * Encloses a string in double quotes and escapes characters so that the
 * string is a valid JS string. Characters listed in
 * goog.string.specialEscapeChars_ are replaced by their mapped form, other
 * printable ASCII characters are kept, and everything else goes through
 * goog.string.escapeChar. Safety for embedding in <script> tags depends on
 * the "<" entry of goog.string.specialEscapeChars_.
 * @param {string} s The string to quote.
 * @return {string} A copy of {@code s} surrounded by double quotes.
 */
goog.string.quote = function(s) {
  var text = String(s);
  var pieces = ['"'];
  for (var i = 0; i < text.length; i++) {
    var ch = text.charAt(i);
    var special = goog.string.specialEscapeChars_[ch];
    if (special) {
      pieces.push(special);
      continue;
    }
    var code = ch.charCodeAt(0);
    var isPrintableAscii = code > 31 && code < 127;
    pieces.push(isPrintableAscii ? ch : goog.string.escapeChar(ch));
  }
  pieces.push('"');
  return pieces.join('');
};
999
1000
/**
 * Escapes a whole string by passing each of its characters through
 * goog.string.escapeChar and concatenating the results.
 * @param {string} str The string to escape.
 * @return {string} An escaped string representing {@code str}.
 */
goog.string.escapeString = function(str) {
  var escaped = '';
  for (var pos = 0; pos < str.length; pos++) {
    escaped += goog.string.escapeChar(str.charAt(pos));
  }
  return escaped;
};
1013
1014
/**
 * Takes a character and returns the escaped string for that character. For
 * example escapeChar(String.fromCharCode(15)) -> "\\x0F".
 *
 * Results are memoized in goog.string.jsEscapeCache_. Characters listed in
 * goog.string.specialEscapeChars_ use their mapped form, printable ASCII
 * (char codes 32 to 126) is returned unchanged, other codes below 256 become
 * "\\xHH" and the remaining ones become "\\uHHHH".
 * @param {string} c The character to escape.
 * @return {string} An escaped string representing {@code c}.
 */
goog.string.escapeChar = function(c) {
  if (c in goog.string.jsEscapeCache_) {
    return goog.string.jsEscapeCache_[c];
  }

  if (c in goog.string.specialEscapeChars_) {
    return goog.string.jsEscapeCache_[c] = goog.string.specialEscapeChars_[c];
  }

  var rv = c;
  var cc = c.charCodeAt(0);
  if (cc > 31 && cc < 127) {
    // Printable ASCII needs no escaping; rv already holds the character.
  } else {
    // tab is 9 but handled above
    if (cc < 256) {
      rv = '\\x';
      // Keep the hex part two digits wide.
      if (cc < 16) {
        rv += '0';
      }
    } else {
      rv = '\\u';
      // Keep the hex part four digits wide (cc is at least 0x100 here).
      if (cc < 4096) { // \u1000
        rv += '0';
      }
    }
    rv += cc.toString(16).toUpperCase();
  }

  return goog.string.jsEscapeCache_[c] = rv;
};
1052
1053
/**
 * Tests whether one string occurs anywhere inside another.
 * @param {string} str The string to search.
 * @param {string} subString The substring to search for.
 * @return {boolean} Whether {@code str} contains {@code subString}.
 */
goog.string.contains = function(str, subString) {
  var position = str.indexOf(subString);
  return position != -1;
};
1063
1064
/**
 * Tests whether one string occurs inside another, comparing the lower-cased
 * forms of both so that case is ignored.
 * @param {string} str The string to search.
 * @param {string} subString The substring to search for.
 * @return {boolean} Whether {@code str} contains {@code subString}.
 */
goog.string.caseInsensitiveContains = function(str, subString) {
  var haystack = str.toLowerCase();
  var needle = subString.toLowerCase();
  return goog.string.contains(haystack, needle);
};
1074
1075
/**
 * Returns the non-overlapping occurrences of ss in s.
 * If either s or ss evaluates to false, then returns zero.
 * @param {string} s The string to look in.
 * @param {string} ss The string to look for.
 * @return {number} Number of occurrences of ss in s.
 */
goog.string.countOf = function(s, ss) {
  if (!s || !ss) {
    return 0;
  }
  // Splitting on ss yields one more piece than there are occurrences.
  return s.split(ss).length - 1;
};
1086
1087
/**
 * Cuts a run of characters of a given length out of a string, starting at a
 * given index.
 * @param {string} s The base string from which to remove.
 * @param {number} index The index at which to remove the substring.
 * @param {number} stringLength The length of the substring to remove.
 * @return {string} A copy of {@code s} with the substring removed or the full
 *     string if nothing is removed or the input is invalid.
 */
goog.string.removeAt = function(s, index, stringLength) {
  // Only an index inside the string combined with a positive length removes
  // anything; every other input returns s untouched.
  var canRemove = index >= 0 && index < s.length && stringLength > 0;
  if (!canRemove) {
    return s;
  }
  var head = s.substr(0, index);
  var tail = s.substr(index + stringLength, s.length - index - stringLength);
  return head + tail;
};
1106
1107
/**
 * Deletes the first occurrence of a substring from a string. The substring is
 * matched literally: it is passed through goog.string.regExpEscape before
 * being compiled into a pattern.
 * @param {string} s The base string from which to remove.
 * @param {string} ss The string to remove.
 * @return {string} A copy of {@code s} with {@code ss} removed or the full
 *     string if nothing is removed.
 */
goog.string.remove = function(s, ss) {
  var literalPattern = goog.string.regExpEscape(ss);
  // No flags: only the first match is replaced.
  return s.replace(new RegExp(literalPattern, ''), '');
};
1119
1120
/**
 * Deletes every occurrence of a substring from a string. The substring is
 * matched literally: it is passed through goog.string.regExpEscape before
 * being compiled into a pattern.
 * @param {string} s The base string from which to remove.
 * @param {string} ss The string to remove.
 * @return {string} A copy of {@code s} with {@code ss} removed or the full
 *     string if nothing is removed.
 */
goog.string.removeAll = function(s, ss) {
  var literalPattern = goog.string.regExpEscape(ss);
  // The 'g' flag makes replace() act on all matches.
  return s.replace(new RegExp(literalPattern, 'g'), '');
};
1132
1133
/**
 * Escapes characters in the string that are not safe to use in a RegExp.
 * @param {*} s The string to escape. If not a string, it will be casted
 *     to one.
 * @return {string} A RegExp safe, escaped copy of {@code s}.
 */
goog.string.regExpEscape = function(s) {
  var text = String(s);
  // Put a backslash in front of each character in the escape set.
  var escaped = text.replace(/([-()\[\]{}+?*.$\^|,:#<!\\])/g, '\\$1');
  // Spell out backspace characters (U+0008) as the text "\x08".
  return escaped.replace(/\x08/g, '\\x08');
};
1144
1145
/**
 * Repeats a string n times. The implementation is picked once, when this file
 * is evaluated: the native String.prototype.repeat when it exists, otherwise
 * an Array-join based fallback.
 * @param {string} string The string to repeat.
 * @param {number} length The number of times to repeat.
 * @return {string} A string containing {@code length} repetitions of
 *     {@code string}.
 */
goog.string.repeat = (!String.prototype.repeat) ?
    function(string, length) {
      // Joining length + 1 empty slots places the string between them
      // exactly length times.
      var slots = new Array(length + 1);
      return slots.join(string);
    } :
    function(string, length) {
      // The native method is over 100 times faster than the alternative.
      return string.repeat(length);
    };
1161
1162
/**
 * Pads number to given length and optionally rounds it to a given precision.
 * The length applies to the digits in front of the decimal point. A minus
 * sign is not counted and is placed in front of the padding zeros. NaN and
 * +/-Infinity are returned as their plain string form without padding.
 * For example:
 * <pre>padNumber(1.25, 2, 3) -> '01.250'
 * padNumber(1.25, 2) -> '01.25'
 * padNumber(1.25, 2, 1) -> '01.3'
 * padNumber(1.25, 0) -> '1.25'
 * padNumber(-1.25, 2) -> '-01.25'</pre>
 *
 * @param {number} num The number to pad.
 * @param {number} length The desired length.
 * @param {number=} opt_precision The desired precision.
 * @return {string} {@code num} as a string with the given options.
 */
goog.string.padNumber = function(num, length, opt_precision) {
  // Non-finite values have no integer digits to pad.
  if (!isFinite(num)) {
    return String(num);
  }

  var s = goog.isDef(opt_precision) ? num.toFixed(opt_precision) : String(num);

  // Detach the sign so the zeros end up between it and the digits
  // ('-01' rather than '0-1').
  var sign = '';
  if (s.charAt(0) == '-') {
    sign = '-';
    s = s.substring(1);
  }

  // Number of characters in front of the decimal point.
  var index = s.indexOf('.');
  if (index == -1) {
    index = s.length;
  }
  return sign + goog.string.repeat('0', Math.max(0, length - index)) + s;
};
1184
1185
/**
 * Converts any value to a string, mapping null and undefined to the empty
 * string instead of "null" / "undefined".
 *
 * @param {*} obj The object to convert.
 * @return {string} A string representation of the {@code obj}.
 */
goog.string.makeSafe = function(obj) {
  // Loose equality on purpose: it matches both null and undefined.
  if (obj == null) {
    return '';
  }
  return String(obj);
};
1196
1197
/**
 * Concatenates string expressions. This is useful
 * since some browsers are very inefficient when it comes to using plus to
 * concat strings. Be careful when using null and undefined here since
 * these will not be included in the result. If you need to represent these
 * be sure to cast the argument to a String first.
 * For example:
 * <pre>buildString('a', 'b', 'c', 'd') -> 'abcd'
 * buildString(null, undefined) -> ''
 * </pre>
 * @param {...*} var_args A list of strings to concatenate. If not a string,
 *     it will be casted to one.
 * @return {string} The concatenation of {@code var_args}.
 */
goog.string.buildString = function(var_args) {
  // Copy the array-like arguments object into a real array, then join it.
  var pieces = Array.prototype.slice.call(arguments);
  return pieces.join('');
};
1215
1216
/**
 * Returns a random string made of two base-36 chunks: a random integer below
 * 2^31, followed by the absolute value of a second such integer XORed with
 * the current timestamp from goog.now().
 *
 * Doesn't trust Javascript's random function entirely, hence the timestamp.
 * Base-36 is used to make the string shorter.
 *
 * NOTE(review): this function was documented as providing "at least 64-bits
 * of randomness"; the code draws two values below 2^31 - confirm that claim.
 *
 * @return {string} A random string, e.g. sn1s7vb4gcic.
 */
goog.string.getRandomString = function() {
  var range = 2147483648;
  var firstDraw = Math.floor(Math.random() * range);
  var secondDraw = Math.floor(Math.random() * range);
  var firstChunk = firstDraw.toString(36);
  var secondChunk = Math.abs(secondDraw ^ goog.now()).toString(36);
  return firstChunk + secondChunk;
};
1231
1232
/**
 * Compares two version numbers. Each version is split at the dots and the
 * resulting subversions are compared left to right; a subversion missing on
 * one side is treated as the empty string.
 *
 * @param {string|number} version1 Version of first item.
 * @param {string|number} version2 Version of second item.
 *
 * @return {number}  1 if {@code version1} is higher.
 *                   0 if arguments are equal.
 *                  -1 if {@code version2} is higher.
 */
goog.string.compareVersions = function(version1, version2) {
  // Trim leading and trailing whitespace, then split into subversions.
  var leftParts = goog.string.trim(String(version1)).split('.');
  var rightParts = goog.string.trim(String(version2)).split('.');
  var partCount = Math.max(leftParts.length, rightParts.length);
  var result = 0;

  // Walk the subversions for as long as they compare equal.
  for (var p = 0; result == 0 && p < partCount; p++) {
    var leftPart = leftParts[p] || '';
    var rightPart = rightParts[p] || '';

    // Each subversion is consumed as a sequence of (number, qualifier) pairs,
    // e.g. '10b2' -> ('10', 'b'), ('2', ''). The parsers carry state because
    // of the 'g' flag, so each side needs its own RegExp object.
    var leftParser = new RegExp('(\\d*)(\\D*)', 'g');
    var rightParser = new RegExp('(\\d*)(\\D*)', 'g');

    // result is 0 on entry, so the body always runs at least once.
    while (result == 0) {
      var leftMatch = leftParser.exec(leftPart) || ['', '', ''];
      var rightMatch = rightParser.exec(rightPart) || ['', '', ''];

      // Both sides exhausted: this subversion is equal.
      if (leftMatch[0].length == 0 && rightMatch[0].length == 0) {
        break;
      }

      // A missing numeric part counts as 0.
      var leftNum = 0;
      if (leftMatch[1].length != 0) {
        leftNum = parseInt(leftMatch[1], 10);
      }
      var rightNum = 0;
      if (rightMatch[1].length != 0) {
        rightNum = parseInt(rightMatch[1], 10);
      }

      var leftQualifier = leftMatch[2];
      var rightQualifier = rightMatch[2];

      // Precedence: the number first; then a component without a qualifier
      // beats one with a qualifier; finally the qualifiers are compared as
      // strings.
      result = goog.string.compareElements_(leftNum, rightNum) ||
          goog.string.compareElements_(leftQualifier.length == 0,
              rightQualifier.length == 0) ||
          goog.string.compareElements_(leftQualifier, rightQualifier);
    }
  }

  return result;
};
1288
1289
/**
 * Three-way comparison of two elements of a version number, using the
 * built-in < and > operators.
 *
 * @param {string|number|boolean} left An element from a version number.
 * @param {string|number|boolean} right An element from a version number.
 *
 * @return {number}  1 if {@code left} is higher.
 *                   0 if arguments are equal.
 *                  -1 if {@code right} is higher.
 * @private
 */
goog.string.compareElements_ = function(left, right) {
  if (left < right) {
    return -1;
  }
  return left > right ? 1 : 0;
};
1309
1310
/**
 * String hash function similar to java.lang.String.hashCode().
 * The hash code for a string is computed as
 * s[0] * 31 ^ (n - 1) + s[1] * 31 ^ (n - 2) + ... + s[n - 1],
 * where s[i] is the ith character of the string and n is the length of
 * the string. We mod the result to make it between 0 (inclusive) and 2^32
 * (exclusive).
 * @param {string} str A string.
 * @return {number} Hash value for {@code str}, between 0 (inclusive) and 2^32
 *     (exclusive). The empty string returns 0.
 */
goog.string.hashCode = function(str) {
  var hash = 0;
  var length = str.length;
  for (var pos = 0; pos < length; pos++) {
    // The unsigned shift by zero wraps the value into the range 0 ... 2^32
    // after every step.
    hash = (hash * 31 + str.charCodeAt(pos)) >>> 0;
  }
  return hash;
};
1330
1331
/**
 * The most recent unique ID. Starts at a random integer in the range
 * 0 (inclusive) to 2^31 (exclusive).
 * @type {number}
 * @private
 */
goog.string.uniqueStringCounter_ = Math.floor(Math.random() * 0x80000000);
1338
1339
/**
 * Generates and returns a string which is unique in the current document.
 * This is useful, for example, to create unique IDs for DOM elements.
 * The string is 'goog_' followed by the current value of an internal counter,
 * which is advanced by one on every call.
 * @return {string} A unique id.
 */
goog.string.createUniqueString = function() {
  var current = goog.string.uniqueStringCounter_;
  goog.string.uniqueStringCounter_ = current + 1;
  return 'goog_' + current;
};
1348
1349
/**
 * Converts the supplied string to a number, which may be Infinity or NaN.
 * This function strips whitespace: (toNumber(' 123') === 123)
 * This function accepts scientific notation: (toNumber('1e1') === 10)
 *
 * This is better than Javascript's built-in conversions because, sadly:
 *     (Number(' ') === 0) and (parseFloat('123a') === 123)
 *
 * @param {string} str The string to convert.
 * @return {number} The number the supplied string represents, or NaN.
 */
goog.string.toNumber = function(str) {
  var parsed = Number(str);
  // Number() turns empty and whitespace-only strings into 0; report those as
  // NaN instead. The whitespace check only runs when the result is 0.
  var isBlankInput = parsed == 0 && goog.string.isEmptyOrWhitespace(str);
  return isBlankInput ? NaN : parsed;
};
1368
1369
/**
 * Returns whether the given string is lower camel case (e.g. "isFooBar"):
 * one or more lowercase letters, followed by any number of groups made of an
 * uppercase letter and zero or more lowercase letters.
 *
 * Note that this assumes the string is entirely letters.
 * @see http://en.wikipedia.org/wiki/CamelCase#Variations_and_synonyms
 *
 * @param {string} str String to test.
 * @return {boolean} Whether the string is lower camel case.
 */
goog.string.isLowerCamelCase = function(str) {
  var lowerCamelPattern = /^[a-z]+([A-Z][a-z]*)*$/;
  return lowerCamelPattern.test(str);
};
1382
1383
/**
 * Returns whether the given string is upper camel case (e.g. "FooBarBaz"):
 * one or more groups made of an uppercase letter followed by zero or more
 * lowercase letters.
 *
 * Note that this assumes the string is entirely letters.
 * @see http://en.wikipedia.org/wiki/CamelCase#Variations_and_synonyms
 *
 * @param {string} str String to test.
 * @return {boolean} Whether the string is upper camel case.
 */
goog.string.isUpperCamelCase = function(str) {
  var upperCamelPattern = /^([A-Z][a-z]*)+$/;
  return upperCamelPattern.test(str);
};
1396
1397
/**
 * Converts a string from selector-case to camelCase (e.g. from
 * "multi-part-string" to "multiPartString"), useful for converting
 * CSS selectors and HTML dataset keys to their equivalent JS properties.
 * Only a dash directly followed by a lowercase ASCII letter is rewritten.
 * @param {string} str The string in selector-case form.
 * @return {string} The string in camelCase form.
 */
goog.string.toCamelCase = function(str) {
  // Replaces "-x" with "X": the dash is dropped, the letter is upper-cased.
  var upperCaseLetterAfterDash = function(all, match) {
    return match.toUpperCase();
  };
  return String(str).replace(/\-([a-z])/g, upperCaseLetterAfterDash);
};
1410
1411
/**
 * Converts a string from camelCase to selector-case (e.g. from
 * "multiPartString" to "multi-part-string"), useful for converting JS
 * style and dataset properties to equivalent CSS selectors and HTML keys.
 * @param {string} str The string in camelCase form.
 * @return {string} The string in selector-case form.
 */
goog.string.toSelectorCase = function(str) {
  // Put a dash in front of every uppercase ASCII letter, then lower-case the
  // whole result.
  var dashed = String(str).replace(/([A-Z])/g, '-$1');
  return dashed.toLowerCase();
};
1422
1423
/**
 * Converts a string into TitleCase. First character of the string is always
 * capitalized in addition to the first letter of every subsequent word.
 * Words are delimited by one or more whitespaces by default. Custom delimiters
 * can optionally be specified to replace the default, which doesn't preserve
 * whitespace delimiters and instead must be explicitly included if needed.
 *
 * Default delimiter => " ":
 *    goog.string.toTitleCase('oneTwoThree')    => 'OneTwoThree'
 *    goog.string.toTitleCase('one two three')  => 'One Two Three'
 *    goog.string.toTitleCase('  one   two   ') => '  One   Two   '
 *    goog.string.toTitleCase('one_two_three')  => 'One_two_three'
 *    goog.string.toTitleCase('one-two-three')  => 'One-two-three'
 *
 * Custom delimiter => "_-.":
 *    goog.string.toTitleCase('oneTwoThree', '_-.')       => 'OneTwoThree'
 *    goog.string.toTitleCase('one two three', '_-.')     => 'One two three'
 *    goog.string.toTitleCase('  one   two   ', '_-.')    => '  one   two   '
 *    goog.string.toTitleCase('one_two_three', '_-.')     => 'One_Two_Three'
 *    goog.string.toTitleCase('one-two-three', '_-.')     => 'One-Two-Three'
 *    goog.string.toTitleCase('one...two...three', '_-.') => 'One...Two...Three'
 *    goog.string.toTitleCase('one. two. three', '_-.')   => 'One. two. three'
 *    goog.string.toTitleCase('one-two.three', '_-.')     => 'One-Two.Three'
 *
 * @param {string} str String value in camelCase form.
 * @param {string=} opt_delimiters Custom delimiter character set used to
 *      distinguish words in the string value. Each character represents a
 *      single delimiter. When provided, default whitespace delimiter is
 *      overridden and must be explicitly included if needed.
 * @return {string} String value in TitleCase form.
 */
goog.string.toTitleCase = function(str, opt_delimiters) {
  // Characters that may precede a word, as the inside of a RegExp character
  // class. Whitespace unless the caller supplied a set of their own.
  var delimiterSet = '\\s';
  if (goog.isString(opt_delimiters)) {
    delimiterSet = goog.string.regExpEscape(opt_delimiters);
  }

  // For IE8, we need to prevent using an empty character set. Otherwise,
  // incorrect matching will occur.
  var delimiterAlternative = delimiterSet ? '|[' + delimiterSet + ']+' : '';

  // Group 1: start of string or a run of delimiters. Group 2: the lowercase
  // letter that starts the word.
  var wordStart = new RegExp('(^' + delimiterAlternative + ')([a-z])', 'g');
  return str.replace(wordStart, function(all, p1, p2) {
    return p1 + p2.toUpperCase();
  });
};
1468
1469
/**
 * Capitalizes a string, i.e. converts the first letter to uppercase
 * and all other letters to lowercase, e.g.:
 *
 * goog.string.capitalize('one')     => 'One'
 * goog.string.capitalize('ONE')     => 'One'
 * goog.string.capitalize('one two') => 'One two'
 *
 * Note that this function does not trim initial whitespace.
 *
 * @param {string} str String value to capitalize.
 * @return {string} String value with first letter in uppercase.
 */
goog.string.capitalize = function(str) {
  var firstChar = String(str.charAt(0));
  var remainder = String(str.substr(1));
  return firstChar.toUpperCase() + remainder.toLowerCase();
};
1487
1488
/**
 * Parse a string in decimal or hexadecimal ('0xFFFF') form.
 *
 * To parse a particular radix, please use parseInt(string, radix) directly. See
 * https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/parseInt
 *
 * This is a wrapper for the built-in parseInt function that will only parse
 * numbers as base 10 or base 16.  Some JS implementations assume strings
 * starting with "0" are intended to be octal. ES3 allowed but discouraged
 * this behavior. ES5 forbids it.  This function emulates the ES5 behavior.
 *
 * For more information, see Mozilla JS Reference: http://goo.gl/8RiFj
 *
 * @param {string|number|null|undefined} value The value to be parsed.
 * @return {number} The number, parsed. If the string failed to parse, this
 *     will be NaN.
 */
goog.string.parseInt = function(value) {
  // Anything the global isFinite() accepts is forced to a string; other
  // values are left as they are.
  var text = isFinite(value) ? String(value) : value;

  if (!goog.isString(text)) {
    return NaN;
  }

  // If the string starts with '0x' or '-0x', parse as hex.
  var looksLikeHex = /^\s*-?0x/i.test(text);
  return parseInt(text, looksLikeHex ? 16 : 10);
};
1520
1521
/**
 * Splits a string on a separator a limited number of times.
 *
 * This implementation is more similar to Python or Java, where the limit
 * parameter specifies the maximum number of splits rather than truncating
 * the number of results.
 *
 * See http://docs.python.org/2/library/stdtypes.html#str.split
 * See JavaDoc: http://goo.gl/F2AsY
 * See Mozilla reference: http://goo.gl/dZdZs
 *
 * @param {string} str String to split.
 * @param {string} separator The separator.
 * @param {number} limit The limit to the number of splits. The resulting array
 *     will have a maximum length of limit+1.  Negative numbers are the same
 *     as zero.
 * @return {!Array<string>} The string, split.
 */
goog.string.splitLimit = function(str, separator, limit) {
  var allParts = str.split(separator);
  var result = [];
  var splitsLeft = limit;
  var next = 0;

  // Copy parts over one by one until the limit is used up or no parts are
  // left.
  while (splitsLeft > 0 && next < allParts.length) {
    result.push(allParts[next]);
    next++;
    splitsLeft--;
  }

  // Whatever was not copied is glued back together with the separator and
  // becomes the final element.
  if (next < allParts.length) {
    result.push(allParts.slice(next).join(separator));
  }

  return result;
};
1559
1560
/**
 * Computes the Levenshtein edit distance between two strings, keeping only
 * two rows of the distance table in memory at a time.
 * @param {string} a
 * @param {string} b
 * @return {number} The edit distance between the two strings.
 */
goog.string.editDistance = function(a, b) {
  if (a == b) {
    return 0;
  }

  // With one side empty, the distance is the length of the other side.
  if (!a.length || !b.length) {
    return Math.max(a.length, b.length);
  }

  var previousRow = [];
  var currentRow = [];
  var col;

  // Row for the empty prefix of a: distance to the first col chars of b.
  for (col = 0; col <= b.length; col++) {
    previousRow[col] = col;
  }

  for (var row = 0; row < a.length; row++) {
    currentRow[0] = row + 1;

    for (col = 0; col < b.length; col++) {
      var mismatch = a[row] != b[col] ? 1 : 0;
      // Cost for the prefix pair is the minimum of adding one character,
      // removing one character, or replacing one character.
      currentRow[col + 1] = Math.min(
          currentRow[col] + 1,
          previousRow[col + 1] + 1,
          previousRow[col] + mismatch);
    }

    // The finished row becomes the previous row of the next pass. The other
    // array is reused and fully overwritten there.
    var finishedRow = currentRow;
    currentRow = previousRow;
    previousRow = finishedRow;
  }

  // After the final swap the last computed row is in previousRow.
  return previousRow[b.length];
};