lib/goog/string/string.js

1// Copyright 2006 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/**
16 * @fileoverview Utilities for string manipulation.
17 * @author arv@google.com (Erik Arvidsson)
18 */
19
20
21/**
22 * Namespace for string utilities
23 */
24goog.provide('goog.string');
25goog.provide('goog.string.Unicode');
26
27
/**
 * @define {boolean} Enables HTML escaping of lowercase letter "e" which helps
 * with detection of double-escaping as this letter is frequently used.
 *
 * When true, goog.string.htmlEscape additionally rewrites every "e" as
 * "&#101;" and the pre-check pattern goog.string.ALL_RE_ includes "e".
 * Defaults to false.
 */
goog.define('goog.string.DETECT_DOUBLE_ESCAPING', false);
33
34
/**
 * @define {boolean} Whether to force non-dom html unescaping.
 *
 * When true, goog.string.unescapeEntities always takes the pure XML entity
 * path, even when a 'document' property is present on goog.global.
 * Defaults to false.
 */
goog.define('goog.string.FORCE_NON_DOM_HTML_UNESCAPING', false);
39
40
/**
 * Common Unicode string characters.
 *
 * NBSP is the non-breaking space (code 0xA0); goog.string.preserveSpaces
 * inserts it in place of spaces that HTML would otherwise collapse.
 * @enum {string}
 */
goog.string.Unicode = {
  NBSP: '\xa0'
};
48
49
/**
 * Tests whether a string begins with the given prefix.
 * @param {string} str The string to inspect.
 * @param {string} prefix The text expected at the very start of {@code str}.
 * @return {boolean} True if {@code str} begins with {@code prefix}.
 */
goog.string.startsWith = function(str, prefix) {
  // Searching backwards from index 0 only ever inspects the start of the
  // string, so the whole string is never scanned.
  var foundAt = str.lastIndexOf(prefix, 0);
  return foundAt == 0;
};
59
60
/**
 * Tests whether a string finishes with the given suffix.
 * @param {string} str The string to inspect.
 * @param {string} suffix The text expected at the very end of {@code str}.
 * @return {boolean} True if {@code str} ends with {@code suffix}.
 */
goog.string.endsWith = function(str, suffix) {
  // Index at which the suffix would have to begin.
  var offset = str.length - suffix.length;
  // A suffix longer than the string can never match.
  return offset >= 0 ? str.indexOf(suffix, offset) == offset : false;
};
71
72
/**
 * Tests whether a string begins with the given prefix, ignoring case.
 * @param {string} str The string to inspect.
 * @param {string} prefix The text expected at the start of {@code str}.
 * @return {boolean} True if {@code str} begins with {@code prefix} (ignoring
 *     case).
 */
goog.string.caseInsensitiveStartsWith = function(str, prefix) {
  // Only the leading prefix.length characters can take part in the match.
  var head = str.substr(0, prefix.length);
  return goog.string.caseInsensitiveCompare(prefix, head) == 0;
};
84
85
/**
 * Tests whether a string finishes with the given suffix, ignoring case.
 * @param {string} str The string to inspect.
 * @param {string} suffix The text expected at the end of {@code str}.
 * @return {boolean} True if {@code str} ends with {@code suffix} (ignoring
 *     case).
 */
goog.string.caseInsensitiveEndsWith = function(str, suffix) {
  // Only the trailing suffix.length characters can take part in the match.
  var tail = str.substr(str.length - suffix.length, suffix.length);
  return goog.string.caseInsensitiveCompare(suffix, tail) == 0;
};
97
98
/**
 * Compares two strings for equality after lower-casing both.
 * @param {string} str1 First string to compare.
 * @param {string} str2 Second string to compare.
 * @return {boolean} True if {@code str1} and {@code str2} are the same string,
 *     ignoring case.
 */
goog.string.caseInsensitiveEquals = function(str1, str2) {
  var lowered1 = str1.toLowerCase();
  var lowered2 = str2.toLowerCase();
  return lowered1 == lowered2;
};
109
110
/**
 * Performs simple python-style substitution of {@code %s} placeholders.
 * subs("foo%s hot%s", "bar", "dog") becomes "foobar hotdog".
 *
 * Placeholders are filled left to right. Placeholders without a matching
 * argument are left in the output as a literal '%s'; surplus arguments are
 * ignored.
 * @param {string} str The string containing the pattern.
 * @param {...*} var_args The items to substitute into the pattern.
 * @return {string} A copy of {@code str} in which each occurrence of
 *     {@code %s} has been replaced by an argument from {@code var_args}.
 */
goog.string.subs = function(str, var_args) {
  var pieces = str.split('%s');
  var lastPiece = pieces.length - 1;
  var result = '';

  // Substitutions go into the gaps between pieces. arguments[0] is the
  // pattern itself, so the value for gap i lives at arguments[i + 1].
  var used = 0;
  while (used < lastPiece && used + 1 < arguments.length) {
    result += pieces[used] + arguments[used + 1];
    used++;
  }

  // Re-join whatever is left so that unused placeholders survive as '%s'.
  return result + pieces.slice(used).join('%s');
};
133
134
/**
 * Collapses every run of whitespace (spaces, non-breaking spaces, new lines
 * and tabs) into one space and removes whitespace from both ends.
 * @param {string} str Input string.
 * @return {string} A copy of {@code str} with collapsed whitespace.
 */
goog.string.collapseWhitespace = function(str) {
  // IE's \s class omits the non-breaking space (0xa0), contrary to section 7.2
  // of the ECMAScript spec, so it is listed explicitly to keep behavior
  // consistent across browsers.
  var collapsed = str.replace(/[\s\xa0]+/g, ' ');
  // After collapsing, each end holds at most a single plain space.
  return collapsed.replace(/^\s+|\s+$/g, '');
};
147
148
/**
 * Reports whether a string has no characters other than whitespace.
 * @param {string} str The string to check.
 * @return {boolean} Whether {@code str} is empty or whitespace only.
 */
goog.string.isEmptyOrWhitespace = function(str) {
  // A length == 0 shortcut is deliberately omitted: it measured slower in all
  // browsers (about the same in Opera).
  // IE's \s class omits the non-breaking space (0xa0), contrary to section 7.2
  // of the ECMAScript spec, so it is listed explicitly to keep behavior
  // consistent across browsers.
  var blankPattern = /^[\s\xa0]*$/;
  return blankPattern.test(str);
};
162
163
/**
 * Reports whether a string has zero length.
 * @param {string} str The string to check.
 * @return {boolean} Whether {@code str} is empty.
 */
goog.string.isEmptyString = function(str) {
  var charCount = str.length;
  return charCount == 0;
};
172
173
/**
 * Checks if a string is empty or contains only whitespaces.
 *
 * This is a plain alias: it references the same function object as
 * goog.string.isEmptyOrWhitespace.
 *
 * TODO(user): Deprecate this when clients have been switched over to
 * goog.string.isEmptyOrWhitespace.
 *
 * @param {string} str The string to check.
 * @return {boolean} Whether {@code str} is empty or whitespace only.
 */
goog.string.isEmpty = goog.string.isEmptyOrWhitespace;
184
185
/**
 * Checks if a value is null, undefined, empty or contains only whitespaces.
 *
 * The value is first passed through goog.string.makeSafe and the result is
 * then tested with goog.string.isEmptyOrWhitespace.
 * NOTE(review): makeSafe is defined outside this part of the file; it is
 * presumably what maps null and undefined to an empty string -- confirm.
 * @param {*} str The string to check.
 * @return {boolean} Whether {@code str} is null, undefined, empty, or
 *     whitespace only.
 * @deprecated Use goog.string.isEmptyOrWhitespace(goog.string.makeSafe(str))
 *     instead.
 */
goog.string.isEmptyOrWhitespaceSafe = function(str) {
  var safeStr = goog.string.makeSafe(str);
  return goog.string.isEmptyOrWhitespace(safeStr);
};
197
198
/**
 * Checks if a string is null, undefined, empty or contains only whitespaces.
 *
 * This is a plain alias: it references the same function object as
 * goog.string.isEmptyOrWhitespaceSafe.
 *
 * TODO(user): Deprecate this when clients have been switched over to
 * goog.string.isEmptyOrWhitespaceSafe.
 *
 * @param {*} str The string to check.
 * @return {boolean} Whether {@code str} is null, undefined, empty, or
 *     whitespace only.
 */
goog.string.isEmptySafe = goog.string.isEmptyOrWhitespaceSafe;
210
211
/**
 * Reports whether a string consists solely of breaking whitespace, i.e. tab,
 * line feed, carriage return and space. An empty string also qualifies.
 * @param {string} str The string to check.
 * @return {boolean} Whether the string is all breaking whitespace.
 */
goog.string.isBreakingWhitespace = function(str) {
  // Look for a single character outside the allowed set.
  var hasOtherChar = /[^\t\n\r ]/.test(str);
  return !hasOtherChar;
};
220
221
/**
 * Reports whether a string consists solely of the ASCII letters a-z and A-Z.
 * An empty string also qualifies.
 * @param {string} str string to check.
 * @return {boolean} True if {@code str} consists entirely of letters.
 */
goog.string.isAlpha = function(str) {
  // Look for a single character outside the allowed set.
  var hasNonLetter = /[^a-zA-Z]/.test(str);
  return !hasNonLetter;
};
230
231
/**
 * Reports whether a value, viewed as a string, consists solely of the digits
 * 0-9. An empty string also qualifies.
 * @param {*} str string to check. If not a string, it will be
 *     casted to one.
 * @return {boolean} True if {@code str} is numeric.
 */
goog.string.isNumeric = function(str) {
  // RegExp.prototype.test performs the string conversion of its argument.
  var hasNonDigit = /[^0-9]/.test(str);
  return !hasNonDigit;
};
241
242
/**
 * Reports whether a string consists solely of ASCII letters and the digits
 * 0-9. An empty string also qualifies.
 * @param {string} str string to check.
 * @return {boolean} True if {@code str} is alphanumeric.
 */
goog.string.isAlphaNumeric = function(str) {
  // Look for a single character outside the allowed set.
  var hasOtherChar = /[^a-zA-Z0-9]/.test(str);
  return !hasOtherChar;
};
251
252
/**
 * Reports whether a character is the plain space character (' '). No other
 * kind of whitespace is accepted.
 * @param {string} ch Character to check.
 * @return {boolean} True if {@code ch} is a space.
 */
goog.string.isSpace = function(ch) {
  var space = ' ';
  return ch == space;
};
261
262
/**
 * Checks if a character is a valid unicode character.
 *
 * Two ranges are accepted: ' ' through '~', and '\u0080' through '\uFFFD'.
 * Note that the single-character length requirement applies to the first
 * range only; for the second range the argument is compared as a string, so
 * a longer string that sorts within those bounds is accepted as well.
 * @param {string} ch Character to check.
 * @return {boolean} True if {@code ch} is a valid unicode character.
 */
goog.string.isUnicodeChar = function(ch) {
  var isPrintableAscii = ch.length == 1 && ch >= ' ' && ch <= '~';
  var isInUpperRange = ch >= '\u0080' && ch <= '\uFFFD';
  return isPrintableAscii || isInUpperRange;
};
272
273
/**
 * Replaces newlines in a string with spaces. A run of consecutive newlines
 * (\r\n, \r or \n in any mix) becomes one single space.
 * @param {string} str The string from which to strip newlines.
 * @return {string} A copy of {@code str} stripped of newlines.
 */
goog.string.stripNewlines = function(str) {
  var newlineRun = /(\r\n|\r|\n)+/g;
  return str.replace(newlineRun, ' ');
};
283
284
/**
 * Converts Windows (\r\n) and old Mac (\r) line endings to unix style (\n).
 * @param {string} str The string in which to canonicalize newlines.
 * @return {string} A copy of {@code str} with canonicalized newlines.
 */
goog.string.canonicalizeNewlines = function(str) {
  // \r\n is listed first so that a Windows line ending yields one \n, not two.
  var anyNewline = /(\r\n|\r|\n)/g;
  return str.replace(anyNewline, '\n');
};
293
294
/**
 * Normalizes whitespace in a string by turning every individual whitespace
 * character, including the non-breaking space, into a plain space. Runs are
 * not collapsed: each character is replaced one for one.
 * @param {string} str The string in which to normalize whitespace.
 * @return {string} A copy of {@code str} with all whitespace normalized.
 */
goog.string.normalizeWhitespace = function(str) {
  var singleWhitespace = /\xa0|\s/g;
  return str.replace(singleWhitespace, ' ');
};
304
305
/**
 * Normalizes spaces in a string: every run of spaces and tabs becomes one
 * space, and every non-breaking space becomes a space. Non-breaking spaces
 * are replaced one at a time rather than as part of a run.
 * @param {string} str The string in which to normalize spaces.
 * @return {string} A copy of {@code str} with all consecutive spaces and tabs
 *     replaced with a single space.
 */
goog.string.normalizeSpaces = function(str) {
  var nbspOrSpaceRun = /\xa0|[ \t]+/g;
  return str.replace(nbspOrSpaceRun, ' ');
};
316
317
/**
 * Strips breaking spaces (tab, carriage return, line feed, space) from both
 * ends of the string and squeezes each interior run of them down to a single
 * space. The original and the result strings render the same way in HTML.
 * @param {string} str A string in which to collapse spaces.
 * @return {string} Copy of the string with normalized breaking spaces.
 */
goog.string.collapseBreakingSpaces = function(str) {
  var collapsed = str.replace(/[\t\r\n ]+/g, ' ');
  return collapsed.replace(/^[\t\r\n ]+|[\t\r\n ]+$/g, '');
};
329
330
/**
 * Removes whitespace from both ends of a string.
 *
 * The implementation is picked once, when this file is evaluated: the native
 * String.prototype.trim is used when goog.TRUSTED_SITE is set and the native
 * method exists; otherwise a regular-expression version is used.
 * @param {string} str The string to trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trim = !(goog.TRUSTED_SITE && String.prototype.trim) ?
    function(str) {
      // IE's \s class omits the non-breaking space (0xa0), contrary to
      // section 7.2 of the ECMAScript spec, so it is listed explicitly to
      // keep behavior consistent across browsers.
      var edgeWhitespace = /^[\s\xa0]+|[\s\xa0]+$/g;
      return str.replace(edgeWhitespace, '');
    } :
    function(str) {
      return str.trim();
    };
347
348
/**
 * Removes whitespace from the start of a string.
 * @param {string} str The string to left trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trimLeft = function(str) {
  // IE's \s class omits the non-breaking space (0xa0), contrary to section 7.2
  // of the ECMAScript spec, so it is listed explicitly to keep behavior
  // consistent across browsers.
  var leadingWhitespace = /^[\s\xa0]+/;
  return str.replace(leadingWhitespace, '');
};
360
361
/**
 * Removes whitespace from the end of a string.
 * @param {string} str The string to right trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trimRight = function(str) {
  // IE's \s class omits the non-breaking space (0xa0), contrary to section 7.2
  // of the ECMAScript spec, so it is listed explicitly to keep behavior
  // consistent across browsers.
  var trailingWhitespace = /[\s\xa0]+$/;
  return str.replace(trailingWhitespace, '');
};
373
374
/**
 * Comparator that orders two values by their lower-cased string forms.
 * -1 = str1 less than str2
 *  0 = str1 equals str2
 *  1 = str1 greater than str2
 *
 * Both arguments are converted with String() before being lower-cased.
 *
 * @param {string} str1 The string to compare.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} The comparator result, as described above.
 */
goog.string.caseInsensitiveCompare = function(str1, str2) {
  var lowered1 = String(str1).toLowerCase();
  var lowered2 = String(str2).toLowerCase();

  if (lowered1 == lowered2) {
    return 0;
  }
  return lowered1 < lowered2 ? -1 : 1;
};
397
398
/**
 * Regular expression used for splitting a string into substrings of fractional
 * numbers, integers, and non-numeric characters.
 *
 * The alternatives are tried in order: a dot followed by digits, then a run of
 * digits, then a run of non-digits. The pattern is global so that
 * String.prototype.match (as used by goog.string.numerateCompare) returns
 * every token rather than only the first.
 * @type {RegExp}
 * @private
 */
goog.string.numerateCompareRegExp_ = /(\.\d+)|(\d+)|(\D+)/g;
406
407
/**
 * String comparison function that handles numbers in a way humans might expect.
 * Using this function, the string "File 2.jpg" sorts before "File 10.jpg". The
 * comparison is mostly case-insensitive, though strings that are identical
 * except for case are sorted with the upper-case strings before lower-case.
 *
 * This comparison function is significantly slower (about 500x) than either
 * the default or the case-insensitive compare. It should not be used in
 * time-critical code, but should be fast enough to sort several hundred short
 * strings (like filenames) with a reasonable delay.
 *
 * Falsy arguments sort first: a falsy {@code str1} yields -1 and a falsy
 * {@code str2} yields 1, unless the two are loosely equal (which yields 0).
 *
 * @param {string} str1 The string to compare in a numerically sensitive way.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than
 *     0 if str1 > str2.
 */
goog.string.numerateCompare = function(str1, str2) {
  if (str1 == str2) {
    return 0;
  }
  if (!str1) {
    return -1;
  }
  if (!str2) {
    return 1;
  }

  // Tokenizing both strings up front with match turned out to be faster for
  // most inputs than RegExp.exec or a character-by-character walk.
  var splitter = goog.string.numerateCompareRegExp_;
  var parts1 = str1.toLowerCase().match(splitter);
  var parts2 = str2.toLowerCase().match(splitter);
  var shared = Math.min(parts1.length, parts2.length);

  for (var idx = 0; idx < shared; idx++) {
    var left = parts1[idx];
    var right = parts2[idx];
    if (left == right) {
      continue;
    }

    // The first differing pair of tokens decides the result. A numeric
    // comparison is only used when both tokens parse as integers and the
    // integers differ; decimal tokens are ordered as strings
    // (e.g., '.09' < '.1').
    var leftNum = parseInt(left, 10);
    if (!isNaN(leftNum)) {
      var rightNum = parseInt(right, 10);
      var delta = leftNum - rightNum;
      if (!isNaN(rightNum) && delta) {
        return delta;
      }
    }
    return left < right ? -1 : 1;
  }

  // Every shared token matched, so if one string has tokens left over, the
  // one with fewer tokens sorts first.
  if (parts1.length != parts2.length) {
    return parts1.length - parts2.length;
  }

  // The strings differ only by case (exact equality was handled at the top).
  // Fall back to plain string comparison to keep the sort stable.
  return str1 < str2 ? -1 : 1;
};
472
473
/**
 * URL-encodes a value by converting it to a string and passing the result to
 * encodeURIComponent.
 * @param {*} str The string to url-encode.
 * @return {string} An encoded copy of {@code str} that is safe for urls.
 *     Note that '#', ':', and other characters used to delimit portions
 *     of URLs *will* be encoded.
 */
goog.string.urlEncode = function(str) {
  var text = String(str);
  return encodeURIComponent(text);
};
484
485
/**
 * URL-decodes a string. Every '+' is turned into a space first, because
 * decodeURIComponent does not perform that conversion itself.
 * @param {string} str The string to url decode.
 * @return {string} The decoded {@code str}.
 */
goog.string.urlDecode = function(str) {
  var withSpaces = str.replace(/\+/g, ' ');
  return decodeURIComponent(withSpaces);
};
495
496
/**
 * Replaces each newline (\r\n, \r or \n) with a line-break tag: '<br />' when
 * XML compatibility is requested, '<br>' otherwise.
 * @param {string} str The string in which to convert newlines.
 * @param {boolean=} opt_xml Whether to use XML compatible tags.
 * @return {string} A copy of {@code str} with converted newlines.
 */
goog.string.newLineToBr = function(str, opt_xml) {
  var lineBreakTag = opt_xml ? '<br />' : '<br>';
  return str.replace(/(\r\n|\r|\n)/g, lineBreakTag);
};
506
507
/**
 * Escapes '&', '<', '>', double quote '"', single quote '\'' and the null
 * character so that a string can be included in HTML text or in an HTML tag
 * attribute value within double or single quotes.
 *
 * '>' does not need to be escaped for the HTML or XML to be valid, but it is
 * escaped for consistency with other implementations.
 *
 * With goog.string.DETECT_DOUBLE_ESCAPING, the lowercase letter "e" is
 * escaped as well.
 *
 * NOTE(user):
 * HtmlEscape is often called during the generation of large blocks of HTML.
 * Using statics for the regular expressions and strings is an optimization
 * that can more than half the amount of time IE spends in this function for
 * large apps, since strings and regexes both contribute to GC allocations.
 *
 * Testing for the presence of a character before escaping increases the number
 * of function calls, but actually provides a speed increase for the average
 * case -- since the average case often doesn't require the escaping of all 4
 * characters and indexOf() is much cheaper than replace().
 * The worst case does suffer slightly from the additional calls, therefore the
 * opt_isLikelyToContainHtmlChars option has been included for situations
 * where all 4 HTML entities are very likely to be present and need escaping.
 *
 * Some benchmarks (times tended to fluctuate +-0.05ms):
 *                         FireFox                 IE6
 * (no chars / average (mix of cases) / all 4 chars)
 * no checks            0.13 / 0.22 / 0.22      0.23 / 0.53 / 0.80
 * indexOf              0.08 / 0.17 / 0.26      0.22 / 0.54 / 0.84
 * indexOf + re test    0.07 / 0.17 / 0.28      0.19 / 0.50 / 0.85
 *
 * An additional advantage of checking if replace actually needs to be called
 * is a reduction in the number of object allocations, so as the size of the
 * application grows the difference between the various methods would increase.
 *
 * @param {string} str string to be escaped.
 * @param {boolean=} opt_isLikelyToContainHtmlChars Don't perform a check to see
 *     if the character needs replacing - use this option if you expect each of
 *     the characters to appear often. Leave false if you expect few html
 *     characters to occur in your strings, such as if you are escaping HTML.
 * @return {string} An escaped copy of {@code str}.
 */
goog.string.htmlEscape = function(str, opt_isLikelyToContainHtmlChars) {
  if (opt_isLikelyToContainHtmlChars) {
    // Unconditional path: run every replacement without probing first.
    // '&' must be handled first so the entities added afterwards are not
    // themselves re-escaped.
    str = str.replace(goog.string.AMP_RE_, '&amp;');
    str = str.replace(goog.string.LT_RE_, '&lt;');
    str = str.replace(goog.string.GT_RE_, '&gt;');
    str = str.replace(goog.string.QUOT_RE_, '&quot;');
    str = str.replace(goog.string.SINGLE_QUOTE_RE_, '&#39;');
    str = str.replace(goog.string.NULL_RE_, '&#0;');
    if (goog.string.DETECT_DOUBLE_ESCAPING) {
      str = str.replace(goog.string.E_RE_, '&#101;');
    }
    return str;
  }

  // One cheap test settles the common case where nothing needs escaping; in
  // the worst case it barely affects the time taken.
  if (!goog.string.ALL_RE_.test(str)) {
    return str;
  }

  // Probing path: str.indexOf is faster than regex.test here, and skipping
  // replace() when a character is absent avoids needless allocations.
  if (str.indexOf('&') >= 0) {
    str = str.replace(goog.string.AMP_RE_, '&amp;');
  }
  if (str.indexOf('<') >= 0) {
    str = str.replace(goog.string.LT_RE_, '&lt;');
  }
  if (str.indexOf('>') >= 0) {
    str = str.replace(goog.string.GT_RE_, '&gt;');
  }
  if (str.indexOf('"') >= 0) {
    str = str.replace(goog.string.QUOT_RE_, '&quot;');
  }
  if (str.indexOf('\'') >= 0) {
    str = str.replace(goog.string.SINGLE_QUOTE_RE_, '&#39;');
  }
  if (str.indexOf('\x00') >= 0) {
    str = str.replace(goog.string.NULL_RE_, '&#0;');
  }
  if (goog.string.DETECT_DOUBLE_ESCAPING && str.indexOf('e') >= 0) {
    str = str.replace(goog.string.E_RE_, '&#101;');
  }
  return str;
};
596
597
/**
 * Regular expression that matches an ampersand, for use in escaping.
 * Global, so String.prototype.replace rewrites every occurrence.
 * @const {!RegExp}
 * @private
 */
goog.string.AMP_RE_ = /&/g;


/**
 * Regular expression that matches a less than sign, for use in escaping.
 * Global, so String.prototype.replace rewrites every occurrence.
 * @const {!RegExp}
 * @private
 */
goog.string.LT_RE_ = /</g;


/**
 * Regular expression that matches a greater than sign, for use in escaping.
 * Global, so String.prototype.replace rewrites every occurrence.
 * @const {!RegExp}
 * @private
 */
goog.string.GT_RE_ = />/g;


/**
 * Regular expression that matches a double quote, for use in escaping.
 * Global, so String.prototype.replace rewrites every occurrence.
 * @const {!RegExp}
 * @private
 */
goog.string.QUOT_RE_ = /"/g;


/**
 * Regular expression that matches a single quote, for use in escaping.
 * Global, so String.prototype.replace rewrites every occurrence.
 * @const {!RegExp}
 * @private
 */
goog.string.SINGLE_QUOTE_RE_ = /'/g;


/**
 * Regular expression that matches null character, for use in escaping.
 * Global, so String.prototype.replace rewrites every occurrence.
 * @const {!RegExp}
 * @private
 */
goog.string.NULL_RE_ = /\x00/g;


/**
 * Regular expression that matches a lowercase letter "e", for use in escaping.
 * Only applied by goog.string.htmlEscape when
 * goog.string.DETECT_DOUBLE_ESCAPING is enabled.
 * @const {!RegExp}
 * @private
 */
goog.string.E_RE_ = /e/g;


/**
 * Regular expression that matches any character that needs to be escaped.
 * The letter "e" is part of the set only when
 * goog.string.DETECT_DOUBLE_ESCAPING is enabled. Unlike the per-character
 * patterns this one is not global, so repeated test() calls carry no
 * lastIndex state between them.
 * @const {!RegExp}
 * @private
 */
goog.string.ALL_RE_ = (goog.string.DETECT_DOUBLE_ESCAPING ?
    /[\x00&<>"'e]/ :
    /[\x00&<>"']/);
662
663
/**
 * Unescapes an HTML string.
 *
 * Strings without an '&' are returned untouched. Otherwise a DOM-based
 * unescaper is used when a 'document' property exists on goog.global and
 * goog.string.FORCE_NON_DOM_HTML_UNESCAPING is off; in every other case only
 * the pure XML entities (plus numeric references) are resolved.
 *
 * @param {string} str The string to unescape.
 * @return {string} An unescaped copy of {@code str}.
 */
goog.string.unescapeEntities = function(str) {
  // Nothing to unescape without an ampersand.
  if (!goog.string.contains(str, '&')) {
    return str;
  }
  // Be careful not to touch a DOM when none is available or when non-DOM
  // unescaping was explicitly requested.
  var canUseDom = !goog.string.FORCE_NON_DOM_HTML_UNESCAPING &&
      'document' in goog.global;
  return canUseDom ?
      goog.string.unescapeEntitiesUsingDom_(str) :
      goog.string.unescapePureXmlEntities_(str);
};
684
685
/**
 * Unescapes a HTML string, resolving entities with the help of the supplied
 * document. Strings without an '&' are returned untouched.
 *
 * @param {string} str The string to unescape.
 * @param {!Document} document A document to use in escaping the string.
 * @return {string} An unescaped copy of {@code str}.
 */
goog.string.unescapeEntitiesWithDocument = function(str, document) {
  // Nothing to unescape without an ampersand.
  if (!goog.string.contains(str, '&')) {
    return str;
  }
  return goog.string.unescapeEntitiesUsingDom_(str, document);
};
699
700
/**
 * Unescapes an HTML string using a DOM to resolve non-XML, non-numeric
 * entities. This function is XSS-safe and whitespace-preserving.
 *
 * Resolution order for each match of goog.string.HTML_ENTITY_PATTERN_:
 * 1. a per-call cache, pre-seeded with &amp;, &lt;, &gt; and &quot;;
 * 2. numeric references whose value fits in one UTF-16 code unit
 *    (at most 0xFFFF), decoded directly with String.fromCharCode;
 * 3. everything else, via innerHTML on a scratch div. This includes numeric
 *    references above 0xFFFF: String.fromCharCode would silently truncate
 *    them to 16 bits and yield the wrong character, whereas the HTML parser
 *    decodes them to the proper surrogate pair.
 * @private
 * @param {string} str The string to unescape.
 * @param {Document=} opt_document An optional document to use for creating
 *     elements. If this is not specified then the default window.document
 *     will be used.
 * @return {string} The unescaped {@code str} string.
 */
goog.string.unescapeEntitiesUsingDom_ = function(str, opt_document) {
  /** @type {!Object<string, string>} */
  var seen = {'&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"'};
  var div;
  if (opt_document) {
    div = opt_document.createElement('div');
  } else {
    div = goog.global.document.createElement('div');
  }
  // Match as many valid entity characters as possible. If the actual entity
  // happens to be shorter, it will still work as innerHTML will return the
  // trailing characters unchanged. Since the entity characters do not include
  // open angle bracket, there is no chance of XSS from the innerHTML use.
  // Since no whitespace is passed to innerHTML, whitespace is preserved.
  return str.replace(goog.string.HTML_ENTITY_PATTERN_, function(s, entity) {
    // Check for cached entity.
    var value = seen[s];
    if (value) {
      return value;
    }
    // Check for numeric entity.
    if (entity.charAt(0) == '#') {
      // Prefix with 0 so that hex entities (e.g. &#x10) parse as hex numbers.
      var n = Number('0' + entity.substr(1));
      // String.fromCharCode only keeps the low 16 bits of its argument, so
      // it is used for single code unit values only. Larger references are
      // left to the innerHTML fallback below.
      if (!isNaN(n) && n <= 0xFFFF) {
        value = String.fromCharCode(n);
      }
    }
    // Fall back to innerHTML otherwise.
    if (!value) {
      // Append a non-entity character to avoid a bug in Webkit that parses
      // an invalid entity at the end of innerHTML text as the empty string.
      div.innerHTML = s + ' ';
      // Then remove the trailing character from the result.
      value = div.firstChild.nodeValue.slice(0, -1);
    }
    // Cache and return.
    return seen[s] = value;
  });
};
751
752
/**
 * Unescapes XML entities without using a DOM.
 *
 * Handles the named entities amp, lt, gt and quot plus decimal and
 * hexadecimal numeric references. Numeric references above 0xFFFF are
 * emitted as a UTF-16 surrogate pair, because String.fromCharCode alone
 * would truncate the value to 16 bits and produce the wrong character.
 * References beyond the Unicode range (above 0x10FFFF) and unrecognized
 * entities are returned unchanged.
 * @private
 * @param {string} str The string to unescape.
 * @return {string} An unescaped copy of {@code str}.
 */
goog.string.unescapePureXmlEntities_ = function(str) {
  return str.replace(/&([^;]+);/g, function(s, entity) {
    switch (entity) {
      case 'amp':
        return '&';
      case 'lt':
        return '<';
      case 'gt':
        return '>';
      case 'quot':
        return '"';
      default:
        if (entity.charAt(0) == '#') {
          // Prefix with 0 so that hex entities (e.g. &#x10) parse as hex.
          var n = Number('0' + entity.substr(1));
          if (!isNaN(n)) {
            if (n <= 0xFFFF) {
              // Fits in a single UTF-16 code unit.
              return String.fromCharCode(n);
            }
            if (n <= 0x10FFFF) {
              // Supplementary code point: split into high and low surrogate.
              var offset = n - 0x10000;
              return String.fromCharCode(
                  0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
            }
          }
        }
        // For invalid entities we just return the entity
        return s;
    }
  });
};
783
784
/**
 * Regular expression that matches an HTML entity.
 * See also HTML5: Tokenization / Tokenizing character references.
 *
 * Matches an '&' followed by one or more characters that are not ';',
 * whitespace, '<' or '&', with an optional terminating ';'. The characters
 * between '&' and the optional ';' are captured as group 1. The pattern is
 * global so that String.prototype.replace visits every entity.
 * @private
 * @type {!RegExp}
 */
goog.string.HTML_ENTITY_PATTERN_ = /&([^;\s<&]+);?/g;
792
793
/**
 * Escapes whitespace so that spatial formatting survives HTML rendering:
 * every space becomes a space followed by the character entity #160 (used
 * because it is safer for xml), and newlines become line-break tags.
 * @param {string} str The string in which to escape whitespace.
 * @param {boolean=} opt_xml Whether to use XML compatible tags.
 * @return {string} An escaped copy of {@code str}.
 */
goog.string.whitespaceEscape = function(str, opt_xml) {
  // goog.string.preserveSpaces is intentionally not used here, for backwards
  // compatibility.
  var spacesEscaped = str.replace(/ /g, ' &#160;');
  return goog.string.newLineToBr(spacesEscaped, opt_xml);
};
805
806
/**
 * Preserves spaces that HTML would otherwise collapse by swapping them for
 * non-breaking space Unicode characters. A space is swapped when it sits at
 * the start of the string or directly after a newline or another space.
 * @param {string} str The string in which to preserve whitespace.
 * @return {string} A copy of {@code str} with preserved whitespace.
 */
goog.string.preserveSpaces = function(str) {
  // $1 puts back whatever preceded the space (nothing, a newline or a space).
  var replacement = '$1' + goog.string.Unicode.NBSP;
  return str.replace(/(^|[\n ]) /g, replacement);
};
816
817
/**
 * Removes one pair of matching quote characters surrounding a string. The
 * second argument lists the characters to treat as quotes; it may hold one
 * character or several, each of which is tried in turn. The first candidate
 * found at both the start and the end of the string is stripped. For example:
 *
 * <pre>
 * goog.string.stripQuotes('"abc"', '"`') --> 'abc'
 * goog.string.stripQuotes('`abc`', '"`') --> 'abc'
 * </pre>
 *
 * @param {string} str The string to strip.
 * @param {string} quoteChars The quote characters to strip.
 * @return {string} A copy of {@code str} without the quotes.
 */
goog.string.stripQuotes = function(str, quoteChars) {
  var candidateCount = quoteChars.length;
  for (var idx = 0; idx < candidateCount; idx++) {
    // With a single candidate the argument itself is used as the quote.
    var candidate = candidateCount == 1 ? quoteChars : quoteChars.charAt(idx);
    if (str.charAt(0) != candidate ||
        str.charAt(str.length - 1) != candidate) {
      continue;
    }
    return str.substring(1, str.length - 1);
  }
  return str;
};
843
844
/**
 * Truncates a string to a certain length and adds '...' if necessary. The
 * length also accounts for the ellipsis, so a maximum length of 10 and a string
 * 'Hello World!' produces 'Hello W...'.
 *
 * When escaped characters are protected, the string is unescaped before it is
 * measured and cut, and HTML-escaped again afterwards, so an entity is never
 * split in two.
 * @param {string} str The string to truncate.
 * @param {number} chars Max number of characters.
 * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped
 *     characters from being cut off in the middle.
 * @return {string} The truncated {@code str} string.
 */
goog.string.truncate = function(str, chars, opt_protectEscapedCharacters) {
  var text = opt_protectEscapedCharacters ?
      goog.string.unescapeEntities(str) : str;

  if (text.length > chars) {
    // Reserve three characters for the ellipsis.
    text = text.substring(0, chars - 3) + '...';
  }

  return opt_protectEscapedCharacters ? goog.string.htmlEscape(text) : text;
};
870
871
/**
 * Truncates a string in the middle, adding "..." if necessary, and favoring
 * the beginning of the string. {@code chars} characters of the original are
 * kept in total, split between the head and the tail, with the ellipsis
 * inserted between them.
 *
 * When escaped characters are protected, the string is unescaped before it is
 * measured and cut, and HTML-escaped again afterwards.
 * @param {string} str The string to truncate the middle of.
 * @param {number} chars Max number of characters.
 * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped
 *     characters from being cutoff in the middle.
 * @param {number=} opt_trailingChars Optional number of trailing characters to
 *     leave at the end of the string, instead of truncating as close to the
 *     middle as possible.
 * @return {string} A truncated copy of {@code str}.
 */
goog.string.truncateMiddle = function(str, chars,
    opt_protectEscapedCharacters, opt_trailingChars) {
  var text = opt_protectEscapedCharacters ?
      goog.string.unescapeEntities(str) : str;

  if (text.length > chars) {
    var headLength;
    var tailStart;
    if (opt_trailingChars) {
      // The caller fixed the tail size; it can never exceed the budget.
      var tailLength = opt_trailingChars > chars ? chars : opt_trailingChars;
      tailStart = text.length - tailLength;
      headLength = chars - tailLength;
    } else {
      // Split evenly; with an odd budget the extra character goes to the
      // head, which favors the beginning of the string.
      var half = Math.floor(chars / 2);
      tailStart = text.length - half;
      headLength = half + chars % 2;
    }
    text = text.substring(0, headLength) + '...' + text.substring(tailStart);
  }

  return opt_protectEscapedCharacters ? goog.string.htmlEscape(text) : text;
};
911
912
/**
 * Special chars that need to be escaped for goog.string.quote.
 *
 * Maps a raw character to the escape sequence that represents it inside a
 * double-quoted JS string literal. Also consulted by goog.string.escapeChar.
 * @private {!Object<string, string>}
 */
goog.string.specialEscapeChars_ = {
  '\0': '\\0',
  '\b': '\\b',
  '\f': '\\f',
  '\n': '\\n',
  '\r': '\\r',
  '\t': '\\t',
  '\x0B': '\\x0B', // '\v' is not supported in JScript
  '"': '\\"',
  '\\': '\\\\'
};
928
929
/**
 * Character mappings used internally for goog.string.escapeChar.
 *
 * escapeChar stores every result it computes here and consults this object
 * first on later calls. It is pre-seeded with the single quote, which is the
 * only place where that character's escape is defined.
 * @private {!Object<string, string>}
 */
goog.string.jsEscapeCache_ = {
  '\'': '\\\''
};
937
938
/**
 * Wraps a string in double quotes and escapes its characters so that the
 * result is a valid JS string literal.
 *
 * If the string value exposes a {@code quote} method, that method's result is
 * returned instead of the hand-built one.
 * @param {string} s The string to quote.
 * @return {string} A copy of {@code s} surrounded by double quotes.
 */
goog.string.quote = function(s) {
  s = String(s);
  if (s.quote) {
    return s.quote();
  }

  var pieces = ['"'];
  for (var pos = 0; pos < s.length; pos++) {
    var ch = s.charAt(pos);
    var escaped = goog.string.specialEscapeChars_[ch];
    if (!escaped) {
      // Codes 32 through 126 pass through unchanged; anything else gets a
      // \x or \u escape.
      var code = ch.charCodeAt(0);
      escaped = (code > 31 && code < 127) ? ch : goog.string.escapeChar(ch);
    }
    pieces.push(escaped);
  }
  pieces.push('"');
  return pieces.join('');
};
961
962
/**
 * Escapes a whole string by running every character through
 * goog.string.escapeChar and concatenating the results.
 * @param {string} str The string to escape.
 * @return {string} An escaped string representing {@code str}.
 */
goog.string.escapeString = function(str) {
  var pieces = [];
  for (var pos = 0, len = str.length; pos < len; pos++) {
    pieces.push(goog.string.escapeChar(str.charAt(pos)));
  }
  return pieces.join('');
};
975
976
/**
 * Takes a character and returns the escaped string for that character. For
 * example escapeChar(String.fromCharCode(15)) -> "\\x0F".
 *
 * Printable ASCII (character codes 32 to 126) is returned unchanged, apart
 * from the characters that have an entry in goog.string.jsEscapeCache_ or
 * goog.string.specialEscapeChars_. Codes below 256 become a two-digit
 * "\x" escape and all other codes a four-digit "\u" escape. Every result is
 * memoized in goog.string.jsEscapeCache_.
 *
 * @param {string} c The character to escape.
 * @return {string} An escaped string representing {@code c}.
 */
goog.string.escapeChar = function(c) {
  if (c in goog.string.jsEscapeCache_) {
    return goog.string.jsEscapeCache_[c];
  }

  if (c in goog.string.specialEscapeChars_) {
    return goog.string.jsEscapeCache_[c] = goog.string.specialEscapeChars_[c];
  }

  var rv = c;
  var cc = c.charCodeAt(0);
  if (!(cc > 31 && cc < 127)) {
    if (cc < 256) {
      // Two hex digits: codes below 0x10 need one leading zero.
      rv = cc < 16 ? '\\x0' : '\\x';
    } else {
      // Four hex digits: codes below 0x1000 need exactly one leading zero,
      // because everything below 0x100 was handled by the branch above.
      rv = cc < 4096 ? '\\u0' : '\\u';
    }
    rv += cc.toString(16).toUpperCase();
  }

  return goog.string.jsEscapeCache_[c] = rv;
};
1014
1015
/**
 * Tests whether one string occurs anywhere inside another.
 * @param {string} str The string to search.
 * @param {string} subString The substring to search for.
 * @return {boolean} Whether {@code str} contains {@code subString}.
 */
goog.string.contains = function(str, subString) {
  var position = str.indexOf(subString);
  return position != -1;
};
1025
1026
/**
 * Tests whether one string occurs inside another after both have been
 * converted to lower case.
 * @param {string} str The string to search.
 * @param {string} subString The substring to search for.
 * @return {boolean} Whether {@code str} contains {@code subString}.
 */
goog.string.caseInsensitiveContains = function(str, subString) {
  var haystack = str.toLowerCase();
  var needle = subString.toLowerCase();
  return goog.string.contains(haystack, needle);
};
1036
1037
/**
 * Counts the non-overlapping occurrences of ss in s.
 * If either s or ss evaluates to false, the count is zero.
 * @param {string} s The string to look in.
 * @param {string} ss The string to look for.
 * @return {number} Number of occurrences of ss in s.
 */
goog.string.countOf = function(s, ss) {
  if (!s || !ss) {
    return 0;
  }
  // Splitting on ss yields one more piece than there are occurrences.
  return s.split(ss).length - 1;
};
1048
1049
/**
 * Cuts a run of characters of a given length out of a string, starting at a
 * given index.
 * @param {string} s The base string from which to remove.
 * @param {number} index The index at which to remove the substring.
 * @param {number} stringLength The length of the substring to remove.
 * @return {string} A copy of {@code s} with the substring removed or the full
 *     string if nothing is removed or the input is invalid.
 */
goog.string.removeAt = function(s, index, stringLength) {
  // Nothing to remove unless the index lies inside the string and the length
  // is positive.
  if (!(index >= 0 && index < s.length && stringLength > 0)) {
    return s;
  }
  var head = s.substr(0, index);
  var tail = s.substr(index + stringLength, s.length - index - stringLength);
  return head + tail;
};
1068
1069
/**
 * Removes the first occurrence of a substring from a string.
 * @param {string} s The base string from which to remove.
 * @param {string} ss The string to remove. It is matched literally; if it is
 *     not a string it is converted with String() first.
 * @return {string} A copy of {@code s} with {@code ss} removed or the full
 *     string if nothing is removed.
 */
goog.string.remove = function(s, ss) {
  // With a string pattern, String.prototype.replace substitutes only the
  // first literal occurrence, so there is no need to build and compile an
  // escaped RegExp on every call. String() keeps the match literal even if a
  // non-string value (for example a RegExp) is passed in.
  return s.replace(String(ss), '');
};
1081
1082
/**
 * Removes every occurrence of a substring from a string.
 * @param {string} s The base string from which to remove.
 * @param {string} ss The string to remove.
 * @return {string} A copy of {@code s} with {@code ss} removed or the full
 *     string if nothing is removed.
 */
goog.string.removeAll = function(s, ss) {
  // Escape ss so that it is matched literally by the global pattern.
  var literalPattern = goog.string.regExpEscape(ss);
  return s.replace(new RegExp(literalPattern, 'g'), '');
};
1094
1095
/**
 * Escapes characters in the string that are not safe to use in a RegExp.
 * @param {*} s The string to escape. If not a string, it will be cast
 *     to one.
 * @return {string} A RegExp safe, escaped copy of {@code s}.
 */
goog.string.regExpEscape = function(s) {
  var text = String(s);
  // Prefix every listed punctuation character with a backslash.
  var escaped = text.replace(/([-()\[\]{}+?*.$\^|,:#<!\\])/g, '\\$1');
  // Spell a raw backspace character out as the \x08 escape sequence.
  return escaped.replace(/\x08/g, '\\x08');
};
1106
1107
/**
 * Builds a string consisting of a given string repeated n times.
 * @param {string} string The string to repeat.
 * @param {number} length The number of times to repeat.
 * @return {string} A string containing {@code length} repetitions of
 *     {@code string}.
 */
goog.string.repeat = function(string, length) {
  // Joining an array of length + 1 empty slots places the separator, i.e.
  // the string itself, into the result exactly length times.
  var slots = new Array(length + 1);
  return slots.join(string);
};
1118
1119
/**
 * Left-pads a number with zeros so that the part in front of the decimal
 * point is at least a given number of characters wide, optionally rounding
 * it to a fixed number of decimals first.
 * For example:
 * <pre>padNumber(1.25, 2, 3) -> '01.250'
 * padNumber(1.25, 2) -> '01.25'
 * padNumber(1.25, 2, 1) -> '01.3'
 * padNumber(1.25, 0) -> '1.25'</pre>
 *
 * @param {number} num The number to pad.
 * @param {number} length The desired length.
 * @param {number=} opt_precision The desired precision.
 * @return {string} {@code num} as a string with the given options.
 */
goog.string.padNumber = function(num, length, opt_precision) {
  var text = goog.isDef(opt_precision) ?
      num.toFixed(opt_precision) : String(num);
  // Everything before the decimal point (or the whole text when there is
  // none) counts toward the requested width.
  var dot = text.indexOf('.');
  var leadingWidth = dot == -1 ? text.length : dot;
  var zeros = goog.string.repeat('0', Math.max(0, length - leadingWidth));
  return zeros + text;
};
1141
1142
/**
 * Converts a value to a string, mapping null and undefined to the empty
 * string instead of 'null' and 'undefined'.
 *
 * @param {*} obj The object to convert.
 * @return {string} A string representation of the {@code obj}.
 */
goog.string.makeSafe = function(obj) {
  // The loose comparison matches both null and undefined.
  if (obj == null) {
    return '';
  }
  return String(obj);
};
1153
1154
/**
 * Concatenates all of its arguments into a single string. This is useful
 * since some browsers are very inefficient when it comes to using plus to
 * concat strings. Be careful when using null and undefined here since
 * these will not be included in the result. If you need to represent these
 * be sure to cast the argument to a String first.
 * For example:
 * <pre>buildString('a', 'b', 'c', 'd') -> 'abcd'
 * buildString(null, undefined) -> ''
 * </pre>
 * @param {...*} var_args A list of strings to concatenate. If not a string,
 *     it will be cast to one.
 * @return {string} The concatenation of {@code var_args}.
 */
goog.string.buildString = function(var_args) {
  // Copy the array-like arguments object into a real array, then join it;
  // join renders null and undefined entries as empty strings.
  var parts = Array.prototype.slice.call(arguments);
  return parts.join('');
};
1172
1173
/**
 * Returns a pseudo-random string.
 *
 * Doesn't trust Javascript's random function entirely: the result combines
 * two Math.random() draws, each scaled to the range [0, 2^31), and mixes the
 * current timestamp into the second one. Both values are encoded in base-36
 * to keep the string short. Because it is based on Math.random(), the result
 * should not be used where unpredictability matters for security.
 *
 * @return {string} A random string, e.g. sn1s7vb4gcic.
 */
goog.string.getRandomString = function() {
  var range = 2147483648;
  var first = Math.floor(Math.random() * range);
  // XOR works on signed 32-bit integers, so the mixed value may be negative;
  // Math.abs below keeps a minus sign out of the output.
  var mixed = Math.floor(Math.random() * range) ^ goog.now();
  return first.toString(36) + Math.abs(mixed).toString(36);
};
1188
1189
/**
 * Compares two version numbers.
 *
 * Versions are split on '.' into subversions, and each subversion is read as
 * a sequence of (number, qualifier) pairs such as '10' + 'b'. Comparison
 * stops at the first difference.
 *
 * @param {string|number} version1 Version of first item.
 * @param {string|number} version2 Version of second item.
 *
 * @return {number}  1 if {@code version1} is higher.
 *                   0 if arguments are equal.
 *                  -1 if {@code version2} is higher.
 */
goog.string.compareVersions = function(version1, version2) {
  // Trim surrounding whitespace and split both versions into subversions.
  var leftParts = goog.string.trim(String(version1)).split('.');
  var rightParts = goog.string.trim(String(version2)).split('.');
  var partCount = Math.max(leftParts.length, rightParts.length);
  var result = 0;

  // Walk the subversions for as long as they compare equal. A subversion
  // missing on one side is treated as the empty string.
  for (var partIdx = 0; result == 0 && partIdx < partCount; partIdx++) {
    var leftPart = leftParts[partIdx] || '';
    var rightPart = rightParts[partIdx] || '';

    // Each side needs its own RegExp object: the 'g' flag makes exec()
    // remember its position in lastIndex.
    var leftScanner = new RegExp('(\\d*)(\\D*)', 'g');
    var rightScanner = new RegExp('(\\d*)(\\D*)', 'g');

    // result is always 0 on entry, so the body runs at least once.
    while (result == 0) {
      var leftToken = leftScanner.exec(leftPart) || ['', '', ''];
      var rightToken = rightScanner.exec(rightPart) || ['', '', ''];
      // Both sides exhausted: this subversion is equal.
      if (leftToken[0].length == 0 && rightToken[0].length == 0) {
        break;
      }

      // A missing numeric part is equivalent to 0.
      var leftNum =
          leftToken[1].length == 0 ? 0 : parseInt(leftToken[1], 10);
      var rightNum =
          rightToken[1].length == 0 ? 0 : parseInt(rightToken[1], 10);

      // Precedence: the number first; then a component without a qualifier
      // beats one with a qualifier; finally the qualifiers are compared as
      // strings.
      result = goog.string.compareElements_(leftNum, rightNum) ||
          goog.string.compareElements_(leftToken[2].length == 0,
              rightToken[2].length == 0) ||
          goog.string.compareElements_(leftToken[2], rightToken[2]);
    }
  }

  return result;
};
1245
1246
/**
 * Three-way comparison of two elements of a version number using the
 * built-in < and > operators.
 *
 * @param {string|number|boolean} left An element from a version number.
 * @param {string|number|boolean} right An element from a version number.
 *
 * @return {number}  1 if {@code left} is higher.
 *                   0 if arguments are equal.
 *                  -1 if {@code right} is higher.
 * @private
 */
goog.string.compareElements_ = function(left, right) {
  return left < right ? -1 : (left > right ? 1 : 0);
};
1266
1267
/**
 * Exclusive upper bound of the values returned by goog.string.hashCode:
 * 2^32, written out in decimal.
 * @type {number}
 * @private
 */
goog.string.HASHCODE_MAX_ = 4294967296;
1274
1275
/**
 * String hash function similar to java.lang.String.hashCode().
 * The hash code for a string is computed as
 * s[0] * 31 ^ (n - 1) + s[1] * 31 ^ (n - 2) + ... + s[n - 1],
 * where s[i] is the ith character of the string, n is the length of
 * the string and ^ denotes exponentiation. The running value is reduced
 * modulo 2^32 after every character, so the result lies between 0
 * (inclusive) and 2^32 (exclusive).
 * @param {string} str A string.
 * @return {number} Hash value for {@code str}, between 0 (inclusive) and 2^32
 *     (exclusive). The empty string returns 0.
 */
goog.string.hashCode = function(str) {
  var hash = 0;
  for (var pos = 0, len = str.length; pos < len; pos++) {
    // Reduce on every step to stay within the 4 byte range 0 ... 2^32.
    hash = (31 * hash + str.charCodeAt(pos)) % goog.string.HASHCODE_MAX_;
  }
  return hash;
};
1296
1297
/**
 * Counter behind goog.string.createUniqueString; it holds the number that
 * the next generated ID will use. It starts at a random integer in the range
 * [0, 2^31).
 * @type {number}
 * @private
 */
goog.string.uniqueStringCounter_ = Math.floor(Math.random() * 0x80000000);
1304
1305
/**
 * Generates and returns a string which is unique in the current document.
 * This is useful, for example, to create unique IDs for DOM elements.
 * Each call consumes one value of goog.string.uniqueStringCounter_.
 * @return {string} A unique id.
 */
goog.string.createUniqueString = function() {
  var id = goog.string.uniqueStringCounter_;
  goog.string.uniqueStringCounter_ = id + 1;
  return 'goog_' + id;
};
1314
1315
/**
 * Converts the supplied string to a number, which may be Infinity or NaN.
 * This function strips whitespace: (toNumber(' 123') === 123)
 * This function accepts scientific notation: (toNumber('1e1') === 10)
 *
 * This is better than Javascript's built-in conversions because, sadly:
 *     (Number(' ') === 0) and (parseFloat('123a') === 123)
 *
 * @param {string} str The string to convert.
 * @return {number} The number the supplied string represents, or NaN.
 */
goog.string.toNumber = function(str) {
  var parsed = Number(str);
  // Number() turns empty and whitespace-only strings into 0; report those
  // as NaN instead.
  var isBlankZero = parsed == 0 && goog.string.isEmptyOrWhitespace(str);
  return isBlankZero ? NaN : parsed;
};
1334
1335
/**
 * Returns whether the given string is lower camel case (e.g. "isFooBar"):
 * one or more lowercase letters followed by any number of words that each
 * start with an uppercase letter.
 *
 * Note that this assumes the string is entirely letters.
 * @see http://en.wikipedia.org/wiki/CamelCase#Variations_and_synonyms
 *
 * @param {string} str String to test.
 * @return {boolean} Whether the string is lower camel case.
 */
goog.string.isLowerCamelCase = function(str) {
  var lowerCamelPattern = /^[a-z]+([A-Z][a-z]*)*$/;
  return lowerCamelPattern.test(str);
};
1348
1349
/**
 * Returns whether the given string is upper camel case (e.g. "FooBarBaz"):
 * one or more words that each start with an uppercase letter.
 *
 * Note that this assumes the string is entirely letters.
 * @see http://en.wikipedia.org/wiki/CamelCase#Variations_and_synonyms
 *
 * @param {string} str String to test.
 * @return {boolean} Whether the string is upper camel case.
 */
goog.string.isUpperCamelCase = function(str) {
  var upperCamelPattern = /^([A-Z][a-z]*)+$/;
  return upperCamelPattern.test(str);
};
1362
1363
/**
 * Converts a string from selector-case to camelCase (e.g. from
 * "multi-part-string" to "multiPartString"), useful for converting
 * CSS selectors and HTML dataset keys to their equivalent JS properties.
 * Only a hyphen directly followed by a lowercase letter a-z is rewritten.
 * @param {string} str The string in selector-case form.
 * @return {string} The string in camelCase form.
 */
goog.string.toCamelCase = function(str) {
  var text = String(str);
  // Drop each hyphen and upper-case the letter that followed it.
  var upperCaseLetter = function(all, letter) {
    return letter.toUpperCase();
  };
  return text.replace(/\-([a-z])/g, upperCaseLetter);
};
1376
1377
/**
 * Converts a string from camelCase to selector-case (e.g. from
 * "multiPartString" to "multi-part-string"), useful for converting JS
 * style and dataset properties to equivalent CSS selectors and HTML keys.
 * @param {string} str The string in camelCase form.
 * @return {string} The string in selector-case form.
 */
goog.string.toSelectorCase = function(str) {
  // Put a hyphen in front of every uppercase letter A-Z, then lower-case
  // the whole string.
  var hyphenated = String(str).replace(/([A-Z])/g, '-$1');
  return hyphenated.toLowerCase();
};
1388
1389
/**
 * Converts a string into TitleCase. First character of the string is always
 * capitalized in addition to the first letter of every subsequent word.
 * Words are delimited by one or more whitespaces by default. Custom delimiters
 * can optionally be specified to replace the default, which doesn't preserve
 * whitespace delimiters and instead must be explicitly included if needed.
 *
 * Default delimiter => " ":
 * goog.string.toTitleCase('oneTwoThree') => 'OneTwoThree'
 * goog.string.toTitleCase('one two three') => 'One Two Three'
 * goog.string.toTitleCase(' one two ') => ' One Two '
 * goog.string.toTitleCase('one_two_three') => 'One_two_three'
 * goog.string.toTitleCase('one-two-three') => 'One-two-three'
 *
 * Custom delimiter => "_-.":
 * goog.string.toTitleCase('oneTwoThree', '_-.') => 'OneTwoThree'
 * goog.string.toTitleCase('one two three', '_-.') => 'One two three'
 * goog.string.toTitleCase(' one two ', '_-.') => ' one two '
 * goog.string.toTitleCase('one_two_three', '_-.') => 'One_Two_Three'
 * goog.string.toTitleCase('one-two-three', '_-.') => 'One-Two-Three'
 * goog.string.toTitleCase('one...two...three', '_-.') => 'One...Two...Three'
 * goog.string.toTitleCase('one. two. three', '_-.') => 'One. two. three'
 * goog.string.toTitleCase('one-two.three', '_-.') => 'One-Two.Three'
 *
 * @param {string} str String value in camelCase form.
 * @param {string=} opt_delimiters Custom delimiter character set used to
 *     distinguish words in the string value. Each character represents a
 *     single delimiter. When provided, default whitespace delimiter is
 *     overridden and must be explicitly included if needed.
 * @return {string} String value in TitleCase form.
 */
goog.string.toTitleCase = function(str, opt_delimiters) {
  var delimiterChars = '\\s';
  if (goog.isString(opt_delimiters)) {
    delimiterChars = goog.string.regExpEscape(opt_delimiters);
  }

  // For IE8, we need to prevent using an empty character set. Otherwise,
  // incorrect matching will occur. With no delimiters at all, only the very
  // start of the string counts as a word boundary.
  var delimiterAlternative =
      delimiterChars ? '|[' + delimiterChars + ']+' : '';

  // Group 1 is the start of the string or a run of delimiters; group 2 is the
  // lowercase letter that begins the word.
  var wordStart =
      new RegExp('(^' + delimiterAlternative + ')([a-z])', 'g');
  return str.replace(wordStart, function(all, boundary, letter) {
    return boundary + letter.toUpperCase();
  });
};
1434
1435
/**
 * Capitalizes a string, i.e. converts the first character to uppercase
 * and all remaining characters to lowercase, e.g.:
 *
 * goog.string.capitalize('one') => 'One'
 * goog.string.capitalize('ONE') => 'One'
 * goog.string.capitalize('one two') => 'One two'
 *
 * Note that this function does not trim initial whitespace.
 *
 * @param {string} str String value to capitalize.
 * @return {string} String value with first letter in uppercase.
 */
goog.string.capitalize = function(str) {
  var first = String(str.charAt(0));
  var rest = String(str.substr(1));
  return first.toUpperCase() + rest.toLowerCase();
};
1453
1454
/**
 * Parse a string in decimal or hexadecimal ('0xFFFF') form.
 *
 * To parse a particular radix, please use parseInt(string, radix) directly. See
 * https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/parseInt
 *
 * This is a wrapper for the built-in parseInt function that will only parse
 * numbers as base 10 or base 16.  Some JS implementations assume strings
 * starting with "0" are intended to be octal. ES3 allowed but discouraged
 * this behavior. ES5 forbids it.  This function emulates the ES5 behavior.
 *
 * For more information, see Mozilla JS Reference: http://goo.gl/8RiFj
 *
 * @param {string|number|null|undefined} value The value to be parsed.
 * @return {number} The number, parsed. If the string failed to parse, this
 *     will be NaN.
 */
goog.string.parseInt = function(value) {
  // Anything the global isFinite() accepts is turned into a string first.
  var text = isFinite(value) ? String(value) : value;

  // Whatever is still not a string at this point cannot be parsed.
  if (!goog.isString(text)) {
    return NaN;
  }

  // A leading '0x' or '-0x' (after optional whitespace) selects base 16.
  var radix = /^\s*-?0x/i.test(text) ? 16 : 10;
  return parseInt(text, radix);
};
1486
1487
/**
 * Splits a string on a separator a limited number of times.
 *
 * This implementation is more similar to Python or Java, where the limit
 * parameter specifies the maximum number of splits rather than truncating
 * the number of results. Whatever is left after the last allowed split is
 * returned, separators included, as the final element.
 *
 * See http://docs.python.org/2/library/stdtypes.html#str.split
 * See JavaDoc: http://goo.gl/F2AsY
 * See Mozilla reference: http://goo.gl/dZdZs
 *
 * @param {string} str String to split.
 * @param {string} separator The separator.
 * @param {number} limit The limit to the number of splits. The resulting array
 *     will have a maximum length of limit+1.  Negative numbers are the same
 *     as zero.
 * @return {!Array<string>} The string, split.
 */
goog.string.splitLimit = function(str, separator, limit) {
  var pieces = str.split(separator);

  // Number of pieces handed back individually: none for a non-positive
  // limit, and never more than there are pieces.
  var taken = limit > 0 ? Math.min(Math.ceil(limit), pieces.length) : 0;
  var result = pieces.slice(0, taken);

  // Glue any remaining pieces back together as the last element.
  if (taken < pieces.length) {
    result.push(pieces.slice(taken).join(separator));
  }

  return result;
};
1525
1526
/**
 * Computes the Levenshtein edit distance between two strings, i.e. the
 * minimum number of single-character insertions, deletions and substitutions
 * needed to turn one into the other.
 *
 * Only two rows of the distance table are kept at any time.
 *
 * @param {string} a
 * @param {string} b
 * @return {number} The edit distance between the two strings.
 */
goog.string.editDistance = function(a, b) {
  if (a == b) {
    return 0;
  }

  // If either string is empty, the distance is the length of the other.
  if (!a.length || !b.length) {
    return Math.max(a.length, b.length);
  }

  // prevRow[j] is the distance between the processed prefix of a and the
  // first j characters of b. For the empty prefix of a that is simply j.
  var prevRow = [];
  var currRow = [];
  for (var k = 0; k <= b.length; k++) {
    prevRow[k] = k;
  }

  for (var i = 0; i < a.length; i++) {
    // Distance from the first i + 1 characters of a to the empty string.
    currRow[0] = i + 1;
    // charAt is used, as elsewhere in this file, because bracket indexing of
    // strings is not available in legacy JScript.
    var aChar = a.charAt(i);

    for (var j = 0; j < b.length; j++) {
      var cost = aChar != b.charAt(j) ? 1 : 0;
      // Cheapest of: inserting a character, deleting a character, or
      // substituting one (free when the two characters already match).
      currRow[j + 1] = Math.min(
          currRow[j] + 1, prevRow[j + 1] + 1, prevRow[j] + cost);
    }

    // Swap the buffers instead of copying the row; the old row is fully
    // overwritten on the next pass.
    var spare = prevRow;
    prevRow = currRow;
    currRow = spare;
  }

  // After the final swap the last computed row is in prevRow.
  return prevRow[b.length];
};