lib/goog/string/string.js

1// Copyright 2006 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/**
16 * @fileoverview Utilities for string manipulation.
17 */
18
19
20/**
21 * Namespace for string utilities
22 */
23goog.provide('goog.string');
24goog.provide('goog.string.Unicode');
25
26
/**
 * Named constants for Unicode characters referred to by string utilities.
 * @enum {string}
 */
goog.string.Unicode = {
  // U+00A0, the non-breaking space.
  NBSP: '\xa0'
};
34
35
/**
 * Tests whether a string begins with a given prefix.
 *
 * The search runs backwards from index 0, so the only match that can be found
 * is one located at the very start of {@code str}.
 * @param {string} str The string to check.
 * @param {string} prefix The text expected at the start of {@code str}.
 * @return {boolean} True if {@code str} begins with {@code prefix}.
 */
goog.string.startsWith = function(str, prefix) {
  var matchIndex = str.lastIndexOf(prefix, 0);
  return matchIndex === 0;
};
45
46
/**
 * Tests whether a string ends with a given suffix.
 * @param {string} str The string to check.
 * @param {string} suffix The text expected at the end of {@code str}.
 * @return {boolean} True if {@code str} ends with {@code suffix}.
 */
goog.string.endsWith = function(str, suffix) {
  // Index at which the suffix would have to start.
  var offset = str.length - suffix.length;
  if (!(offset >= 0)) {
    // The suffix is longer than the string itself.
    return false;
  }
  return str.indexOf(suffix, offset) === offset;
};
57
58
/**
 * Tests whether a string begins with a given prefix, ignoring case.
 * @param {string} str The string to check.
 * @param {string} prefix The text expected at the start of {@code str}.
 * @return {boolean} True if {@code str} begins with {@code prefix} (ignoring
 *     case).
 */
goog.string.caseInsensitiveStartsWith = function(str, prefix) {
  // Leading slice of str that has the same length as the prefix.
  var head = str.substr(0, prefix.length);
  return goog.string.caseInsensitiveCompare(prefix, head) === 0;
};
70
71
/**
 * Tests whether a string ends with a given suffix, ignoring case.
 * @param {string} str The string to check.
 * @param {string} suffix The text expected at the end of {@code str}.
 * @return {boolean} True if {@code str} ends with {@code suffix} (ignoring
 *     case).
 */
goog.string.caseInsensitiveEndsWith = function(str, suffix) {
  var suffixLength = suffix.length;
  // Trailing slice of str that has the same length as the suffix.
  var tail = str.substr(str.length - suffixLength, suffixLength);
  return goog.string.caseInsensitiveCompare(suffix, tail) === 0;
};
83
84
/**
 * Compares two strings for equality after lower-casing both.
 * @param {string} str1 First string to check.
 * @param {string} str2 Second string to check.
 * @return {boolean} True if {@code str1} and {@code str2} are the same string,
 *     ignoring case.
 */
goog.string.caseInsensitiveEquals = function(str1, str2) {
  var lower1 = str1.toLowerCase();
  var lower2 = str2.toLowerCase();
  return lower1 == lower2;
};
95
96
/**
 * Performs simple python-style string substitution.
 * subs("foo%s hot%s", "bar", "dog") becomes "foobar hotdog".
 *
 * Placeholders are filled left to right. Surplus arguments are ignored, and
 * placeholders without a matching argument are left in place as '%s'.
 * @param {string} str The string containing the pattern.
 * @param {...*} var_args The items to substitute into the pattern.
 * @return {string} A copy of {@code str} in which each occurrence of
 *     {@code %s} has been replaced by an argument from {@code var_args}.
 */
goog.string.subs = function(str, var_args) {
  var parts = str.split('%s');

  // Substitutions go into the gaps between parts, so there can be no more of
  // them than there are gaps, nor more than there are arguments after str.
  var substitutions = Math.min(arguments.length - 1, parts.length - 1);

  var result = '';
  for (var i = 0; i < substitutions; i++) {
    result += parts[i] + arguments[i + 1];
  }

  // Re-join whatever is left, restoring any unused '%s'.
  return result + parts.slice(substitutions).join('%s');
};
119
120
/**
 * Collapses every run of whitespace (spaces, non-breaking spaces, new lines
 * and tabs) into a single space and strips leading and trailing whitespace.
 * @param {string} str Input string.
 * @return {string} A copy of {@code str} with collapsed whitespace.
 */
goog.string.collapseWhitespace = function(str) {
  // IE does not treat the non-breaking space (0xa0) as part of \s, although
  // section 7.2 of the ECMAScript spec requires it, so it is listed
  // explicitly to keep behavior the same in every browser.
  var singleSpaced = str.replace(/[\s\xa0]+/g, ' ');
  return singleSpaced.replace(/^\s+|\s+$/g, '');
};
133
134
/**
 * Checks whether a string is empty or consists solely of whitespace.
 * @param {string} str The string to check.
 * @return {boolean} True if {@code str} is empty or whitespace only.
 */
goog.string.isEmpty = function(str) {
  // Checking length == 0 up front was measured to be slower in all browsers
  // (about the same in Opera), so the regexp is the only test.
  // IE does not treat the non-breaking space (0xa0) as part of \s, although
  // section 7.2 of the ECMAScript spec requires it, so it is listed
  // explicitly to keep behavior the same in every browser.
  var blankPattern = /^[\s\xa0]*$/;
  return blankPattern.test(str);
};
148
149
/**
 * Checks whether a value is null, undefined, empty or whitespace only. The
 * value is first passed through {@code goog.string.makeSafe} and the result
 * is then tested with {@code goog.string.isEmpty}.
 * @param {*} str The string to check.
 * @return {boolean} True if {@code str} is null, undefined, empty, or
 *     whitespace only.
 */
goog.string.isEmptySafe = function(str) {
  var safeStr = goog.string.makeSafe(str);
  return goog.string.isEmpty(safeStr);
};
159
160
/**
 * Checks whether a string consists only of breaking whitespace: tab, line
 * feed, carriage return and space. The empty string also passes.
 * @param {string} str The string to check.
 * @return {boolean} Whether the string is all breaking whitespace.
 */
goog.string.isBreakingWhitespace = function(str) {
  var hasOtherChar = /[^\t\n\r ]/.test(str);
  return !hasOtherChar;
};
169
170
/**
 * Checks whether a string contains nothing but the letters a-z and A-Z. The
 * empty string also passes.
 * @param {string} str string to check.
 * @return {boolean} True if {@code str} consists entirely of letters.
 */
goog.string.isAlpha = function(str) {
  var hasNonLetter = /[^a-zA-Z]/.test(str);
  return !hasNonLetter;
};
179
180
/**
 * Checks whether a string contains nothing but the digits 0-9. The empty
 * string also passes.
 * @param {*} str string to check. If not a string, it will be
 *     casted to one.
 * @return {boolean} True if {@code str} is numeric.
 */
goog.string.isNumeric = function(str) {
  var hasNonDigit = /[^0-9]/.test(str);
  return !hasNonDigit;
};
190
191
/**
 * Checks whether a string contains nothing but the letters a-z, A-Z and the
 * digits 0-9. The empty string also passes.
 * @param {string} str string to check.
 * @return {boolean} True if {@code str} is alphanumeric.
 */
goog.string.isAlphaNumeric = function(str) {
  var hasOtherChar = /[^a-zA-Z0-9]/.test(str);
  return !hasOtherChar;
};
200
201
/**
 * Checks whether a character is the plain space character (' ').
 * @param {string} ch Character to check.
 * @return {boolean} True if {@code ch} is a space.
 */
goog.string.isSpace = function(ch) {
  return ' ' == ch;
};
210
211
/**
 * Checks whether a character lies in one of the accepted ranges: printable
 * ASCII (' ' through '~') or '\u0080' through '\uFFFD'.
 *
 * Only the ASCII range is guarded by a length check. The second range is a
 * plain string comparison, so a longer string that sorts inside that range
 * is accepted as well.
 * @param {string} ch Character to check.
 * @return {boolean} True if {@code ch} is a valid unicode character.
 */
goog.string.isUnicodeChar = function(ch) {
  var isPrintableAscii = ch.length == 1 && ch >= ' ' && ch <= '~';
  if (isPrintableAscii) {
    return true;
  }
  return ch >= '\u0080' && ch <= '\uFFFD';
};
221
222
/**
 * Replaces line breaks with spaces. A run of consecutive line breaks (any mix
 * of \r\n, \r and \n) becomes one single space.
 * @param {string} str The string from which to strip newlines.
 * @return {string} A copy of {@code str} stripped of newlines.
 */
goog.string.stripNewlines = function(str) {
  var lineBreakRuns = /(\r\n|\r|\n)+/g;
  return str.replace(lineBreakRuns, ' ');
};
232
233
/**
 * Converts Windows and Mac line breaks to unix style: every \r\n or \r
 * becomes \n.
 * @param {string} str The string in which to canonicalize newlines.
 * @return {string} A copy of {@code str} with canonicalized newlines.
 */
goog.string.canonicalizeNewlines = function(str) {
  var anyLineBreak = /(\r\n|\r|\n)/g;
  return str.replace(anyLineBreak, '\n');
};
242
243
/**
 * Normalizes whitespace by replacing each whitespace character, including the
 * non-breaking space, with a plain space. Characters are replaced one for
 * one; runs are not collapsed.
 * @param {string} str The string in which to normalize whitespace.
 * @return {string} A copy of {@code str} with all whitespace normalized.
 */
goog.string.normalizeWhitespace = function(str) {
  var anyWhitespaceChar = /\xa0|\s/g;
  return str.replace(anyWhitespaceChar, ' ');
};
253
254
/**
 * Normalizes spaces by replacing every run of spaces and tabs with a single
 * space. Each non-breaking space is replaced with a space on its own, so
 * consecutive non-breaking spaces are not merged.
 * @param {string} str The string in which to normalize spaces.
 * @return {string} A copy of {@code str} with all consecutive spaces and tabs
 *     replaced with a single space.
 */
goog.string.normalizeSpaces = function(str) {
  var nbspOrSpaceRun = /\xa0|[ \t]+/g;
  return str.replace(nbspOrSpaceRun, ' ');
};
265
266
/**
 * Strips breaking spaces (tab, carriage return, line feed, space) from both
 * ends of the string and collapses each run of them in the middle into one
 * space. The original and the result strings render the same way in HTML.
 * @param {string} str A string in which to collapse spaces.
 * @return {string} Copy of the string with normalized breaking spaces.
 */
goog.string.collapseBreakingSpaces = function(str) {
  var collapsed = str.replace(/[\t\r\n ]+/g, ' ');
  return collapsed.replace(/^[\t\r\n ]+|[\t\r\n ]+$/g, '');
};
278
279
/**
 * Removes whitespace from both ends of a string.
 * @param {string} str The string to trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trim = function(str) {
  // IE does not treat the non-breaking space (0xa0) as part of \s, although
  // section 7.2 of the ECMAScript spec requires it, so it is listed
  // explicitly to keep behavior the same in every browser.
  var edgeWhitespace = /^[\s\xa0]+|[\s\xa0]+$/g;
  return str.replace(edgeWhitespace, '');
};
291
292
/**
 * Removes whitespace from the start of a string.
 * @param {string} str The string to left trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trimLeft = function(str) {
  // IE does not treat the non-breaking space (0xa0) as part of \s, although
  // section 7.2 of the ECMAScript spec requires it, so it is listed
  // explicitly to keep behavior the same in every browser.
  var leadingWhitespace = /^[\s\xa0]+/;
  return str.replace(leadingWhitespace, '');
};
304
305
/**
 * Removes whitespace from the end of a string.
 * @param {string} str The string to right trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trimRight = function(str) {
  // IE does not treat the non-breaking space (0xa0) as part of \s, although
  // section 7.2 of the ECMAScript spec requires it, so it is listed
  // explicitly to keep behavior the same in every browser.
  var trailingWhitespace = /[\s\xa0]+$/;
  return str.replace(trailingWhitespace, '');
};
317
318
/**
 * A string comparator that ignores case. Both arguments are converted to
 * strings and lower-cased before being compared.
 * -1 = str1 less than str2
 *  0 = str1 equals str2
 *  1 = str1 greater than str2
 *
 * @param {string} str1 The string to compare.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} The comparator result, as described above.
 */
goog.string.caseInsensitiveCompare = function(str1, str2) {
  var lower1 = String(str1).toLowerCase();
  var lower2 = String(str2).toLowerCase();

  if (lower1 === lower2) {
    return 0;
  }
  return lower1 < lower2 ? -1 : 1;
};
341
342
/**
 * Regular expression that tokenizes a string into fractional parts (a dot
 * followed by digits), runs of digits, and runs of non-digit characters.
 * @type {RegExp}
 * @private
 */
goog.string.numerateCompareRegExp_ = /(\.\d+)|(\d+)|(\D+)/g;


/**
 * String comparison function that handles numbers in a way humans might expect.
 * Using this function, the string "File 2.jpg" sorts before "File 10.jpg". The
 * comparison is mostly case-insensitive, though strings that are identical
 * except for case are sorted with the upper-case strings before lower-case.
 *
 * This comparison function is significantly slower (about 500x) than either
 * the default or the case-insensitive compare. It should not be used in
 * time-critical code, but should be fast enough to sort several hundred short
 * strings (like filenames) with a reasonable delay.
 *
 * @param {string} str1 The string to compare in a numerically sensitive way.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than
 *     0 if str1 > str2.
 */
goog.string.numerateCompare = function(str1, str2) {
  if (str1 == str2) {
    return 0;
  }
  // A falsy (e.g. empty) string sorts before anything else.
  if (!str1) {
    return -1;
  }
  if (!str2) {
    return 1;
  }

  // Splitting both strings up front with match() turned out to be faster for
  // most inputs than RegExp.exec or walking the characters one by one.
  var tokenPattern = goog.string.numerateCompareRegExp_;
  var tokens1 = str1.toLowerCase().match(tokenPattern);
  var tokens2 = str2.toLowerCase().match(tokenPattern);

  var sharedCount = Math.min(tokens1.length, tokens2.length);

  for (var i = 0; i < sharedCount; i++) {
    var left = tokens1[i];
    var right = tokens2[i];
    if (left == right) {
      continue;
    }

    // The first differing pair of tokens decides the result. Only when both
    // tokens parse as integers is a numeric comparison used: the difference
    // is NaN (falsy) if either side is not an integer, and 0 (falsy) if the
    // values are numerically equal. Decimal tokens therefore compare as
    // strings (e.g., '.09' < '.1').
    var numericDelta = parseInt(left, 10) - parseInt(right, 10);
    if (numericDelta) {
      return numericDelta;
    }
    return left < right ? -1 : 1;
  }

  // All shared tokens matched: the string with fewer tokens sorts first.
  var lengthDelta = tokens1.length - tokens2.length;
  if (lengthDelta) {
    return lengthDelta;
  }

  // The strings differ only by case (exact equality was handled at the top).
  // Fall back to the default string comparison to stabilize the sort.
  return str1 < str2 ? -1 : 1;
};
416
417
/**
 * URL-encodes a value after converting it to a string.
 * @param {*} str The string to url-encode.
 * @return {string} An encoded copy of {@code str} that is safe for urls.
 *     Note that '#', ':', and other characters used to delimit portions
 *     of URLs *will* be encoded.
 */
goog.string.urlEncode = function(str) {
  var text = String(str);
  return encodeURIComponent(text);
};
428
429
/**
 * URL-decodes a string. Every '+' is turned into a space first, because
 * decodeURIComponent does not perform that conversion itself.
 * @param {string} str The string to url decode.
 * @return {string} The decoded {@code str}.
 */
goog.string.urlDecode = function(str) {
  var withSpaces = str.replace(/\+/g, ' ');
  return decodeURIComponent(withSpaces);
};
439
440
/**
 * Converts each line break (\r\n, \r or \n) to a <br>, or to a <br /> when
 * XML compatible output is requested.
 * @param {string} str The string in which to convert newlines.
 * @param {boolean=} opt_xml Whether to use XML compatible tags.
 * @return {string} A copy of {@code str} with converted newlines.
 */
goog.string.newLineToBr = function(str, opt_xml) {
  var breakTag = opt_xml ? '<br />' : '<br>';
  return str.replace(/(\r\n|\r|\n)/g, breakTag);
};
450
451
/**
 * Escapes double quote '"' characters in addition to '&', '<', and '>' so that
 * a string can be included in an HTML tag attribute value within double quotes.
 *
 * Note that > does not need to be escaped for the HTML or XML to be valid; it
 * is escaped anyway for consistency with other implementations.
 *
 * NOTE(user):
 * HtmlEscape is often called during the generation of large blocks of HTML.
 * Using statics for the regular expressions and strings is an optimization
 * that can more than half the amount of time IE spends in this function for
 * large apps, since strings and regexes both contribute to GC allocations.
 *
 * Testing for the presence of a character before escaping increases the number
 * of function calls, but actually provides a speed increase for the average
 * case -- since the average case often doesn't require the escaping of all 4
 * characters and indexOf() is much cheaper than replace().
 * The worst case does suffer slightly from the additional calls, therefore the
 * opt_isLikelyToContainHtmlChars option has been included for situations
 * where all 4 HTML entities are very likely to be present and need escaping.
 *
 * Some benchmarks (times tended to fluctuate +-0.05ms). Each cell reads
 * "no chars / average (mix of cases) / all 4 chars":
 * <pre>
 *                     FireFox               IE6
 * no checks           0.13 / 0.22 / 0.22    0.23 / 0.53 / 0.80
 * indexOf             0.08 / 0.17 / 0.26    0.22 / 0.54 / 0.84
 * indexOf + re test   0.07 / 0.17 / 0.28    0.19 / 0.50 / 0.85
 * </pre>
 *
 * An additional advantage of checking if replace actually needs to be called
 * is a reduction in the number of object allocations, so as the size of the
 * application grows the difference between the various methods would increase.
 *
 * @param {string} str string to be escaped.
 * @param {boolean=} opt_isLikelyToContainHtmlChars Don't perform a check to see
 *     if the character needs replacing - use this option if you expect each of
 *     the characters to appear often. Leave false if you expect few html
 *     characters to occur in your strings, such as if you are escaping HTML.
 * @return {string} An escaped copy of {@code str}.
 */
goog.string.htmlEscape = function(str, opt_isLikelyToContainHtmlChars) {
  if (opt_isLikelyToContainHtmlChars) {
    // The caller expects all four characters, so replace unconditionally.
    // '&' goes first so the ampersands of the other entities stay intact.
    var escaped = str.replace(goog.string.amperRe_, '&amp;');
    escaped = escaped.replace(goog.string.ltRe_, '&lt;');
    escaped = escaped.replace(goog.string.gtRe_, '&gt;');
    return escaped.replace(goog.string.quotRe_, '&quot;');
  }

  // A single quick test covers the case where nothing needs replacing; in
  // the worst case it barely changes the time taken.
  if (!goog.string.allRe_.test(str)) {
    return str;
  }

  // str.indexOf is faster than regex.test for the per-character checks.
  if (str.indexOf('&') != -1) {
    str = str.replace(goog.string.amperRe_, '&amp;');
  }
  if (str.indexOf('<') != -1) {
    str = str.replace(goog.string.ltRe_, '&lt;');
  }
  if (str.indexOf('>') != -1) {
    str = str.replace(goog.string.gtRe_, '&gt;');
  }
  if (str.indexOf('"') != -1) {
    str = str.replace(goog.string.quotRe_, '&quot;');
  }
  return str;
};


/**
 * Matches every ampersand; used by goog.string.htmlEscape.
 * @type {RegExp}
 * @private
 */
goog.string.amperRe_ = /&/g;


/**
 * Matches every less than sign; used by goog.string.htmlEscape.
 * @type {RegExp}
 * @private
 */
goog.string.ltRe_ = /</g;


/**
 * Matches every greater than sign; used by goog.string.htmlEscape.
 * @type {RegExp}
 * @private
 */
goog.string.gtRe_ = />/g;


/**
 * Matches every double quote; used by goog.string.htmlEscape.
 * @type {RegExp}
 * @private
 */
goog.string.quotRe_ = /\"/g;


/**
 * Matches any one character that goog.string.htmlEscape has to escape. It has
 * no global flag, so repeated test() calls carry no state between them.
 * @type {RegExp}
 * @private
 */
goog.string.allRe_ = /[&<>\"]/;
561
562
/**
 * Unescapes an HTML string. Strings without any '&' are returned unchanged.
 *
 * @param {string} str The string to unescape.
 * @return {string} An unescaped copy of {@code str}.
 */
goog.string.unescapeEntities = function(str) {
  if (!goog.string.contains(str, '&')) {
    return str;
  }
  // Only use the DOM-based decoder when a document is actually available;
  // otherwise fall back on decoding pure XML entities.
  return 'document' in goog.global ?
      goog.string.unescapeEntitiesUsingDom_(str) :
      goog.string.unescapePureXmlEntities_(str);
};
583
584
/**
 * Unescapes an HTML string using a DOM to resolve non-XML, non-numeric
 * entities. This function is XSS-safe and whitespace-preserving.
 * @private
 * @param {string} str The string to unescape.
 * @return {string} The unescaped {@code str} string.
 */
goog.string.unescapeEntitiesUsingDom_ = function(str) {
  // Per-call cache of decoded entities, seeded with the four basic ones.
  var decodedByEntity = {'&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"'};
  var scratch = document.createElement('div');

  // The pattern matches as many valid entity characters as possible. If the
  // real entity is shorter, innerHTML hands back the trailing characters
  // unchanged, so the result is still right. The matched text can contain
  // neither an open angle bracket nor whitespace, which rules out XSS through
  // innerHTML and keeps the string's own whitespace untouched.
  return str.replace(goog.string.HTML_ENTITY_PATTERN_, function(s, entity) {
    var decoded = decodedByEntity[s];
    if (decoded) {
      return decoded;
    }

    if (entity.charAt(0) == '#') {
      // Numeric entity. The '0' prefix makes hex entities (e.g. &#x10) parse
      // as hex numbers.
      var charCode = Number('0' + entity.substr(1));
      if (!isNaN(charCode)) {
        decoded = String.fromCharCode(charCode);
      }
    }

    if (!decoded) {
      // Let the browser resolve it. The trailing space works around a Webkit
      // bug that parses an invalid entity at the very end of innerHTML text
      // as the empty string; it is sliced off again right away.
      scratch.innerHTML = s + ' ';
      decoded = scratch.firstChild.nodeValue.slice(0, -1);
    }

    decodedByEntity[s] = decoded;
    return decoded;
  });
};
626
627
/**
 * Unescapes the XML entities amp, lt, gt and quot as well as numeric
 * entities. Anything else of the form &...; is left as it is.
 * @private
 * @param {string} str The string to unescape.
 * @return {string} An unescaped copy of {@code str}.
 */
goog.string.unescapePureXmlEntities_ = function(str) {
  return str.replace(/&([^;]+);/g, function(s, entity) {
    if (entity === 'amp') {
      return '&';
    }
    if (entity === 'lt') {
      return '<';
    }
    if (entity === 'gt') {
      return '>';
    }
    if (entity === 'quot') {
      return '"';
    }

    if (entity.charAt(0) == '#') {
      // The '0' prefix makes hex entities (e.g. &#x10) parse as hex.
      var charCode = Number('0' + entity.substr(1));
      if (!isNaN(charCode)) {
        return String.fromCharCode(charCode);
      }
    }

    // Unknown or invalid entity: hand the matched text back unchanged.
    return s;
  });
};
658
659
/**
 * Regular expression that matches an HTML entity: an ampersand, then one or
 * more characters other than ';', whitespace, '<' and '&', then an optional
 * closing semicolon. The text between '&' and ';' is captured.
 * See also HTML5: Tokenization / Tokenizing character references.
 * @private
 * @type {!RegExp}
 */
goog.string.HTML_ENTITY_PATTERN_ = /&([^;\s<&]+);?/g;
667
668
/**
 * Escapes whitespace so that spatial formatting is preserved when the string
 * is rendered as HTML. Each pair of consecutive spaces becomes a space
 * followed by a non-breaking space, and line breaks become <br> tags (see
 * {@code goog.string.newLineToBr}). The character entity #160 is used rather
 * than a named entity to make the output safer for xml.
 *
 * Single spaces are left alone: HTML only collapses runs of spaces, and
 * rewriting every single space would double the rendered spacing.
 * @param {string} str The string in which to escape whitespace.
 * @param {boolean=} opt_xml Whether to use XML compatible tags.
 * @return {string} An escaped copy of {@code str}.
 */
goog.string.whitespaceEscape = function(str, opt_xml) {
  // The explicit {2} quantifier keeps the pattern unambiguous even if the
  // source is reformatted or its whitespace gets collapsed.
  var spacesProtected = str.replace(/ {2}/g, ' &#160;');
  return goog.string.newLineToBr(spacesProtected, opt_xml);
};
679
680
/**
 * Strips one pair of matching quote characters from around a string. The
 * second argument lists the characters to treat as quotes; it may be a single
 * character or several, in which case each one is tried in turn. The string
 * is only changed when its first and last characters are the same quote
 * character. For example:
 *
 * <pre>
 * goog.string.stripQuotes('"abc"', '"`') --> 'abc'
 * goog.string.stripQuotes('`abc`', '"`') --> 'abc'
 * </pre>
 *
 * @param {string} str The string to strip.
 * @param {string} quoteChars The quote characters to strip.
 * @return {string} A copy of {@code str} without the quotes.
 */
goog.string.stripQuotes = function(str, quoteChars) {
  var candidateCount = quoteChars.length;
  for (var i = 0; i < candidateCount; i++) {
    var candidate = candidateCount == 1 ? quoteChars : quoteChars.charAt(i);
    var lastIndex = str.length - 1;
    if (str.charAt(0) != candidate || str.charAt(lastIndex) != candidate) {
      continue;
    }
    return str.substring(1, lastIndex);
  }
  return str;
};
706
707
/**
 * Truncates a string to a certain length and adds '...' if necessary. The
 * length also accounts for the ellipsis, so a maximum length of 10 and a string
 * 'Hello World!' produces 'Hello W...'.
 *
 * When escaped characters are protected, the string is unescaped before it is
 * measured and cut, and HTML-escaped again afterwards.
 * @param {string} str The string to truncate.
 * @param {number} chars Max number of characters.
 * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped
 *     characters from being cut off in the middle.
 * @return {string} The truncated {@code str} string.
 */
goog.string.truncate = function(str, chars, opt_protectEscapedCharacters) {
  var text = opt_protectEscapedCharacters ?
      goog.string.unescapeEntities(str) : str;

  if (text.length > chars) {
    // Leave room for the three characters of the ellipsis.
    text = text.substring(0, chars - 3) + '...';
  }

  return opt_protectEscapedCharacters ? goog.string.htmlEscape(text) : text;
};
733
734
/**
 * Truncates a string in the middle, adding "..." if necessary, and favoring
 * the beginning of the string. {@code chars} counts only the characters kept
 * from the original string; the ellipsis is added on top of that.
 *
 * When escaped characters are protected, the string is unescaped before it is
 * measured and cut, and HTML-escaped again afterwards.
 * @param {string} str The string to truncate the middle of.
 * @param {number} chars Max number of characters.
 * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped
 *     characters from being cutoff in the middle.
 * @param {number=} opt_trailingChars Optional number of trailing characters to
 *     leave at the end of the string, instead of truncating as close to the
 *     middle as possible.
 * @return {string} A truncated copy of {@code str}.
 */
goog.string.truncateMiddle = function(str, chars,
    opt_protectEscapedCharacters, opt_trailingChars) {
  if (opt_protectEscapedCharacters) {
    str = goog.string.unescapeEntities(str);
  }

  if (str.length > chars) {
    var headLength;
    var tailLength;
    if (opt_trailingChars) {
      // The caller fixed the tail length; it can never exceed the budget.
      tailLength = opt_trailingChars > chars ? chars : opt_trailingChars;
      headLength = chars - tailLength;
    } else {
      // Split evenly; with an odd budget the extra character goes to the
      // head, which favors the beginning of the string.
      tailLength = Math.floor(chars / 2);
      headLength = tailLength + chars % 2;
    }
    str = str.substring(0, headLength) + '...' +
        str.substring(str.length - tailLength);
  }

  if (opt_protectEscapedCharacters) {
    str = goog.string.htmlEscape(str);
  }

  return str;
};
774
775
/**
 * Maps each character that has a dedicated escape sequence to that sequence.
 * Used by goog.string.quote and goog.string.escapeChar.
 * @private
 * @type {Object}
 */
goog.string.specialEscapeChars_ = {
  '\0': '\\0',
  '\b': '\\b',
  '\f': '\\f',
  '\n': '\\n',
  '\r': '\\r',
  '\t': '\\t',
  // Vertical tab is spelled as a hex escape because JScript does not
  // support '\v'.
  '\x0B': '\\x0B',
  '"': '\\"',
  '\\': '\\\\'
};
792
793
/**
 * Cache of character to escaped-text mappings used internally by
 * goog.string.escapeChar, which adds an entry for every character it
 * escapes. It starts out holding only the single quote.
 * @private
 * @type {Object}
 */
goog.string.jsEscapeCache_ = {
  '\'': '\\\''
};
802
803
/**
 * Encloses a string in double quotes and escapes characters so that the
 * result is a valid JS string. The argument is converted to a string first.
 * @param {string} s The string to quote.
 * @return {string} A copy of {@code s} surrounded by double quotes.
 */
goog.string.quote = function(s) {
  var text = String(s);
  // Defer to a quote() method on the string where the environment has one.
  if (text.quote) {
    return text.quote();
  }

  var pieces = ['"'];
  for (var i = 0; i < text.length; i++) {
    var ch = text.charAt(i);
    var escaped = goog.string.specialEscapeChars_[ch];
    if (!escaped) {
      // Printable ASCII is copied as is; everything else is escaped.
      var code = ch.charCodeAt(0);
      escaped = (code > 31 && code < 127) ? ch : goog.string.escapeChar(ch);
    }
    pieces.push(escaped);
  }
  pieces.push('"');
  return pieces.join('');
};
826
827
/**
 * Takes a string and returns a copy in which every character has been passed
 * through {@code goog.string.escapeChar}.
 * @param {string} str The string to escape.
 * @return {string} An escaped string representing {@code str}.
 */
goog.string.escapeString = function(str) {
  var escapedChars = [];
  var charCount = str.length;
  for (var i = 0; i < charCount; i++) {
    escapedChars.push(goog.string.escapeChar(str.charAt(i)));
  }
  return escapedChars.join('');
};
840
841
/**
 * Takes a character and returns the escaped string for that character. For
 * example escapeChar(String.fromCharCode(15)) -> "\\x0F".
 *
 * Characters with a dedicated escape sequence use it. Other printable ASCII
 * characters (codes 32 through 126) are returned unchanged. Everything else
 * becomes a two-digit \x escape (codes below 256) or a four-digit \u escape.
 * Only the first UTF-16 code unit of {@code c} is examined. The empty string
 * is returned unchanged. Results are memoized in
 * {@code goog.string.jsEscapeCache_}.
 * @param {string} c The character to escape.
 * @return {string} An escaped string representing {@code c}.
 */
goog.string.escapeChar = function(c) {
  if (c in goog.string.jsEscapeCache_) {
    return goog.string.jsEscapeCache_[c];
  }

  if (c in goog.string.specialEscapeChars_) {
    return goog.string.jsEscapeCache_[c] = goog.string.specialEscapeChars_[c];
  }

  var rv = c;
  var cc = c.charCodeAt(0);
  // charCodeAt yields NaN only for the empty string, which has nothing to
  // escape. Printable ASCII is also left as it is.
  if (!isNaN(cc) && (cc < 32 || cc > 126)) {
    if (cc < 256) {
      rv = '\\x';
      // Pad single-digit hex values to two digits.
      if (cc < 16) {
        rv += '0';
      }
    } else {
      rv = '\\u';
      // Pad three-digit hex values (below \u1000) to four digits.
      if (cc < 4096) {
        rv += '0';
      }
    }
    rv += cc.toString(16).toUpperCase();
  }

  return goog.string.jsEscapeCache_[c] = rv;
};
879
880
/**
 * Builds a map (Object) whose keys are the characters of the given string,
 * each mapped to true. You can then use goog.object.map or goog.array.map to
 * change the values.
 * @param {string} s The string to build the map from.
 * @return {Object} The map of characters used.
 */
// TODO(arv): It seems like we should have a generic goog.array.toMap. But do
//            we want a dependency on goog.array in goog.string?
goog.string.toMap = function(s) {
  var charMap = {};
  var charCount = s.length;
  for (var i = 0; i < charCount; i++) {
    var ch = s.charAt(i);
    charMap[ch] = true;
  }
  return charMap;
};
897
898
/**
 * Checks whether a string contains a given substring.
 * @param {string} s The string to test.
 * @param {string} ss The substring to test for.
 * @return {boolean} True if {@code s} contains {@code ss}.
 */
goog.string.contains = function(s, ss) {
  var position = s.indexOf(ss);
  return position != -1;
};
908
909
/**
 * Returns the number of non-overlapping occurrences of ss in s.
 * If either s or ss evaluates to false, then returns zero.
 * @param {string} s The string to look in.
 * @param {string} ss The string to look for.
 * @return {number} Number of occurrences of ss in s.
 */
goog.string.countOf = function(s, ss) {
  if (!s || !ss) {
    return 0;
  }
  // Splitting on ss yields one more piece than there are occurrences.
  return s.split(ss).length - 1;
};
920
921
/**
 * Removes a substring of a specified length at a specific index in a string.
 * @param {string} s The base string from which to remove.
 * @param {number} index The index at which to remove the substring.
 * @param {number} stringLength The length of the substring to remove.
 * @return {string} A copy of {@code s} with the substring removed or the full
 *     string if nothing is removed or the input is invalid.
 */
goog.string.removeAt = function(s, index, stringLength) {
  // Nothing is removed unless the index lies inside the string and the
  // length to remove is positive.
  var isValidRequest = index >= 0 && index < s.length && stringLength > 0;
  if (!isValidRequest) {
    return s;
  }

  var before = s.substr(0, index);
  var after = s.substr(index + stringLength, s.length - index - stringLength);
  return before + after;
};
940
941
/**
 * Removes the first occurrence of a substring from a string.
 * @param {string} s The base string from which to remove.
 * @param {string} ss The string to remove.
 * @return {string} A copy of {@code s} with {@code ss} removed or the full
 *     string if nothing is removed.
 */
goog.string.remove = function(s, ss) {
  // Escape ss so that it is matched as literal text. Without the global flag
  // only the first match is replaced.
  var literalSource = goog.string.regExpEscape(ss);
  var firstOccurrence = new RegExp(literalSource, '');
  return s.replace(firstOccurrence, '');
};
953
954
/**
 * Removes all occurrences of a substring from a string.
 * @param {string} s The base string from which to remove.
 * @param {string} ss The string to remove.
 * @return {string} A copy of {@code s} with {@code ss} removed or the full
 *     string if nothing is removed.
 */
goog.string.removeAll = function(s, ss) {
  // Escape ss so that it is matched as literal text; the global flag makes
  // the replacement apply to every match.
  var literalSource = goog.string.regExpEscape(ss);
  var everyOccurrence = new RegExp(literalSource, 'g');
  return s.replace(everyOccurrence, '');
};
966
967
/**
 * Escapes the characters of a string that are not safe to use in a RegExp.
 * @param {*} s The string to escape. If not a string, it will be cast
 *     to one.
 * @return {string} A RegExp safe, escaped copy of {@code s}.
 */
goog.string.regExpEscape = function(s) {
  var text = String(s);
  // Put a backslash in front of every character listed in the class below.
  var escaped = text.replace(/([-()\[\]{}+?*.$\^|,:#<!\\])/g, '\\$1');
  // A raw backspace character is rewritten as the four-character text \x08.
  return escaped.replace(/\x08/g, '\\x08');
};
978
979
/**
 * Builds a string made of n copies of another string.
 * @param {string} string The string to repeat.
 * @param {number} length The number of times to repeat.
 * @return {string} A string containing {@code length} repetitions of
 *     {@code string}.
 */
goog.string.repeat = function(string, length) {
  // Joining length + 1 empty slots inserts the separator exactly length times.
  var emptySlots = new Array(length + 1);
  return emptySlots.join(string);
};
990
991
/**
 * Pads number to given length and optionally rounds it to a given precision.
 * The length applies to the digits before the decimal point; a leading minus
 * sign is not counted and stays in front of the padding.
 * For example:
 * <pre>padNumber(1.25, 2, 3) -> '01.250'
 * padNumber(1.25, 2) -> '01.25'
 * padNumber(1.25, 2, 1) -> '01.3'
 * padNumber(1.25, 0) -> '1.25'
 * padNumber(-1.25, 2) -> '-01.25'
 * padNumber(-5, 3) -> '-005'</pre>
 *
 * @param {number} num The number to pad.
 * @param {number} length The desired length.
 * @param {number=} opt_precision The desired precision.
 * @return {string} {@code num} as a string with the given options.
 */
goog.string.padNumber = function(num, length, opt_precision) {
  var s = goog.isDef(opt_precision) ? num.toFixed(opt_precision) : String(num);

  // Split off a leading minus sign so that it is neither counted as a digit
  // nor pushed behind the zeros (which used to produce results like '0-5').
  var sign = '';
  if (s.charAt(0) == '-') {
    sign = '-';
    s = s.substring(1);
  }

  // Number of characters in front of the decimal point, or the whole string
  // when there is no decimal point.
  var index = s.indexOf('.');
  if (index == -1) {
    index = s.length;
  }
  return sign + goog.string.repeat('0', Math.max(0, length - index)) + s;
};
1013
1014
/**
 * Converts the given object to a string, mapping null and undefined to the
 * empty string.
 *
 * @param {*} obj The object to convert.
 * @return {string} A string representation of the {@code obj}.
 */
goog.string.makeSafe = function(obj) {
  // Loose comparison with null is true for both null and undefined.
  if (obj == null) {
    return '';
  }
  return String(obj);
};
1025
1026
/**
 * Concatenates string expressions. This is useful
 * since some browsers are very inefficient when it comes to using plus to
 * concat strings. Be careful when using null and undefined here since
 * these will not be included in the result. If you need to represent these
 * be sure to cast the argument to a String first.
 * For example:
 * <pre>buildString('a', 'b', 'c', 'd') -> 'abcd'
 * buildString(null, undefined) -> ''
 * </pre>
 * @param {...*} var_args A list of strings to concatenate. If not a string,
 *     it will be cast to one.
 * @return {string} The concatenation of {@code var_args}.
 */
goog.string.buildString = function(var_args) {
  // Copy the arguments into a real array; join() then renders null and
  // undefined entries as empty strings.
  var pieces = Array.prototype.slice.call(arguments);
  return pieces.join('');
};
1044
1045
/**
 * Returns a random string.
 *
 * Doesn't trust Javascript's random function entirely. The result is built
 * from two random integers below 2^31, the second of which is XOR-ed with the
 * current timestamp; both are encoded in base-36 to keep the string short.
 *
 * @return {string} A random string, e.g. sn1s7vb4gcic.
 */
goog.string.getRandomString = function() {
  var range = 2147483648;  // 2^31
  var firstPart = Math.floor(Math.random() * range).toString(36);
  // XOR produces a signed 32-bit integer, hence the Math.abs below.
  var mixed = Math.floor(Math.random() * range) ^ goog.now();
  return firstPart + Math.abs(mixed).toString(36);
};
1060
1061
/**
 * Compares two version numbers.
 *
 * Versions are split on '.' into subversions, and each subversion is compared
 * as a sequence of (number, qualifier) pairs, e.g. '1b2' is (1, 'b'), (2, '').
 *
 * @param {string|number} version1 Version of first item.
 * @param {string|number} version2 Version of second item.
 *
 * @return {number}  1 if {@code version1} is higher.
 *                   0 if arguments are equal.
 *                  -1 if {@code version2} is higher.
 */
goog.string.compareVersions = function(version1, version2) {
  // Stringify, trim surrounding whitespace and split into subversions.
  var leftParts = goog.string.trim(String(version1)).split('.');
  var rightParts = goog.string.trim(String(version2)).split('.');
  var partCount = Math.max(leftParts.length, rightParts.length);

  for (var partIdx = 0; partIdx < partCount; partIdx++) {
    // A subversion that is missing on one side counts as the empty string.
    var leftPart = leftParts[partIdx] || '';
    var rightPart = rightParts[partIdx] || '';

    // Each parser walks its subversion as successive pairs of digits and
    // non-digits (a qualifier like 'b'). Two distinct RegExp objects are
    // required because the 'g' flag makes each one remember its position.
    var leftParser = new RegExp('(\\d*)(\\D*)', 'g');
    var rightParser = new RegExp('(\\d*)(\\D*)', 'g');

    while (true) {
      var leftMatch = leftParser.exec(leftPart) || ['', '', ''];
      var rightMatch = rightParser.exec(rightPart) || ['', '', ''];

      // Nothing left to match on either side: these subversions are equal.
      if (leftMatch[0].length == 0 && rightMatch[0].length == 0) {
        break;
      }

      // A missing numeric part is equivalent to 0.
      var leftNum =
          leftMatch[1].length == 0 ? 0 : parseInt(leftMatch[1], 10);
      var rightNum =
          rightMatch[1].length == 0 ? 0 : parseInt(rightMatch[1], 10);

      // Precedence: the number first; then "no qualifier" beats "any
      // qualifier"; finally the qualifiers are compared as strings.
      var verdict = goog.string.compareElements_(leftNum, rightNum) ||
          goog.string.compareElements_(leftMatch[2].length == 0,
              rightMatch[2].length == 0) ||
          goog.string.compareElements_(leftMatch[2], rightMatch[2]);

      // Stop as soon as an inequality is discovered.
      if (verdict != 0) {
        return verdict;
      }
    }
  }

  return 0;
};
1117
1118
/**
 * Three-way comparison of two elements of a version number, using the
 * built-in relational operators.
 *
 * @param {string|number|boolean} left An element from a version number.
 * @param {string|number|boolean} right An element from a version number.
 *
 * @return {number}  1 if {@code left} is higher.
 *                   0 if arguments are equal.
 *                  -1 if {@code right} is higher.
 * @private
 */
goog.string.compareElements_ = function(left, right) {
  return left < right ? -1 : (left > right ? 1 : 0);
};
1138
1139
/**
 * Exclusive upper bound of the values returned by #goog.string.hashCode:
 * 2^32 (0x100000000).
 * @type {number}
 * @private
 */
goog.string.HASHCODE_MAX_ = 4294967296;
1146
1147
/**
 * String hash function similar to java.lang.String.hashCode().
 * The hash code for a string is computed as
 * s[0] * 31 ^ (n - 1) + s[1] * 31 ^ (n - 2) + ... + s[n - 1],
 * where s[i] is the character code of the ith character of the string and n
 * is the length of the string. The result is reduced modulo 2^32 so that it
 * lies between 0 (inclusive) and 2^32 (exclusive).
 * @param {string} str A string.
 * @return {number} Hash value for {@code str}, between 0 (inclusive) and 2^32
 *     (exclusive). The empty string returns 0.
 */
goog.string.hashCode = function(str) {
  var hash = 0;
  for (var pos = 0, len = str.length; pos < len; pos++) {
    // Reduce after every step so the value stays in the range 0 ... 2^32.
    hash = (31 * hash + str.charCodeAt(pos)) % goog.string.HASHCODE_MAX_;
  }
  return hash;
};
1168
1169
/**
 * The most recent unique ID. Starts at a random integer in the range
 * 0 (inclusive) to 2^31 (exclusive).
 * @type {number}
 * @private
 */
goog.string.uniqueStringCounter_ = Math.floor(Math.random() * 0x80000000);
1176
1177
/**
 * Produces a string which is unique in the current document by appending an
 * ever-increasing counter to the prefix 'goog_'.
 * This is useful, for example, to create unique IDs for DOM elements.
 * @return {string} A unique id.
 */
goog.string.createUniqueString = function() {
  // Read the current counter value, then advance it for the next caller.
  var suffix = goog.string.uniqueStringCounter_++;
  return 'goog_' + suffix;
};
1186
1187
/**
 * Converts the supplied string to a number, which may be Infinity or NaN.
 * This function strips whitespace: (toNumber(' 123') === 123)
 * This function accepts scientific notation: (toNumber('1e1') === 10)
 *
 * This is better than Javascript's built-in conversions because, sadly:
 *     (Number(' ') === 0) and (parseFloat('123a') === 123)
 *
 * @param {string} str The string to convert.
 * @return {number} The number the supplied string represents, or NaN.
 */
goog.string.toNumber = function(str) {
  var parsed = Number(str);
  // Number() turns a blank string into 0; report NaN for that case instead.
  var blankInput = parsed == 0 && goog.string.isEmpty(str);
  return blankInput ? NaN : parsed;
};
1206
1207
/**
 * Tests whether the given string is lower camel case (e.g. "isFooBar").
 *
 * Note that this assumes the string is entirely letters.
 * @see http://en.wikipedia.org/wiki/CamelCase#Variations_and_synonyms
 *
 * @param {string} str String to test.
 * @return {boolean} Whether the string is lower camel case.
 */
goog.string.isLowerCamelCase = function(str) {
  // One or more lowercase letters, then any number of capitalized words.
  var lowerCamelPattern = /^[a-z]+([A-Z][a-z]*)*$/;
  return lowerCamelPattern.test(str);
};
1220
1221
/**
 * Tests whether the given string is upper camel case (e.g. "FooBarBaz").
 *
 * Note that this assumes the string is entirely letters.
 * @see http://en.wikipedia.org/wiki/CamelCase#Variations_and_synonyms
 *
 * @param {string} str String to test.
 * @return {boolean} Whether the string is upper camel case.
 */
goog.string.isUpperCamelCase = function(str) {
  // One or more words, each an uppercase letter followed by lowercase ones.
  var upperCamelPattern = /^([A-Z][a-z]*)+$/;
  return upperCamelPattern.test(str);
};
1234
1235
/**
 * Converts a string from selector-case to camelCase (e.g. from
 * "multi-part-string" to "multiPartString"), useful for converting
 * CSS selectors and HTML dataset keys to their equivalent JS properties.
 * @param {string} str The string in selector-case form.
 * @return {string} The string in camelCase form.
 */
goog.string.toCamelCase = function(str) {
  // Replaces each hyphen-plus-lowercase-letter pair with the capital letter.
  var capitalize = function(hyphenAndLetter, letter) {
    return letter.toUpperCase();
  };
  return String(str).replace(/\-([a-z])/g, capitalize);
};
1248
1249
/**
 * Converts a string from camelCase to selector-case (e.g. from
 * "multiPartString" to "multi-part-string"), useful for converting JS
 * style and dataset properties to equivalent CSS selectors and HTML keys.
 * @param {string} str The string in camelCase form.
 * @return {string} The string in selector-case form.
 */
goog.string.toSelectorCase = function(str) {
  // Put a hyphen in front of every capital letter, then lowercase it all.
  var hyphenated = String(str).replace(/([A-Z])/g, '-$1');
  return hyphenated.toLowerCase();
};
1260
1261
/**
 * Converts a string into TitleCase. First character of the string is always
 * capitalized in addition to the first letter of every subsequent word.
 * Words are delimited by one or more whitespaces by default. Custom delimiters
 * can optionally be specified to replace the default, which doesn't preserve
 * whitespace delimiters and instead must be explicitly included if needed.
 *
 * Default delimiter => " ":
 * goog.string.toTitleCase('oneTwoThree') => 'OneTwoThree'
 * goog.string.toTitleCase('one two three') => 'One Two Three'
 * goog.string.toTitleCase(' one two ') => ' One Two '
 * goog.string.toTitleCase('one_two_three') => 'One_two_three'
 * goog.string.toTitleCase('one-two-three') => 'One-two-three'
 *
 * Custom delimiter => "_-.":
 * goog.string.toTitleCase('oneTwoThree', '_-.') => 'OneTwoThree'
 * goog.string.toTitleCase('one two three', '_-.') => 'One two three'
 * goog.string.toTitleCase(' one two ', '_-.') => ' one two '
 * goog.string.toTitleCase('one_two_three', '_-.') => 'One_Two_Three'
 * goog.string.toTitleCase('one-two-three', '_-.') => 'One-Two-Three'
 * goog.string.toTitleCase('one...two...three', '_-.') => 'One...Two...Three'
 * goog.string.toTitleCase('one. two. three', '_-.') => 'One. two. three'
 * goog.string.toTitleCase('one-two.three', '_-.') => 'One-Two.Three'
 *
 * @param {string} str String value in camelCase form.
 * @param {string=} opt_delimiters Custom delimiter character set used to
 *     distinguish words in the string value. Each character represents a
 *     single delimiter. When provided, default whitespace delimiter is
 *     overridden and must be explicitly included if needed.
 * @return {string} String value in TitleCase form.
 */
goog.string.toTitleCase = function(str, opt_delimiters) {
  // Whitespace unless the caller supplied its own delimiter characters, which
  // are escaped so that they can sit inside a RegExp character set.
  var charSet = '\\s';
  if (goog.isString(opt_delimiters)) {
    charSet = goog.string.regExpEscape(opt_delimiters);
  }

  // For IE8, we need to prevent using an empty character set. Otherwise,
  // incorrect matching will occur.
  var delimiterAlternative = '';
  if (charSet) {
    delimiterAlternative = '|[' + charSet + ']+';
  }

  // A word start is a lowercase letter at the beginning of the string or
  // right after a run of delimiters.
  var wordStart = new RegExp('(^' + delimiterAlternative + ')([a-z])', 'g');
  return str.replace(wordStart, function(whole, lead, letter) {
    return lead + letter.toUpperCase();
  });
};
1306
1307
/**
 * Parse a string in decimal or hexadecimal ('0xFFFF') form.
 *
 * To parse a particular radix, please use parseInt(string, radix) directly. See
 * https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/parseInt
 *
 * This is a wrapper for the built-in parseInt function that will only parse
 * numbers as base 10 or base 16.  Some JS implementations assume strings
 * starting with "0" are intended to be octal. ES3 allowed but discouraged
 * this behavior. ES5 forbids it.  This function emulates the ES5 behavior.
 *
 * As with the built-in parseInt, the hexadecimal prefix may follow leading
 * whitespace and an optional sign, so '0x10', '-0x10' and '+0x10' are all
 * parsed as base 16.
 *
 * For more information, see Mozilla JS Reference: http://goo.gl/8RiFj
 *
 * @param {string|number|null|undefined} value The value to be parsed.
 * @return {number} The number, parsed. If the string failed to parse, this
 *     will be NaN.
 */
goog.string.parseInt = function(value) {
  // Force finite numbers to strings.
  if (isFinite(value)) {
    value = String(value);
  }

  if (goog.isString(value)) {
    // If the string starts with '0x', '-0x' or '+0x', parse as hex. The '+'
    // case used to fall through to base 10, which stopped at the 'x' and
    // returned 0.
    return /^\s*[-+]?0x/i.test(value) ?
        parseInt(value, 16) : parseInt(value, 10);
  }

  // Anything that is neither a string nor coercible to a finite number.
  return NaN;
};
1339
1340
/**
 * Splits a string on a separator a limited number of times.
 *
 * This implementation is more similar to Python or Java, where the limit
 * parameter specifies the maximum number of splits rather than truncating
 * the number of results.
 *
 * See http://docs.python.org/2/library/stdtypes.html#str.split
 * See JavaDoc: http://goo.gl/F2AsY
 * See Mozilla reference: http://goo.gl/dZdZs
 *
 * @param {string} str String to split.
 * @param {string} separator The separator.
 * @param {number} limit The limit to the number of splits. The resulting array
 *     will have a maximum length of limit+1.  Negative numbers are the same
 *     as zero.
 * @return {!Array.<string>} The string, split.
 */
goog.string.splitLimit = function(str, separator, limit) {
  var pieces = str.split(separator);

  // How many leading pieces are passed through unchanged: at most limit
  // (rounded up), and never more than there are pieces.
  var keepCount = 0;
  if (limit > 0) {
    keepCount = Math.min(Math.ceil(limit), pieces.length);
  }
  var result = pieces.slice(0, keepCount);

  // Whatever is left over is glued back together as the final element.
  if (keepCount < pieces.length) {
    result.push(pieces.slice(keepCount).join(separator));
  }

  return result;
};
1378