lib/goog/i18n/bidi.js

// Copyright 2007 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
/**
 * @fileoverview Utility functions for handling bidirectional (Bidi) text.
 */
18
19
/**
 * Namespaces declared by this file: the bidi helper functions, the
 * directionality enum, the directional-string interface and the enum of
 * Unicode formatting characters.
 */
goog.provide('goog.i18n.bidi');
goog.provide('goog.i18n.bidi.Dir');
goog.provide('goog.i18n.bidi.DirectionalString');
goog.provide('goog.i18n.bidi.Format');
27
28
/**
 * @define {boolean} FORCE_RTL makes the {@link goog.i18n.bidi.IS_RTL} constant
 * report an RTL locale unconditionally. Set it only to override the default,
 * locale-based decision. Defaults to false.
 *
 * {@see goog.i18n.bidi.IS_RTL}
 */
goog.define('goog.i18n.bidi.FORCE_RTL', false);
38
39
/**
 * Whether the current locale is an RTL locale.
 *
 * True when {@link goog.i18n.bidi.FORCE_RTL} is set, or when
 * {@link goog.LOCALE} starts (case-insensitively) with one of the language
 * codes ar, fa, he, iw, ps, sd, ug, ur, yi or ckb, and that code is either the
 * whole locale or is followed by '-' or '_'.
 *
 * <p>This is meant to be a maximally efficient compile-time constant: for the
 * default goog.LOCALE, compiling
 * "if (goog.i18n.bidi.IS_RTL) alert('rtl') else {}" should produce no code.
 * That goal is why only a few major RTL locales are recognized here, rather
 * than the broader set handled by goog.i18n.bidi.isRtlLanguage.
 *
 * <p>The constant describes the directionality of the locale only; callers
 * decide whether it should also drive the direction of the UI.
 *
 * NOTE(review): the repeated goog.LOCALE.substring(...) calls are presumably
 * left inline so the compiler can fold the whole expression - confirm before
 * factoring them into a variable.
 *
 * {@see goog.LOCALE}
 *
 * @type {boolean}
 *
 * TODO(user): write a test that checks that this is a compile-time constant.
 */
goog.i18n.bidi.IS_RTL = goog.i18n.bidi.FORCE_RTL ||
    (
        // Two-letter language codes.
        (goog.LOCALE.substring(0, 2).toLowerCase() == 'ar' ||
         goog.LOCALE.substring(0, 2).toLowerCase() == 'fa' ||
         goog.LOCALE.substring(0, 2).toLowerCase() == 'he' ||
         goog.LOCALE.substring(0, 2).toLowerCase() == 'iw' ||
         goog.LOCALE.substring(0, 2).toLowerCase() == 'ps' ||
         goog.LOCALE.substring(0, 2).toLowerCase() == 'sd' ||
         goog.LOCALE.substring(0, 2).toLowerCase() == 'ug' ||
         goog.LOCALE.substring(0, 2).toLowerCase() == 'ur' ||
         goog.LOCALE.substring(0, 2).toLowerCase() == 'yi') &&
        // The code must end the locale or be followed by a subtag separator.
        (goog.LOCALE.length == 2 ||
         goog.LOCALE.substring(2, 3) == '-' ||
         goog.LOCALE.substring(2, 3) == '_')
    ) || (
        // Three-letter language code.
        goog.LOCALE.length >= 3 &&
        goog.LOCALE.substring(0, 3).toLowerCase() == 'ckb' &&
        (goog.LOCALE.length == 3 ||
         goog.LOCALE.substring(3, 4) == '-' ||
         goog.LOCALE.substring(3, 4) == '_')
    );
83
84
/**
 * Unicode bidi formatting characters, keyed by their standard abbreviation.
 * @enum {string}
 */
goog.i18n.bidi.Format = {
  /** U+202A, "Left-To-Right Embedding". */
  LRE: '\u202A',
  /** U+202B, "Right-To-Left Embedding". */
  RLE: '\u202B',
  /** U+202C, "Pop Directional Formatting". */
  PDF: '\u202C',
  /** U+200E, "Left-To-Right Mark". */
  LRM: '\u200E',
  /** U+200F, "Right-To-Left Mark". */
  RLM: '\u200F'
};
101
102
/**
 * Text directionality values. The sign of the value encodes the direction:
 * positive for LTR, negative for RTL, zero for neutral.
 * @enum {number}
 */
goog.i18n.bidi.Dir = {
  /** Left-to-right. */
  LTR: 1,

  /** Right-to-left. */
  RTL: -1,

  /** Neither left-to-right nor right-to-left. */
  NEUTRAL: 0
};
123
124
/**
 * The literal string 'right'.
 * @type {string}
 */
goog.i18n.bidi.RIGHT = 'right';
130
131
/**
 * The literal string 'left'.
 * @type {string}
 */
goog.i18n.bidi.LEFT = 'left';
137
138
/**
 * Locale-aware "right": 'left' when the locale is RTL, otherwise 'right'.
 * @type {string}
 */
goog.i18n.bidi.I18N_RIGHT =
    goog.i18n.bidi.IS_RTL ? goog.i18n.bidi.LEFT : goog.i18n.bidi.RIGHT;
145
146
/**
 * Locale-aware "left": 'right' when the locale is RTL, otherwise 'left'.
 * @type {string}
 */
goog.i18n.bidi.I18N_LEFT =
    goog.i18n.bidi.IS_RTL ? goog.i18n.bidi.RIGHT : goog.i18n.bidi.LEFT;
153
154
/**
 * Normalizes a directionality given in one of several representations to a
 * goog.i18n.bidi.Dir constant, to ease interaction with other conventions.
 *
 * @param {goog.i18n.bidi.Dir|number|boolean|null} givenDir Directionality given
 *     in one of the following formats:
 *     1. A goog.i18n.bidi.Dir constant.
 *     2. A number (positive = LTR, negative = RTL, 0 = neutral).
 *     3. A boolean (true = RTL, false = LTR).
 *     4. A null for unknown directionality.
 * @param {boolean=} opt_noNeutral Whether a givenDir of zero or
 *     goog.i18n.bidi.Dir.NEUTRAL should be treated as null, i.e. unknown, in
 *     order to preserve legacy behavior.
 * @return {?goog.i18n.bidi.Dir} A goog.i18n.bidi.Dir constant matching the
 *     given directionality. If given null, returns null (i.e. unknown).
 */
goog.i18n.bidi.toDir = function(givenDir, opt_noNeutral) {
  // Unknown stays unknown (loose comparison also catches undefined).
  if (givenDir == null) {
    return null;
  }

  // Numbers, including goog.i18n.bidi.Dir values, are classified by sign.
  if (typeof givenDir === 'number') {
    if (givenDir > 0) {
      return goog.i18n.bidi.Dir.LTR;
    }
    if (givenDir < 0) {
      return goog.i18n.bidi.Dir.RTL;
    }
    return opt_noNeutral ? null : goog.i18n.bidi.Dir.NEUTRAL;
  }

  // Anything left is expected to be a boolean: true means RTL.
  return givenDir ? goog.i18n.bidi.Dir.RTL : goog.i18n.bidi.Dir.LTR;
};
185
186
/**
 * A practical pattern to identify strong LTR characters. This pattern is not
 * theoretically correct according to the Unicode standard. It is simplified for
 * performance and small code size.
 *
 * The range U+0800-U+08FF (Samaritan, Mandaic, Arabic Extended) is excluded
 * here because those are right-to-left scripts; it is covered by
 * goog.i18n.bidi.rtlChars_ instead.
 * @type {string}
 * @private
 */
goog.i18n.bidi.ltrChars_ =
    'A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u0300-\u0590\u0900-\u1FFF' +
    '\u200E\u2C00-\uFB1C\uFE00-\uFE6F\uFEFD-\uFFFF';


/**
 * A practical pattern to identify strong RTL characters. This pattern is not
 * theoretically correct according to the Unicode standard. It is simplified
 * for performance and small code size.
 *
 * The second range runs through U+08FF so that the right-to-left blocks in
 * U+0800-U+08FF (Samaritan, Mandaic, Arabic Extended) are treated as RTL.
 * @type {string}
 * @private
 */
goog.i18n.bidi.rtlChars_ =
    '\u0591-\u06EF\u06FA-\u08FF\u200F\uFB1D-\uFDFF\uFE70-\uFEFC';
208
209
/**
 * Simplified pattern matching either an HTML tag (opening or closing) or an
 * HTML escape such as "&amp;". Such spans may be skipped when estimating text
 * directionality.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.htmlSkipReg_ = /<[^>]*>|&[^;]+;/g;
218
219
/**
 * Returns the input text with HTML tags and HTML escapes removed (each match
 * of goog.i18n.bidi.htmlSkipReg_ is replaced by the empty string) when
 * opt_isStripNeeded is true; otherwise returns the input unchanged.
 * Intended for text directionality estimation only: it is a good-enough
 * approximation for that purpose, not a correct HTML stripper, and should not
 * be used in other contexts.
 * @param {string} str The given string.
 * @param {boolean=} opt_isStripNeeded Whether to perform the stripping.
 *     Default: false (to retain consistency with calling functions).
 * @return {string} The given string cleaned of HTML tags / escapes.
 * @private
 */
goog.i18n.bidi.stripHtmlIfNeeded_ = function(str, opt_isStripNeeded) {
  if (!opt_isStripNeeded) {
    return str;
  }
  return str.replace(goog.i18n.bidi.htmlSkipReg_, '');
};
237
238
/**
 * Matches any single RTL character anywhere in a string.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.rtlCharReg_ =
    new RegExp('[' + goog.i18n.bidi.rtlChars_ + ']');
245
246
/**
 * Matches any single LTR character anywhere in a string.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.ltrCharReg_ =
    new RegExp('[' + goog.i18n.bidi.ltrChars_ + ']');
253
254
/**
 * Tests whether the given string contains at least one RTL character.
 * @param {string} str The string to test.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether the string contains RTL characters.
 */
goog.i18n.bidi.hasAnyRtl = function(str, opt_isHtml) {
  var text = goog.i18n.bidi.stripHtmlIfNeeded_(str, opt_isHtml);
  return goog.i18n.bidi.rtlCharReg_.test(text);
};
266
267
/**
 * Legacy alias of goog.i18n.bidi.hasAnyRtl: tests whether the given string
 * contains at least one RTL character.
 * @param {string} str The string to test.
 * @return {boolean} Whether the string contains RTL characters.
 * @deprecated Use hasAnyRtl.
 */
goog.i18n.bidi.hasRtlChar = goog.i18n.bidi.hasAnyRtl;
275
276
/**
 * Tests whether the given string contains at least one LTR character.
 * @param {string} str The string to test.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether the string contains LTR characters.
 */
goog.i18n.bidi.hasAnyLtr = function(str, opt_isHtml) {
  var text = goog.i18n.bidi.stripHtmlIfNeeded_(str, opt_isHtml);
  return goog.i18n.bidi.ltrCharReg_.test(text);
};
288
289
/**
 * Matches a string whose first character is LTR.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.ltrRe_ =
    new RegExp('^[' + goog.i18n.bidi.ltrChars_ + ']');
297
298
/**
 * Matches a string whose first character is RTL.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.rtlRe_ =
    new RegExp('^[' + goog.i18n.bidi.rtlChars_ + ']');
306
307
/**
 * Checks whether the first character of the string is an RTL character.
 * @param {string} str The string to test.
 * @return {boolean} Whether the first character in str is an RTL char.
 */
goog.i18n.bidi.isRtlChar = function(str) {
  var startsRtl = goog.i18n.bidi.rtlRe_.test(str);
  return startsRtl;
};
316
317
/**
 * Checks whether the first character of the string is an LTR character.
 * @param {string} str The string to test.
 * @return {boolean} Whether the first character in str is an LTR char.
 */
goog.i18n.bidi.isLtrChar = function(str) {
  var startsLtr = goog.i18n.bidi.ltrRe_.test(str);
  return startsLtr;
};
326
327
/**
 * Checks whether the first character of the string is neutral, i.e. neither
 * an LTR nor an RTL character.
 * @param {string} str The string to test.
 * @return {boolean} Whether the first character in str is a neutral char.
 */
goog.i18n.bidi.isNeutralChar = function(str) {
  var isStrong =
      goog.i18n.bidi.isLtrChar(str) || goog.i18n.bidi.isRtlChar(str);
  return !isStrong;
};
336
337
/**
 * Matches text whose first strongly directional character is LTR: a run of
 * non-RTL characters from the start, followed by an LTR character.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.ltrDirCheckRe_ = new RegExp(
    '^[^' + goog.i18n.bidi.rtlChars_ + ']*' +
    '[' + goog.i18n.bidi.ltrChars_ + ']');
346
347
/**
 * Matches text whose first strongly directional character is RTL: a run of
 * non-LTR characters from the start, followed by an RTL character.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.rtlDirCheckRe_ = new RegExp(
    '^[^' + goog.i18n.bidi.ltrChars_ + ']*' +
    '[' + goog.i18n.bidi.rtlChars_ + ']');
356
357
/**
 * Checks whether the first strongly directional character (if any) is RTL.
 * @param {string} str String being checked.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether RTL directionality is detected using the first
 *     strongly-directional character method.
 */
goog.i18n.bidi.startsWithRtl = function(str, opt_isHtml) {
  var text = goog.i18n.bidi.stripHtmlIfNeeded_(str, opt_isHtml);
  return goog.i18n.bidi.rtlDirCheckRe_.test(text);
};
370
371
/**
 * Legacy alias of goog.i18n.bidi.startsWithRtl: checks whether the first
 * strongly directional character (if any) is RTL.
 * @param {string} str String being checked.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether RTL directionality is detected using the first
 *     strongly-directional character method.
 * @deprecated Use startsWithRtl.
 */
goog.i18n.bidi.isRtlText = goog.i18n.bidi.startsWithRtl;
382
383
/**
 * Checks whether the first strongly directional character (if any) is LTR.
 * @param {string} str String being checked.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether LTR directionality is detected using the first
 *     strongly-directional character method.
 */
goog.i18n.bidi.startsWithLtr = function(str, opt_isHtml) {
  var text = goog.i18n.bidi.stripHtmlIfNeeded_(str, opt_isHtml);
  return goog.i18n.bidi.ltrDirCheckRe_.test(text);
};
396
397
/**
 * Legacy alias of goog.i18n.bidi.startsWithLtr: checks whether the first
 * strongly directional character (if any) is LTR.
 * @param {string} str String being checked.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether LTR directionality is detected using the first
 *     strongly-directional character method.
 * @deprecated Use startsWithLtr.
 */
goog.i18n.bidi.isLtrText = goog.i18n.bidi.startsWithLtr;
408
409
/**
 * Regular expression to check if a string looks like something that must
 * always be LTR even in RTL text, e.g. a URL. When estimating the
 * directionality of text containing these, we treat these as weakly LTR,
 * like numbers.
 *
 * Both "http://" and "https://" prefixes are recognized, so that secure URLs
 * are not counted as ordinary strongly-LTR words.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.isRequiredLtrRe_ = /^https?:\/\/.*/;
419
420
/**
 * Checks whether the input string either looks like a URL (per
 * goog.i18n.bidi.isRequiredLtrRe_) or contains no strongly directional
 * characters at all.
 * @param {string} str String being checked.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether neutral directionality is detected.
 */
goog.i18n.bidi.isNeutralText = function(str, opt_isHtml) {
  var text = goog.i18n.bidi.stripHtmlIfNeeded_(str, opt_isHtml);
  if (goog.i18n.bidi.isRequiredLtrRe_.test(text)) {
    return true;
  }
  // The text is already stripped, so no HTML flag is passed on.
  var hasStrong =
      goog.i18n.bidi.hasAnyLtr(text) || goog.i18n.bidi.hasAnyRtl(text);
  return !hasStrong;
};
434
435
/**
 * Matches text whose last strongly directional character is LTR: an LTR
 * character followed only by non-RTL characters up to the end.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.ltrExitDirCheckRe_ = new RegExp(
    '[' + goog.i18n.bidi.ltrChars_ + ']' +
    '[^' + goog.i18n.bidi.rtlChars_ + ']*$');
444
445
/**
 * Matches text whose last strongly directional character is RTL: an RTL
 * character followed only by non-LTR characters up to the end.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.rtlExitDirCheckRe_ = new RegExp(
    '[' + goog.i18n.bidi.rtlChars_ + ']' +
    '[^' + goog.i18n.bidi.ltrChars_ + ']*$');
454
455
/**
 * Checks whether the exit directionality of a piece of text is LTR, i.e.
 * whether the last strongly-directional character in the string is LTR.
 * @param {string} str String being checked.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether LTR exit directionality was detected.
 */
goog.i18n.bidi.endsWithLtr = function(str, opt_isHtml) {
  var text = goog.i18n.bidi.stripHtmlIfNeeded_(str, opt_isHtml);
  return goog.i18n.bidi.ltrExitDirCheckRe_.test(text);
};
468
469
/**
 * Legacy alias of goog.i18n.bidi.endsWithLtr: checks whether the last
 * strongly-directional character in the string is LTR.
 * @param {string} str String being checked.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether LTR exit directionality was detected.
 * @deprecated Use endsWithLtr.
 */
goog.i18n.bidi.isLtrExitText = goog.i18n.bidi.endsWithLtr;
480
481
/**
 * Checks whether the exit directionality of a piece of text is RTL, i.e.
 * whether the last strongly-directional character in the string is RTL.
 * @param {string} str String being checked.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether RTL exit directionality was detected.
 */
goog.i18n.bidi.endsWithRtl = function(str, opt_isHtml) {
  var text = goog.i18n.bidi.stripHtmlIfNeeded_(str, opt_isHtml);
  return goog.i18n.bidi.rtlExitDirCheckRe_.test(text);
};
494
495
/**
 * Legacy alias of goog.i18n.bidi.endsWithRtl: checks whether the last
 * strongly-directional character in the string is RTL.
 * @param {string} str String being checked.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether RTL exit directionality was detected.
 * @deprecated Use endsWithRtl.
 */
goog.i18n.bidi.isRtlExitText = goog.i18n.bidi.endsWithRtl;
506
507
/**
 * Case-insensitive pattern matching right-to-left language codes: either one
 * of the listed language subtags at the start, or any code carrying an
 * Arab/Hebr/Thaa/Nkoo/Tfng script subtag - in both cases provided no Latn or
 * Cyrl script subtag follows.
 * See {@link #isRtlLanguage} for the design.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.rtlLocalesRe_ = new RegExp(
    '^(ar|ckb|dv|he|iw|fa|nqo|ps|sd|ug|ur|yi|' +
    '.*[-_](Arab|Hebr|Thaa|Nkoo|Tfng))' +
    '(?!.*[-_](Latn|Cyrl)($|-|_))($|-|_)',
    'i');
519
520
/**
 * Checks whether a BCP 47 / III language code indicates an RTL language, i.e.
 * either:
 * - a language code explicitly specifying one of the right-to-left scripts,
 *   e.g. "az-Arab", or
 * - a language code specifying one of the languages normally written in a
 *   right-to-left script, e.g. "fa" (Farsi), except ones explicitly specifying
 *   Latin or Cyrillic script (which are the usual LTR alternatives).
 *
 * The list of right-to-left scripts appears in the 100-199 range in
 * http://www.unicode.org/iso15924/iso15924-num.html, of which Arabic and
 * Hebrew are by far the most widely used. Thaana, N'Ko, and Tifinagh are also
 * recognized, as they have significant modern usage. The rest (Syriac,
 * Samaritan, Mandaic, etc.) seem to have extremely limited or no modern usage
 * and are not recognized, to save on code size.
 *
 * The languages usually written in a right-to-left script are taken as those
 * with Suppress-Script: Hebr|Arab|Thaa|Nkoo|Tfng in
 * http://www.iana.org/assignments/language-subtag-registry,
 * as well as Central (or Sorani) Kurdish (ckb), Sindhi (sd) and Uyghur (ug).
 * Other subtags of the language code, e.g. regions like EG (Egypt), are
 * ignored.
 * @param {string} lang BCP 47 (a.k.a III) language code.
 * @return {boolean} Whether the language code is an RTL language.
 */
goog.i18n.bidi.isRtlLanguage = function(lang) {
  var isRtl = goog.i18n.bidi.rtlLocalesRe_.test(lang);
  return isRtl;
};
546
547
/**
 * Matches bracketed spans to guard in HTML: (...), [...], {...} and
 * &lt;...&gt; (angle brackets in their escaped form).
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.bracketGuardHtmlRe_ =
    /(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(&lt;.*?(&gt;)+)/g;
555
556
/**
 * Matches bracketed spans to guard in plain text: (...), [...], {...} and
 * <...>.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.bracketGuardTextRe_ =
    /(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?>+)/g;
564
565
/**
 * Applies bracket guarding by wrapping every bracketed span in an HTML span
 * tag with an explicit dir attribute. This addresses the messy bracket
 * display that frequently happens in RTL layout.
 * @param {string} s The string to process.
 * @param {boolean=} opt_isRtlContext Specifies the default direction (usually
 *     the direction of the UI). When undefined, the direction is RTL if s
 *     contains any RTL character.
 * @return {string} The processed string, with all brackets guarded.
 */
goog.i18n.bidi.guardBracketInHtml = function(s, opt_isRtlContext) {
  var isRtl = opt_isRtlContext;
  if (isRtl === undefined) {
    isRtl = goog.i18n.bidi.hasAnyRtl(s);
  }
  // '$&' re-inserts the matched bracketed span inside the wrapper.
  var wrapper = isRtl ? '<span dir=rtl>$&</span>' : '<span dir=ltr>$&</span>';
  return s.replace(goog.i18n.bidi.bracketGuardHtmlRe_, wrapper);
};
584
585
/**
 * Applies bracket guarding by surrounding every bracketed span with LRM or
 * RLM marks. This addresses the messy bracket display that frequently happens
 * in RTL layout. This version works for both plain text and html, but in some
 * cases it does not work as well as guardBracketInHtml.
 * @param {string} s The string to process.
 * @param {boolean=} opt_isRtlContext Specifies the default direction (usually
 *     the direction of the UI). When undefined, the direction is RTL if s
 *     contains any RTL character.
 * @return {string} The processed string, with all brackets guarded.
 */
goog.i18n.bidi.guardBracketInText = function(s, opt_isRtlContext) {
  var isRtl = opt_isRtlContext;
  if (isRtl === undefined) {
    isRtl = goog.i18n.bidi.hasAnyRtl(s);
  }
  var mark;
  if (isRtl) {
    mark = goog.i18n.bidi.Format.RLM;
  } else {
    mark = goog.i18n.bidi.Format.LRM;
  }
  // '$&' re-inserts the matched bracketed span between the two marks.
  return s.replace(goog.i18n.bidi.bracketGuardTextRe_, mark + '$&' + mark);
};
602
603
/**
 * Enforces RTL directionality on an html snippet regardless of the overall
 * context. If the snippet starts with a tag, dir=rtl is added to the first
 * tag; otherwise the snippet is wrapped in a new span. For this reason, if
 * the snippet starts with a tag, that tag must enclose the whole piece. If
 * the tag already has a dir specified, the new one overrides the existing one
 * in behavior (tested on FF and IE).
 * @param {string} html The string to process.
 * @return {string} The processed string, with directionality enforced to RTL.
 */
goog.i18n.bidi.enforceRtlInHtml = function(html) {
  var startsWithTag = html.charAt(0) === '<';
  if (!startsWithTag) {
    // '\n' is important for FF so that it won't incorrectly merge span groups
    return '\n<span dir=rtl>' + html + '</span>';
  }
  return html.replace(/<\w+/, '$& dir=rtl');
};
621
622
/**
 * Enforces RTL on the given piece of text by wrapping it in the Unicode BiDi
 * formatting characters RLE (before) and PDF (after).
 * @param {string} text The piece of text to wrap.
 * @return {string} The wrapped string.
 */
goog.i18n.bidi.enforceRtlInText = function(text) {
  var wrapped = goog.i18n.bidi.Format.RLE + text;
  return wrapped + goog.i18n.bidi.Format.PDF;
};
632
633
/**
 * Enforces LTR directionality on an html snippet regardless of the overall
 * context. If the snippet starts with a tag, dir=ltr is added to the first
 * tag; otherwise the snippet is wrapped in a new span. For this reason, if
 * the snippet starts with a tag, that tag must enclose the whole piece. If
 * the tag already has a dir specified, the new one overrides the existing one
 * in behavior (tested on FF and IE).
 * @param {string} html The string to process.
 * @return {string} The processed string, with directionality enforced to LTR.
 */
goog.i18n.bidi.enforceLtrInHtml = function(html) {
  var startsWithTag = html.charAt(0) === '<';
  if (!startsWithTag) {
    // '\n' is important for FF so that it won't incorrectly merge span groups
    return '\n<span dir=ltr>' + html + '</span>';
  }
  return html.replace(/<\w+/, '$& dir=ltr');
};
651
652
/**
 * Enforces LTR on the given piece of text by wrapping it in the Unicode BiDi
 * formatting characters LRE (before) and PDF (after).
 * @param {string} text The piece of text to wrap.
 * @return {string} The wrapped string.
 */
goog.i18n.bidi.enforceLtrInText = function(text) {
  var wrapped = goog.i18n.bidi.Format.LRE + text;
  return wrapped + goog.i18n.bidi.Format.PDF;
};
662
663
/**
 * Matches a colon followed by four whitespace-separated dimension values,
 * such as "padding: .3 0.4ex 5px 6;", capturing each of the four values.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.dimensionsRe_ =
    /:\s*([.\d][.\w]*)\s+([.\d][.\w]*)\s+([.\d][.\w]*)\s+([.\d][.\w]*)/g;
671
672
/**
 * Matches every occurrence of "left", case-insensitively.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.leftRe_ = /left/gi;
679
680
/**
 * Matches every occurrence of "right", case-insensitively.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.rightRe_ = /right/gi;
687
688
/**
 * Matches the "%%%%" placeholder used while swapping 'left' and 'right'.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.tempRe_ = /%%%%/g;
695
696
/**
 * Swaps location parameters and 'left'/'right' in a CSS specification, so
 * that the processed string is suited for RTL layout. Though this function
 * covers most cases, there are always exceptions; it is suggested to put
 * those exceptions in a separate group of CSS strings.
 * @param {string} cssStr CSS specification string.
 * @return {string} Processed CSS specification string.
 */
goog.i18n.bidi.mirrorCSS = function(cssStr) {
  // Reverse four-value dimensions: swap the 2nd and 4th values.
  var mirrored = cssStr.replace(goog.i18n.bidi.dimensionsRe_, ':$1 $4 $3 $2');

  // Swap left and right in three steps, going through a placeholder so the
  // second replacement does not undo the first.
  mirrored = mirrored.replace(goog.i18n.bidi.leftRe_, '%%%%');
  mirrored = mirrored.replace(goog.i18n.bidi.rightRe_, goog.i18n.bidi.LEFT);
  return mirrored.replace(goog.i18n.bidi.tempRe_, goog.i18n.bidi.RIGHT);
};
713
714
/**
 * Matches a double quote directly after a Hebrew character (U+0591-U+05F2),
 * capturing that character. Used for Hebrew double quote substitution.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.doubleQuoteSubstituteRe_ = /([\u0591-\u05f2])"/g;
722
723
/**
 * Matches a single quote directly after a Hebrew character (U+0591-U+05F2),
 * capturing that character. Used for Hebrew single quote substitution.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.singleQuoteSubstituteRe_ = /([\u0591-\u05f2])'/g;
731
732
/**
 * Replaces a double quote directly after a Hebrew character with GERSHAYIM
 * (U+05F4) and a single quote directly after a Hebrew character with GERESH
 * (U+05F3). In such cases that is most likely the user's intention.
 * @param {string} str String to process.
 * @return {string} Processed string with double/single quotes replaced.
 */
goog.i18n.bidi.normalizeHebrewQuote = function(str) {
  var withGershayim =
      str.replace(goog.i18n.bidi.doubleQuoteSubstituteRe_, '$1\u05f4');
  return withGershayim.replace(
      goog.i18n.bidi.singleQuoteSubstituteRe_, '$1\u05f3');
};
744
745
/**
 * Separator (one or more whitespace characters) used to split a string into
 * "words" for directionality estimation based on relative word counts.
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.wordSeparatorRe_ = /\s+/;
753
754
/**
 * Matches a string that contains any numerals. Used to tell completely
 * neutral strings apart from those containing numbers, which are weakly LTR.
 *
 * Native Arabic digits (\u0660 - \u0669) are deliberately left out: although
 * they flow left-to-right inside a number, they do so even when the overall
 * directionality is RTL, and a mathematical expression using them is supposed
 * to flow right-to-left overall (including unary plus and minus appearing to
 * the right of a number), which depends on the overall directionality being
 * RTL. The digits used in Farsi (\u06F0 - \u06F9), on the other hand, are
 * included, since Farsi math (including unary plus and minus) does flow
 * left-to-right.
 *
 * @type {RegExp}
 * @private
 */
goog.i18n.bidi.hasNumeralsRe_ = /[\d\u06f0-\u06f9]/;
773
774
/**
 * Threshold for RTL detection: the fraction of RTL words among strongly
 * directional words that must be exceeded for text to be estimated as RTL.
 * @type {number}
 * @private
 */
goog.i18n.bidi.rtlDetectionThreshold_ = 0.40;
781
782
/**
 * Estimates the directionality of a string based on relative word counts.
 * If the share of RTL words among all strongly directional words exceeds
 * goog.i18n.bidi.rtlDetectionThreshold_, returns RTL.
 * Otherwise, if any words are strongly or weakly LTR, returns LTR.
 * Otherwise, returns NEUTRAL.
 * Numbers and URL-like words are counted as weakly LTR.
 * @param {string} str The string to be checked.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {goog.i18n.bidi.Dir} Estimated overall directionality of {@code str}.
 */
goog.i18n.bidi.estimateDirection = function(str, opt_isHtml) {
  var text = goog.i18n.bidi.stripHtmlIfNeeded_(str, opt_isHtml);
  var words = text.split(goog.i18n.bidi.wordSeparatorRe_);

  var rtlWords = 0;
  var strongWords = 0;
  var sawWeakLtr = false;

  for (var idx = 0; idx < words.length; idx++) {
    var word = words[idx];

    if (goog.i18n.bidi.startsWithRtl(word)) {
      rtlWords++;
      strongWords++;
      continue;
    }
    // URL-like words are only weakly LTR, even though they contain LTR
    // characters, so this check must come before the strong-LTR check.
    if (goog.i18n.bidi.isRequiredLtrRe_.test(word)) {
      sawWeakLtr = true;
      continue;
    }
    if (goog.i18n.bidi.hasAnyLtr(word)) {
      strongWords++;
      continue;
    }
    if (goog.i18n.bidi.hasNumeralsRe_.test(word)) {
      sawWeakLtr = true;
    }
  }

  // No strongly directional words: weak LTR evidence decides.
  if (strongWords === 0) {
    return sawWeakLtr ? goog.i18n.bidi.Dir.LTR : goog.i18n.bidi.Dir.NEUTRAL;
  }

  var rtlShare = rtlWords / strongWords;
  return rtlShare > goog.i18n.bidi.rtlDetectionThreshold_ ?
      goog.i18n.bidi.Dir.RTL : goog.i18n.bidi.Dir.LTR;
};
820
821
/**
 * Checks the directionality of a piece of text; returns true if
 * goog.i18n.bidi.estimateDirection estimates it as RTL, i.e. the text should
 * be laid out in RTL direction.
 * @param {string} str The piece of text to examine.
 * @param {boolean=} opt_isHtml Whether str is HTML / HTML-escaped.
 *     Default: false.
 * @return {boolean} Whether this piece of text should be laid out in RTL.
 */
goog.i18n.bidi.detectRtlDirectionality = function(str, opt_isHtml) {
  var estimated = goog.i18n.bidi.estimateDirection(str, opt_isHtml);
  return estimated == goog.i18n.bidi.Dir.RTL;
};
834
835
/**
 * Sets a text input element's directionality and text alignment based on a
 * given directionality. Does nothing if the element is missing or if the
 * given directionality is unknown or neutral.
 * @param {Element} element Input field element to set directionality to.
 * @param {goog.i18n.bidi.Dir|number|boolean|null} dir Desired directionality,
 *     given in one of the following formats:
 *     1. A goog.i18n.bidi.Dir constant.
 *     2. A number (positive = LTR, negative = RTL, 0 = neutral).
 *     3. A boolean (true = RTL, false = LTR).
 *     4. A null for unknown directionality.
 */
goog.i18n.bidi.setElementDirAndAlign = function(element, dir) {
  if (!element) {
    return;
  }
  var resolvedDir = goog.i18n.bidi.toDir(dir);
  // Both null (unknown) and Dir.NEUTRAL (0) are falsy and leave the element
  // untouched.
  if (!resolvedDir) {
    return;
  }
  var isRtl = resolvedDir == goog.i18n.bidi.Dir.RTL;
  element.style.textAlign = isRtl ? goog.i18n.bidi.RIGHT : goog.i18n.bidi.LEFT;
  element.dir = isRtl ? 'rtl' : 'ltr';
};
859
860
/**
 * Sets the element's dir based on the estimated directionality of the given
 * text. When the estimate is neither LTR nor RTL, the dir attribute is
 * removed so the element inherits its direction from the document.
 * @param {!Element} element
 * @param {string} text
 */
goog.i18n.bidi.setElementDirByTextDirectionality = function(element, text) {
  var estimated = goog.i18n.bidi.estimateDirection(text);
  if (estimated === goog.i18n.bidi.Dir.LTR) {
    element.dir = 'ltr';
  } else if (estimated === goog.i18n.bidi.Dir.RTL) {
    element.dir = 'rtl';
  } else {
    // Default for no direction, inherit from document.
    element.removeAttribute('dir');
  }
};
879
880
881
/**
 * Interface for string-like objects that carry an attached direction, if
 * that direction is known.
 * @interface
 */
goog.i18n.bidi.DirectionalString = function() {};


/**
 * Marker property of the DirectionalString interface.
 *
 * It allows checking at runtime whether an object implements this interface:
 * every implementation sets this property to {@code true}.
 * @type {boolean}
 */
goog.i18n.bidi.DirectionalString.prototype
    .implementsGoogI18nBidiDirectionalString;


/**
 * Retrieves this object's known direction (if any).
 * @return {?goog.i18n.bidi.Dir} The known direction. Null if unknown.
 */
goog.i18n.bidi.DirectionalString.prototype.getDirection;