1 | 1 | ;(function (global, factory) { |
2 | 1 | typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() : |
3 | | typeof define === 'function' && define.amd ? define('Az', factory) : |
4 | | global.Az = factory() |
5 | 1 | }(this, function () { 'use strict'; |
6 | 1 | if (typeof require != 'undefined' && typeof exports === 'object' && typeof module !== 'undefined') { |
7 | 1 | var fs = require('fs'); |
8 | | } |
9 | | |
// Root namespace object. It exposes the resource loader used by the other Az modules.
var Az = {
  /**
   * Loads a resource and hands it to an error-first callback.
   * Uses `fs.readFile` when the `fs` module was obtained (Node.js), otherwise XMLHttpRequest.
   *
   * @param {string} url - File path (Node.js) or URL (browser).
   * @param {string} responseType - 'json' or 'arraybuffer'.
   * @param {function(?Error, *=)} callback - Receives (err) on failure or (null, data) on success.
   */
  load: function(url, responseType, callback) {
    // The callback is optional; never let a missing callback throw inside an async handler.
    function done(err, result) {
      if (callback) {
        callback(err, result);
      }
    }

    if (fs) {
      fs.readFile(url, { encoding: responseType == 'json' ? 'utf8' : null }, function (err, data) {
        if (err) {
          done(err);
          return;
        }

        var result;
        if (responseType == 'json') {
          // Report malformed JSON through the callback instead of throwing asynchronously.
          try {
            result = JSON.parse(data);
          } catch (parseErr) {
            done(parseErr);
            return;
          }
        } else
        if (responseType == 'arraybuffer') {
          // A Buffer may be a view into a larger (pooled) ArrayBuffer, so `data.buffer`
          // alone can expose unrelated bytes. Copy only when the view does not span it all.
          if (data.byteOffset === 0 && data.byteLength === data.buffer.byteLength) {
            result = data.buffer;
          } else {
            result = data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
          }
        } else {
          done(new Error('Unknown responseType'));
          return;
        }

        done(null, result);
      });
      return;
    }

    var xhr = new XMLHttpRequest();
    xhr.open('GET', url, true);
    xhr.responseType = responseType;

    xhr.onload = function () {
      // Status 0 is what browsers report for successful non-HTTP (e.g. file://) requests.
      var ok = xhr.status === 0 || (xhr.status >= 200 && xhr.status < 300);
      if (ok && xhr.response) {
        done(null, xhr.response);
      } else {
        done(new Error('Failed to load ' + url + ' (status ' + xhr.status + ')'));
      }
    };

    // Network-level failures never reach onload.
    xhr.onerror = function () {
      done(new Error('Failed to load ' + url));
    };

    xhr.send(null);
  }
};
44 | | |
45 | 1 | return Az; |
46 | | })); |
47 | 1 | ;(function (global, factory) { |
48 | 1 | typeof exports === 'object' && typeof module !== 'undefined' ? (module.exports = module.exports || {}) && (module.exports.DAWG = factory(module.exports)) : |
49 | | typeof define === 'function' && define.amd ? define('Az.DAWG', ['Az'], factory) : |
50 | | (global.Az = global.Az || {}) && (global.Az.DAWG = factory(global.Az)) |
51 | 1 | }(this, function (Az) { 'use strict'; |
// Index at which followString()/findAll() start walking the dictionary.
var ROOT = 0;
// Sentinel returned by the lookup helpers when a transition or value does not exist.
var MISSING = -1;
// Mask applied after bit arithmetic on units.
var PRECISION_MASK = 0xFFFFFFFF;
// Bit flags read out of a 32-bit dictionary unit.
var HAS_LEAF_BIT = 0x100;    // 1 << 8
var EXTENSION_BIT = 0x200;   // 1 << 9
var OFFSET_MAX = 0x200000;   // 1 << 21
var IS_LEAF_BIT = 1 << 31;   // sign bit, i.e. -2147483648 as a JS int32
59 | | |
// Lookup table from a UTF-16 code unit to the single byte used inside the dictionary
// (presumably the Windows-1251 code page, judging by the name and the Cyrillic range).
// Code units that are not keys of this table cannot be looked up at all.
var CP1251 = (function () {
  // Everything outside the ASCII range is listed explicitly.
  var table = {
    1027: 129, 8225: 135, 1046: 198, 8222: 132, 1047: 199, 1168: 165, 1048: 200, 1113: 154, 1049: 201, 1045: 197, 1050: 202, 1028: 170, 160: 160, 1040: 192,
    1051: 203, 164: 164, 166: 166, 167: 167, 169: 169, 171: 171, 172: 172, 173: 173, 174: 174, 1053: 205, 176: 176, 177: 177, 1114: 156, 181: 181, 182: 182,
    183: 183, 8221: 148, 187: 187, 1029: 189, 1056: 208, 1057: 209, 1058: 210, 8364: 136, 1112: 188, 1115: 158, 1059: 211, 1060: 212, 1030: 178, 1061: 213,
    1062: 214, 1063: 215, 1116: 157, 1064: 216, 1065: 217, 1031: 175, 1066: 218, 1067: 219, 1068: 220, 1069: 221, 1070: 222, 1032: 163, 8226: 149, 1071: 223,
    1072: 224, 8482: 153, 1073: 225, 8240: 137, 1118: 162, 1074: 226, 1110: 179, 8230: 133, 1075: 227, 1033: 138, 1076: 228, 1077: 229, 8211: 150, 1078: 230,
    1119: 159, 1079: 231, 1042: 194, 1080: 232, 1034: 140, 1025: 168, 1081: 233, 1082: 234, 8212: 151, 1083: 235, 1169: 180, 1084: 236, 1052: 204, 1085: 237,
    1035: 142, 1086: 238, 1087: 239, 1088: 240, 1089: 241, 1090: 242, 1036: 141, 1041: 193, 1091: 243, 1092: 244, 8224: 134, 1093: 245, 8470: 185, 1094: 246,
    1054: 206, 1095: 247, 1096: 248, 8249: 139, 1097: 249, 1098: 250, 1044: 196, 1099: 251, 1111: 191, 1055: 207, 1100: 252, 1038: 161, 8220: 147, 1101: 253,
    8250: 155, 1102: 254, 8216: 145, 1103: 255, 1043: 195, 1105: 184, 1039: 143, 1026: 128, 1106: 144, 8218: 130, 1107: 131, 8217: 146, 1108: 186, 1109: 190
  };

  // Code units 0..127 map to themselves.
  for (var code = 0; code < 128; code++) {
    table[code] = code;
  }

  return table;
}());
77 | | |
// Based on all common ЙЦУКЕН keyboards (both Windows and Apple variations).
// Each key is a typed character; its value lists the characters tried as likely
// intended ones when following the "wrong letter" typo path.
var COMMON_TYPOS = {
  // Upper letter row
  'й': 'ёцыф',
  'ц': 'йфыву',
  'у': 'цывак',
  'к': 'увапе',
  'е': 'капрн',
  'н': 'епрог',
  'г': 'нролш',
  'ш': 'голдщ',
  'щ': 'шлджз',
  'з': 'щджэх-',
  'х': 'зжэъ-',
  'ъ': 'хэ-ё',
  // Home row
  'ф': 'йцычяё',
  'ы': 'йцувсчяф',
  'в': 'цукамсчы',
  'а': 'укепимсв',
  'п': 'кенртима',
  'р': 'енгоьтип',
  'о': 'нгшлбьтр',
  'л': 'гшщдюбьо',
  'д': 'шщзжюбл',
  'ж': 'щзхэюд',
  'э': 'зхъжё',
  // Bottom row (plus ё)
  'ё': 'йфяъэ',
  'я': 'ёфыч',
  'ч': 'яфывс',
  'с': 'чывам',
  'м': 'свапи',
  'и': 'мапрт',
  'т': 'ипроь',
  'ь': 'тролб',
  'б': 'ьолдю',
  'ю': 'блдж',
  // Digits and punctuation
  '1': 'ёйц',
  '2': 'йцу',
  '3': 'цук',
  '4': 'уке',
  '5': 'кен',
  '6': 'енг',
  '7': 'нгш',
  '8': 'гшщ',
  '9': 'шщз',
  '0': 'щзх-',
  '-': 'зхъ',
  '=': '-хъ',
  '\\': 'ъэ',
  '.': 'южэ'
};
85 | | |
// Extracts the offset stored in a unit: the bits above bit 9, shifted left by
// 8 more positions when the unit has EXTENSION_BIT set.
function offset(base) {
  var extraShift = (base & EXTENSION_BIT) >> 6; // 8 when the extension bit is set, else 0
  var raw = base >> 10;
  return (raw << extraShift) & PRECISION_MASK;
}
89 | | |
// Extracts the label of a unit: its low byte together with the IS_LEAF_BIT flag,
// so a unit with the leaf bit set never compares equal to a plain byte value.
function label(base) {
  var labelMask = IS_LEAF_BIT | 0xFF;
  return (base & labelMask) & PRECISION_MASK;
}
93 | | |
// Tells whether a unit has HAS_LEAF_BIT set.
function hasLeaf(base) {
  var flag = (base & HAS_LEAF_BIT) & PRECISION_MASK;
  return flag !== 0;
}
97 | | |
// Returns the payload of a unit: all of its bits except IS_LEAF_BIT.
function value(base) {
  var payloadMask = ~IS_LEAF_BIT;
  return (base & payloadMask) & PRECISION_MASK;
}
101 | | |
/**
 * Dictionary handle: keeps the raw tables the lookup methods read from.
 *
 * @param units  - table indexed by node index (read via this.units[index]).
 * @param guide  - table read two entries per node by iterateAll().
 * @param format - payload format name ('words', 'probs', 'int', or anything else for raw bytes).
 */
function DAWG(units, guide, format) {
  this.units = units;
  this.guide = guide;
  this.format = format;
}
107 | | |
/**
 * Builds a DAWG on top of a binary buffer without copying it.
 * Layout read here: a little-endian uint32 unit count, that many 4-byte units,
 * a little-endian uint32 guide length, then two guide bytes per guide entry.
 *
 * @param {ArrayBuffer} data - Serialized dictionary.
 * @param {string} format - Payload format passed through to the DAWG constructor.
 * @returns {DAWG}
 */
DAWG.fromArrayBuffer = function(data, format) {
  var view = new DataView(data);
  var unitsLength = view.getUint32(0, true);
  var guideHeaderAt = 4 + unitsLength * 4; // right after the unit table
  var guideLength = view.getUint32(guideHeaderAt, true);

  var units = new Uint32Array(data, 4, unitsLength);
  var guide = new Uint8Array(data, guideHeaderAt + 4, guideLength * 2);
  return new DAWG(units, guide, format);
};
117 | | |
/**
 * Loads a serialized dictionary through Az.load and parses it.
 *
 * @param {string} url - Location of the dictionary file.
 * @param {string} format - Payload format passed to DAWG.fromArrayBuffer.
 * @param {function(?Error, ?DAWG)} callback - Receives (err, null) on failure, (null, dawg) on success.
 */
DAWG.load = function(url, format, callback) {
  Az.load(url, 'arraybuffer', function(err, data) {
    if (err) {
      callback(err, null);
      return;
    }

    // A truncated or corrupt file makes the parser throw. Report that through the
    // callback instead of letting it escape from an async handler. The callback
    // itself is deliberately invoked outside the try block.
    var dawg;
    try {
      dawg = DAWG.fromArrayBuffer(data, format);
    } catch (parseErr) {
      callback(parseErr, null);
      return;
    }
    callback(null, dawg);
  });
};
123 | | |
/**
 * Follows a single byte transition from a node.
 *
 * @param {number} c - Byte to follow (only the low 8 bits are used).
 * @param {number} index - Index of the current node.
 * @returns {number} Index of the target node, or MISSING when its label does not match.
 */
DAWG.prototype.followByte = function(c, index) {
  var byte = c & 0xFF;
  // The candidate target is found by XOR-ing the node index, its offset and the byte.
  var target = (index ^ offset(this.units[index]) ^ byte) & PRECISION_MASK;

  // The transition only exists if the target unit is labelled with the same byte.
  return label(this.units[target]) === byte ? target : MISSING;
};
134 | | |
/**
 * Follows every character of a string, one CP1251 byte per UTF-16 code unit.
 *
 * @param {string} str - Text to follow.
 * @param {number} [index] - Node to start from; a falsy value means ROOT.
 * @returns {number} Index of the node reached, or MISSING if a character is not
 *   in the CP1251 table or a transition does not exist.
 */
DAWG.prototype.followString = function(str, index) {
  var node = index || ROOT;
  var pos = 0;

  while (pos < str.length) {
    var code = str.charCodeAt(pos++);
    if (!(code in CP1251)) {
      return MISSING;
    }

    node = this.followByte(CP1251[code], node);
    if (node == MISSING) {
      return MISSING;
    }
  }

  return node;
};
149 | | |
// Tells whether the node at `index` has its leaf flag set (see hasLeaf).
DAWG.prototype.hasValue = function(index) {
  var unit = this.units[index];
  return hasLeaf(unit);
};
153 | | |
// Reads the value attached to the node at `index`: the value unit is located by
// XOR-ing the node index with the node's offset.
DAWG.prototype.value = function(index) {
  var leafIndex = (index ^ offset(this.units[index])) & PRECISION_MASK;
  return value(this.units[leafIndex]);
};
159 | | |
/**
 * Exact lookup of a string.
 *
 * @param {string} str - Key to look up.
 * @returns {number} The stored value, or MISSING when the key cannot be followed
 *   or the node reached carries no value.
 */
DAWG.prototype.find = function(str) {
  var node = this.followString(str);
  if (node == MISSING || !this.hasValue(node)) {
    return MISSING;
  }
  return this.value(node);
};
170 | | |
/**
 * Collects every key reachable from the node at `index` and returns them decoded
 * according to this.format.
 *
 * The traversal is iterative: `stack` holds the node indices of the current path and
 * `key` the bytes followed to get there. For every node the guide table provides two
 * bytes: guide[index * 2] is the byte followed to go deeper, guide[index * 2 + 1] is
 * the byte tried from the parent once the node has been popped.
 *
 * @param {number} index - Node to start from.
 * @returns {Array} One entry per key found: a pair of numbers for 'words', a triple
 *   for 'probs', otherwise a copy of the raw key bytes. Iteration stops (returning what
 *   was collected so far) as soon as a guide byte cannot be followed.
 */
DAWG.prototype.iterateAll = function(index) {
  var results = [];
  var stack = [index];
  var key = [];
  // Node at which the previous key ended; ROOT means "nothing emitted yet".
  var last = ROOT;
  var label;

  while (true) {
    index = stack[stack.length - 1];

    // After the first key has been emitted, move on to the next candidate path.
    if (last != ROOT) {
      label = this.guide[index << 1];
      if (label) {
        // Go one level deeper.
        index = this.followByte(label, index);
        if (index == MISSING) {
          return results;
        }
        key.push(label);
        stack.push(index);
      } else {
        // Nothing deeper: pop nodes until one of them names a byte to continue with.
        do {
          label = this.guide[(index << 1) + 1];
          key.pop();
          stack.pop();
          if (!stack.length) {
            // Popped the starting node itself: traversal finished.
            return results;
          }
          index = stack[stack.length - 1];
          if (label) {
            index = this.followByte(label, index);
            if (index == MISSING) {
              return results;
            }
            key.push(label);
            stack.push(index);
          }
        } while (!label);
      }
    }

    // Descend until a node that carries a value is reached.
    while (!this.hasValue(index)) {
      var label = this.guide[index << 1];
      index = this.followByte(label, index);
      if (index == MISSING) {
        return results;
      }
      key.push(label);
      stack.push(index);
    }

    // Only three formats supported
    if (this.format == 'words') {
      // Each number is rebuilt from two consecutive key bytes.
      results.push([
        ((key[0] ^ 1) << 6) + (key[1] >> 1),
        ((key[2] ^ 1) << 6) + (key[3] >> 1)
      ]);
    } else
    if (this.format == 'probs') {
      results.push([
        ((key[0] ^ 1) << 6) + (key[1] >> 1),
        ((key[2] ^ 1) << 6) + (key[3] >> 1),
        ((key[4] ^ 1) << 6) + (key[5] >> 1)
      ]);
    } else {
      // Raw bytes
      results.push(key.slice());
    }
    last = index;
  }
}
241 | | |
// Fuzzy lookup. Features:
//  replaces (е -> ё) (DONE)
//  stutter (ннет -> нет, гоол -> гол, д-да -> да)
//  typos (count-limited):
//    swaps (солво -> слово)
//    extra letters (свлово -> слово)
//    missing letters (сово -> слово)
//    wrong letters (сково -> слово)
//
// Parameters:
//  str      - the word as typed
//  replaces - map of single-character replacements to also try (falsy to disable)
//  mstutter - maximum number of stutter corrections (falsy means 0)
//  mtypos   - maximum number of typo corrections (falsy means 0)
//
// Returns an array with one entry per dictionary hit:
//  [word, value]                      for the 'int' format
//  [word, payloads, stutter, typos]   otherwise (payloads come from iterateAll)
//
// The search keeps a LIFO work list of partial matches; each item is
// [matched prefix, number of input characters consumed, typos used, stutters used, node index].
// Typos and stutter corrections are never combined within one candidate.
DAWG.prototype.findAll = function(str, replaces, mstutter, mtypos) {
  mtypos = mtypos || 0;
  mstutter = mstutter || 0;
  var results = [],
      prefixes = [['', 0, 0, 0, ROOT]],
      prefix, index, len, code, cur, typos, stutter;

  while (prefixes.length) {
    prefix = prefixes.pop();
    // Unpack the work item; `prefix` is overwritten last because it holds the item itself.
    index = prefix[4], stutter = prefix[3], typos = prefix[2], len = prefix[1], prefix = prefix[0];

    // Done
    if (len == str.length) {
      if (this.format == 'int') {
        if (this.hasValue(index)) {
          results.push([prefix, this.value(index)]);
        }
        continue;
      }
      // Find all payloads
      if (this.format == 'words' || this.format == 'probs') {
        index = this.followByte(1, index); // separator
        if (index == MISSING) {
          continue;
        }
      }
      results.push([prefix, this.iterateAll(index), stutter, typos]);
      continue;
    }

    // Follow a replacement path
    if (replaces && str[len] in replaces) {
      code = replaces[str[len]].charCodeAt(0);
      if (code in CP1251) {
        cur = this.followByte(CP1251[code], index);
        if (cur != MISSING) {
          prefixes.push([ prefix + replaces[str[len]], len + 1, typos, stutter, cur ]);
        }
      }
    }

    // Follow typos path (if not over limit)
    if (typos < mtypos && !stutter) {
      // Skip a letter entirely (extra letter)
      prefixes.push([ prefix, len + 1, typos + 1, stutter, index ]);

      // Add a letter (missing) - or - replace a letter
      // TODO: iterate all childs?
      // Now it checks only most probable typos (located near to each other on keyboards)
      var possible = COMMON_TYPOS[str[len]];
      if (possible) {
        for (var i = 0; i < possible.length; i++) {
          code = possible.charCodeAt(i);
          if (code in CP1251) {
            cur = this.followByte(CP1251[code], index);
            if (cur != MISSING) {
              // for missing letter we need to iterate all childs, not only COMMON_TYPOS
              // prefixes.push([ prefix + possible[i], len, typos + 1, stutter, cur ]);
              prefixes.push([ prefix + possible[i], len + 1, typos + 1, stutter, cur ]);
            }
          }
        }
      }

      // Swapped two letters
      // TODO: support for replacements?
      if (len < str.length - 1) {
        // Try the next input character first, then the current one.
        code = str.charCodeAt(len + 1);
        if (code in CP1251) {
          cur = this.followByte(CP1251[code], index);
          if (cur != MISSING) {
            code = str.charCodeAt(len);
            if (code in CP1251) {
              cur = this.followByte(CP1251[code], cur);
              if (cur != MISSING) {
                prefixes.push([ prefix + str[len + 1] + str[len], len + 2, typos + 1, stutter, cur ]);
              }
            }
          }
        }
      }
    }

    // Follow base path
    code = str.charCodeAt(len);
    if (code in CP1251) {
      cur = this.followByte(CP1251[code], index);
      if (cur != MISSING) {
        prefixes.push([ prefix + str[len], len + 1, typos, stutter, cur ]);

        // Each pass swallows one more repetition of the current letter; `len` and
        // `stutter` are advanced locally and reloaded from the next work item afterwards.
        while (stutter < mstutter && !typos && len < str.length - 1) {
          // Follow a simple stutter path (merge two equal letters into one)
          if (str[len] == str[len + 1]) {
            prefixes.push([ prefix + str[len], len + 2, typos, stutter + 1, cur ]);
            len++;
          } else
          // Follow a stutter with a dash (д-да)
          if (len < str.length - 2 && str[len + 1] == '-' && str[len] == str[len + 2]) {
            prefixes.push([ prefix + str[len], len + 3, typos, stutter + 1, cur ]);
            len += 2;
          } else {
            break;
          }
          stutter++;
        }
      }
    }
  }
  return results;
}
360 | | |
361 | 1 | return DAWG; |
362 | | })); |
363 | 1 | ;(function (global, factory) { |
364 | 1 | typeof exports === 'object' && typeof module !== 'undefined' ? (module.exports = module.exports || {}) && (module.exports.Morph = factory(module.exports)) : |
365 | | typeof define === 'function' && define.amd ? define('Az.Morph', ['Az', 'Az.DAWG'], factory) : |
366 | | (global.Az = global.Az || {}) && (global.Az.Morph = factory(global.Az)) |
367 | 1 | }(this, function (Az) { 'use strict'; |
// Dictionaries and tables; everything without an initial value is filled in by Morph.init().
var words;
var probabilities;
var predictionSuffixes = new Array(3);
var prefixes = [ '', 'по', 'наи' ];
var suffixes;
var grammemes;
var paradigms;
var tags;

// Default analysis options (used for every option missing from the caller's config).
// Typos and stutter are never combined in one candidate, because that produces a large
// number of variants, especially on stuttered words.
var defaults = {
  // Replacements (work the same way as in pymorphy2).
  // Set to false to disable.
  replacements: { 'е': 'ё' },

  // "Stutter": removes repeated letters, both hyphenated ("не-е-ет") and plain ("нееет").
  // Infinity puts no limit on the number of repetitions (counted across the whole word).
  // 0 or false to disable.
  stutter: Infinity,

  // Typos: the maximum number of typos allowed in a word.
  // The following count as a typo:
  //  - an extra letter in the word
  //  - (a missing letter in the word) (TODO: does not work yet)
  //  - a wrong letter (if the right one is adjacent on the keyboard)
  //  - two neighbouring letters swapped
  // 0 or false to disable.
  // 'auto':
  //  - 0 if the word is shorter than 5 letters
  //  - 1 if the word is shorter than 10 letters (only if no parse was found without typos)
  //  - 2 otherwise (only if no parse was found without typos or with 1 typo)
  typos: 0
};
398 | | |
// Taken from https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/freeze
/**
 * Recursively freezes an object and every object reachable through its own properties.
 * The object graph must not contain cycles.
 *
 * @param {Object} obj - Object to freeze.
 * @returns {Object} The same object (left untouched when Object.freeze is unavailable).
 */
function deepFreeze(obj) {
  if (!('freeze' in Object)) {
    // Callers assign the result back, so the object must still be returned.
    return obj;
  }

  var propNames = Object.getOwnPropertyNames(obj);
  propNames.forEach(function(name) {
    var prop = obj[name];

    // Freeze nested objects before freezing the object itself.
    if (typeof prop == 'object' && prop !== null) {
      deepFreeze(prop);
    }
  });

  return Object.freeze(obj);
}
415 | | |
//
// Tag instances can be fairly large, because they are reused for all words.
// The price is that these instances must never be modified; in modern browsers
// Object.freeze() is used to enforce that.
// Every grammeme is stored inside a tag in several places:
//   tag[grammeme] = true | false
//   tag[parent] = grammeme
//   tag.stat = [grammeme1, grammeme2, ...]   // immutable grammemes
//   tag.flex = [grammeme1, grammeme2, ...]   // mutable (inflected) grammemes
//
//   tag.ext[grammemeCyr] = true | false
//   tag.ext[parentCyr] = grammemeCyr
//   tag.ext.stat = [grammemeCyr1, grammemeCyr2, ...]   // immutable grammemes
//   tag.ext.flex = [grammemeCyr1, grammemeCyr2, ...]   // mutable (inflected) grammemes
//
// Here grammeme is the Latin notation of a grammeme and grammemeCyr the Cyrillic one;
// parent and parentCyr are the parent grammeme.
// (This constructor itself only ever stores `true`; `ext` is attached by the code that builds the tags.)
//
// str has the form "stat1,stat2 flex1,flex2"; the part after the space is optional.
//
var Tag = function(str) {
  var self = this;
  var parts = str.split(' ');

  // Flags each grammeme of the list on the tag and records it under its parent grammeme.
  function register(list) {
    for (var i = 0; i < list.length; i++) {
      var name = list[i];
      self[name] = true;

      var parent = grammemes[name].parent;
      if (parent) {
        self[parent] = name;
      }
    }
  }

  this.stat = parts[0].split(',');
  register(this.stat);

  this.flex = parts[1] ? parts[1].split(',') : [];
  register(this.flex);

  // POS is an alias for the part-of-speech grammeme.
  if ('POST' in this) {
    this.POS = this.POST;
  }
};
// Serializes the tag back to the "stat1,stat2 flex1,flex2" form
// (just the first part when there are no mutable grammemes).
Tag.prototype.toString = function() {
  var text = this.stat.join(',') + ' ' + this.flex.join(',');
  return text.trim();
};
// Checks agreement either with specific grammeme values or with a list of grammemes
// taken from another tag (or word).
//   tag.matches({ 'POS' : 'NOUN', 'GNdr': ['masc', 'neut'] })
//     Keys are grammeme names; values are child grammemes, arrays of acceptable
//     grammemes, or true/false (false meaning "the grammeme must be absent").
//   tag.matches(otherTag, ['POS', 'GNdr'])
//     A tag (or word) plus the list of grammemes whose values must be equal in both tags.
Tag.prototype.matches = function(tag, grammemes) {
  if (!grammemes) {
    // Match to map
    for (var k in tag) {
      var expected = tag[k];
      if (Object.prototype.toString.call(expected) === '[object Array]') {
        // Any of the listed values is acceptable. indexOf() returns -1 when the
        // value is absent, and 0 is a legitimate hit.
        if (expected.indexOf(this[k]) === -1) {
          return false;
        }
      } else
      if (expected === false) {
        // Absent grammemes are simply not set on a tag, so compare by truthiness.
        if (this[k]) {
          return false;
        }
      } else {
        if (expected != this[k]) {
          return false;
        }
      }
    }
    return true;
  }

  if (tag instanceof Word) {
    tag = tag.tag;
  }

  // Match to another tag
  for (var i = 0; i < grammemes.length; i++) {
    if (tag[grammemes[i]] != this[grammemes[i]]) {
      return false;
    }
  }
  return true;
};
492 | | |
/**
 * One parse of a word: the matched dictionary form plus where it sits in its paradigm.
 * A paradigm is a flat array of three equally long sections; the middle section holds
 * the tag index of each form.
 *
 * @param {string} val - The word form as found in the dictionary.
 * @param {number} paradigmIdx - Index into the paradigm table.
 * @param {number} formIdx - Index of this form inside the paradigm.
 * @param {number} stutterCnt - Stutter corrections used to find the word.
 * @param {number} typosCnt - Typo corrections used to find the word.
 */
function Word(val, paradigmIdx, formIdx, stutterCnt, typosCnt) {
  var paradigm = paradigms[paradigmIdx];
  var formsCount = paradigm.length / 3;

  this.val = val;
  this.paradigmIdx = paradigmIdx;
  this.paradigm = paradigm;
  this.formIdx = formIdx;
  this.tag = tags[paradigm[formsCount + formIdx]];
  this.stutterCnt = stutterCnt;
  this.typosCnt = typosCnt;
}
// Returns the stem of the word: the value with this form's prefix and suffix cut off.
// A non-empty result is cached on the instance in `_base`.
Word.prototype.base = function() {
  if (this._base) {
    return this._base;
  }

  var formsCount = this.paradigm.length / 3;
  // The third section of the paradigm holds prefix indices, the first one suffix indices.
  var prefix = prefixes[this.paradigm[(formsCount << 1) + this.formIdx]];
  var suffix = suffixes[this.paradigm[this.formIdx]];

  this._base = this.val.substring(prefix.length, this.val.length - suffix.length);
  return this._base;
};
// Brings the word to its initial form. Pass keepPOS=true when the part of speech must
// not change during normalization (for example, to avoid turning a participle into an infinitive).
// TODO: some part-of-speech changes should probably happen in any case (e.g. comparatives
// and short forms of participles and adjectives are split off, the infinitive is separate from the verb)
Word.prototype.normalize = function(keepPOS) {
  if (keepPOS) {
    return this.inflect({ POS: this.tag.POS });
  }
  // Form 0 of the paradigm is the initial form.
  return this.inflect(0);
};
// Inflects the word so that it agrees with the grammemes of another word or tag, or with
// specific grammemes (see Tag.prototype.matches for the details). A bare number (with no
// grammeme list) selects the form with that index directly.
// The first suitable form always wins.
// Returns [word form, tag of that form], or false when no form matches.
Word.prototype.inflect = function(tag, grammemes) {
  var self = this;
  var formsCount = this.paradigm.length / 3;

  // Assembles prefix + stem + suffix for the form with the given index.
  function build(idx) {
    return [
      prefixes[self.paradigm[(formsCount << 1) + idx]] + self.base() + suffixes[self.paradigm[idx]],
      tags[self.paradigm[formsCount + idx]]
    ];
  }

  if (!grammemes && typeof tag === 'number') {
    // Inflect to specific formIdx
    return build(tag);
  }

  for (var idx = 0; idx < formsCount; idx++) {
    if (tags[this.paradigm[formsCount + idx]].matches(tag, grammemes)) {
      return build(idx);
    }
  }

  return false;
};
// Same as Tag.prototype.matches, applied to the tag of this word.
Word.prototype.matches = function(tag, grammemes) {
  var ownTag = this.tag;
  return ownTag.matches(tag, grammemes);
};
// Prints information about the word to the console: correction counters, the
// prefix|stem|suffix split, the tag, both normal forms and (collapsed) every form.
Word.prototype.log = function() {
  var formsCount = this.paradigm.length / 3;

  // Renders an inflect()/normalize() result as "form (tag)".
  var describe = function(form) {
    return form[0] + ' (' + form[1].ext.toString() + ')';
  };

  console.group(this.val);
  console.log('Stutter?', this.stutterCnt, 'Typos?', this.typosCnt);
  console.log(prefixes[this.paradigm[(formsCount << 1) + this.formIdx]] + '|' + this.base() + '|' + suffixes[this.paradigm[this.formIdx]]);
  console.log(this.tag.ext.toString());

  // Normal form, first freely and then with the part of speech kept.
  console.log('=> ', describe(this.normalize()));
  console.log('=> ', describe(this.normalize(true)));

  console.groupCollapsed('Все формы: ' + formsCount);
  for (var idx = 0; idx < formsCount; idx++) {
    console.log(describe(this.inflect(idx)));
  }
  console.groupEnd();
  console.groupEnd();
};
556 | | |
/**
 * Analyses a word and returns every parse found in the dictionary.
 *
 * @param {string} word - Word to analyse.
 * @param {Object} [config] - Options (replacements, stutter, typos); anything missing
 *   falls back to the current defaults. The object is only read, never modified.
 * @returns {Word[]} One Word per (dictionary hit, payload) pair; empty when nothing matched.
 */
var Morph = function(word, config) {
  // Reads an option without copying defaults into the caller's object.
  function option(name) {
    return (config && name in config) ? config[name] : defaults[name];
  }

  var replacements = option('replacements');
  var stutter = option('stutter');
  var typos = option('typos');

  var opts;
  if (typos == 'auto') {
    // Raise the typo budget step by step, only while nothing has been found
    // and the word is long enough.
    opts = words.findAll(word, replacements, stutter, 0);
    if (!opts.length && word.length > 4) {
      opts = words.findAll(word, replacements, stutter, 1);
      if (!opts.length && word.length > 9) {
        opts = words.findAll(word, replacements, stutter, 2);
      }
    }
  } else {
    opts = words.findAll(word, replacements, stutter, typos);
  }

  var vars = [];
  for (var i = 0; i < opts.length; i++) {
    // opts[i] is [matched word, payloads, stutter count, typo count];
    // each payload is [paradigm index, form index].
    for (var j = 0; j < opts[i][1].length; j++) {
      vars.push(new Word(opts[i][0], opts[i][1][j][0], opts[i][1][j][1], opts[i][2], opts[i][3]));
    }
  }
  return vars;
};
590 | | |
// Replaces the default options object consulted by Morph() for every option
// that a call does not supply. The object is stored as is, not copied.
Morph.setDefaults = function(config) {
  defaults = config;
};
594 | | |
/**
 * Loads every dictionary file from `path` and prepares the analyser.
 *
 * @param {string} path - Directory (or URL prefix) holding the dictionary files.
 * @param {function(?Error, Function=)} [callback] - Called exactly once: with the first
 *   error encountered, or with (null, Morph) once everything has been loaded.
 */
Morph.init = function(path, callback) {
  var tagsInt, tagsExt;
  var jobs = [];
  var remaining = 0;
  var failed = false;

  // Reports the first failure only; later results are ignored.
  function fail(err) {
    if (failed) {
      return;
    }
    failed = true;
    callback && callback(err);
  }

  // Builds the tag table once both gramtab files and the grammemes are available.
  function finish() {
    var built;
    try {
      built = Array(tagsInt.length);
      for (var i = 0; i < tagsInt.length; i++) {
        built[i] = new Tag(tagsInt[i]);
        built[i].ext = new Tag(tagsExt[i]);
      }
      // Fall back to the unfrozen array if deepFreeze handed nothing back.
      built = deepFreeze(built) || built;
    } catch (e) {
      fail(e);
      return;
    }
    tags = built;
    callback && callback(null, Morph);
  }

  function addDawg(file, format, store) {
    jobs.push({
      start: function(done) { Az.DAWG.load(path + file, format, done); },
      store: store
    });
  }

  function addFile(file, type, store) {
    jobs.push({
      start: function(done) { Az.load(path + file, type, done); },
      store: store
    });
  }

  addDawg('/words.dawg', 'words', function(dawg) {
    words = dawg;
  });

  [0, 1, 2].forEach(function(prefix) {
    addDawg('/prediction-suffixes-' + prefix + '.dawg', 'probs', function(dawg) {
      predictionSuffixes[prefix] = dawg;
    });
  });

  addDawg('/p_t_given_w.intdawg', 'int', function(dawg) {
    probabilities = dawg;
  });

  addFile('/grammemes.json', 'json', function(json) {
    // Every grammeme is reachable under both its internal and its external name.
    var map = {};
    for (var i = 0; i < json.length; i++) {
      map[json[i][0]] = map[json[i][2]] = {
        parent: json[i][1],
        internal: json[i][0],
        external: json[i][2],
        externalFull: json[i][3]
      };
    }
    grammemes = map;
  });

  addFile('/gramtab-opencorpora-int.json', 'json', function(json) {
    tagsInt = json;
  });

  addFile('/gramtab-opencorpora-ext.json', 'json', function(json) {
    tagsExt = json;
  });

  addFile('/suffixes.json', 'json', function(json) {
    suffixes = json;
  });

  addFile('/paradigms.array', 'arraybuffer', function(data) {
    // Uint16 stream: paradigm count, then for each paradigm its size followed by its items.
    var list = new Uint16Array(data),
        count = list[0],
        pos = 1,
        parsed = [];

    for (var i = 0; i < count; i++) {
      var size = list[pos++];
      parsed.push(list.subarray(pos, pos + size));
      pos += size;
    }
    paradigms = parsed;
  });

  // The total is fixed before anything starts, so a loader that calls back
  // synchronously cannot make the counter hit zero too early.
  remaining = jobs.length;
  jobs.forEach(function(job) {
    if (failed) {
      return;
    }
    job.start(function(err, data) {
      if (failed) {
        return;
      }
      if (err) {
        fail(err);
        return;
      }
      try {
        job.store(data);
      } catch (e) {
        fail(e);
        return;
      }
      if (!--remaining) {
        finish();
      }
    });
  });
};
679 | | |
680 | 1 | return Morph; |
681 | | })); |
682 | 1 | ;(function (global, factory) { |
683 | 1 | typeof exports === 'object' && typeof module !== 'undefined' ? (module.exports = module.exports || {}) && (module.exports.Syntax = factory(module.exports)) : |
684 | | typeof define === 'function' && define.amd ? define('Az.Syntax', ['Az'], factory) : |
685 | | (global.Az = global.Az || {}) && (global.Az.Syntax = factory(global.Az)) |
686 | 1 | }(this, function (Az) { 'use strict'; |
// TBD: Syntax analyzer.
// Placeholder constructor: it takes no arguments and sets nothing on the instance yet.
function Syntax() {
}
691 | | |
692 | 1 | return Syntax; |
693 | | })); |
694 | 1 | ;(function (global, factory) { |
695 | 1 | typeof exports === 'object' && typeof module !== 'undefined' ? (module.exports = module.exports || {}) && (module.exports.Tokens = factory()) : |
696 | | typeof define === 'function' && define.amd ? define('Az.Tokens', ['Az'], factory) : |
697 | | (global.Az = global.Az || {}) && (global.Az.Tokens = factory()) |
698 | 1 | }(this, function () { 'use strict'; |
699 | 1 | var TLDs = 'ac|ad|ae|aero|af|ag|ai|al|am|ao|aq|ar|arpa|as|asia|at|au|aw|ax|az|ba|bb|be|bf|bg|bh|bi|biz|bj|bm|bo|br|bs|bt|bv|bw|by|bz|ca|cat|cc|cd|cf|cg|ch|ci|cl|cm|cn|co|com|coop|cr|cu|cv|cw|cx|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|es|et|eu|fi|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|info|int|io|iq|ir|is|it|je|jo|jobs|jp|kg|ki|km|kn|kp|kr|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mil|mk|ml|mn|mo|mobi|mp|mq|mr|ms|mt|mu|museum|mv|mw|mx|my|na|name|nc|ne|net|nf|ng|nl|no|nr|nu|nz|om|org|pa|pe|pf|ph|pk|pl|pm|pn|post|pr|pro|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tr|travel|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|yt|امارات|հայ|বাংলা|бел|中国|中國|الجزائر|مصر|ею|გე|ελ|香港|भारत|بھارت|భారత్|ભારત|ਭਾਰਤ|ভারত|இந்தியா|ایران|ايران|عراق|الاردن|한국|қаз|ලංකා|இலங்கை|المغرب|мкд|мон|澳門|澳门|مليسيا|عمان|پاکستان|پاكستان|فلسطين|срб|рф|قطر|السعودية|السعودیة|السعودیۃ|السعوديه|سودان|新加坡|சிங்கப்பூர்|سورية|سوريا|ไทย|تونس|台灣|台湾|臺灣|укр|اليمن|xxx|zm|aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|academy|accenture|accountant|accountants|aco|active|actor|adac|ads|adult|aeg|aetna|afamilycompany|afl|africa|africamagic|agakhan|agency|aig|aigo|airbus|airforce|airtel|akdn|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|aol|apartments|app|apple|aquarelle|arab|aramco|archi|army|art|arte|asda|associates|athleta|attorney|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aws|axa|azure|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bbc|bbt|bbva|bcg|bcn|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bharti|bible|bid|bike|bing|bingo|bio|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bms|bmw|bnl|bnpparibas|boats|boehrin
ger|bofa|bom|bond|boo|book|booking|boots|bosch|bostik|boston|bot|boutique|box|bradesco|bridgestone|broadway|broker|brother|brussels|budapest|bugatti|build|builders|business|buy|buzz|bzh|cab|cafe|cal|call|calvinklein|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|catering|catholic|cba|cbn|cbre|cbs|ceb|center|ceo|cern|cfa|cfd|chanel|channel|chase|chat|cheap|chintai|chloe|christmas|chrome|chrysler|church|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|coach|codes|coffee|college|cologne|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|corsica|country|coupon|coupons|courses|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cuisinella|cymru|cyou|dabur|dad|dance|date|dating|datsun|day|dclk|dds|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dnp|docs|dodge|dog|doha|domains|dot|download|drive|dstv|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dwg|earth|eat|edeka|education|email|emerck|emerson|energy|engineer|engineering|enterprises|epost|epson|equipment|ericsson|erni|esq|estate|esurance|etisalat|eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|flickr|flights|flir|florist|flowers|flsmidth|fly|foo|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|futbol|fyi|gal|gallery|gallo|gallup|game|games|gap|garden|gbiz|gdn|gea|gent|genting|george|ggee|gift|gifts|gives|giving|glade|glass|gle|gl
obal|globo|gmail|gmbh|gmo|gmx|godaddy|gold|goldpoint|golf|goo|goodhands|goodyear|goog|google|gop|got|gotv|grainger|graphics|gratis|green|gripe|group|guardian|gucci|guge|guide|guitars|guru|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hkt|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|host|hosting|hot|hoteles|hotmail|house|how|hsbc|htc|hughes|hyatt|hyundai|ibm|icbc|ice|icu|ieee|ifm|iinet|ikano|imamat|imdb|immo|immobilien|industries|infiniti|ing|ink|institute|insurance|insure|intel|international|intuit|investments|ipiranga|irish|iselect|ismaili|ist|istanbul|itau|itv|iveco|iwc|jaguar|java|jcb|jcp|jeep|jetzt|jewelry|jio|jlc|jll|jmp|jnj|joburg|jot|joy|jpmorgan|jprs|juegos|juniper|kaufen|kddi|kerryhotels|kerrylogistics|kerryproperties|kfh|kia|kim|kinder|kindle|kitchen|kiwi|koeln|komatsu|kosher|kpmg|kpn|krd|kred|kuokgroup|kyknet|kyoto|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|ltd|ltda|lundbeck|lupin|luxe|luxury|macys|madrid|maif|maison|makeup|man|management|mango|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mcd|mcdonalds|mckinsey|med|media|meet|melbourne|meme|memorial|men|menu|meo|metlife|miami|microsoft|mini|mint|mit|mitsubishi|mlb|mls|mma|mnet|mobily|moda|moe|moi|mom|monash|money|monster|montblanc|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|msd|mtn|mtpc|mtr|multichoice|mutual|mutuelle|mzansimagic|nab|nadex|nagoya|naspers|nationwide|natura|navy|nba|nec|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nfl|ngo|nhk|nico|nike|nikon|ninja|nissan|nissay|nokia|northwesternmutual|norton|now|now
ruz|nowtv|nra|nrw|ntt|nyc|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|omega|one|ong|onl|online|onyourside|ooo|open|oracle|orange|organic|orientexpress|origins|osaka|otsuka|ott|ovh|page|pamperedchef|panasonic|panerai|paris|pars|partners|parts|party|passagens|pay|payu|pccw|pet|pfizer|pharmacy|philips|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|place|play|playstation|plumbing|plus|pnc|pohl|poker|politie|porn|pramerica|praxi|press|prime|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|pub|pwc|qpon|quebec|quest|qvc|racing|raid|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|rocher|rocks|rodeo|rogers|room|rsvp|ruhr|run|rwe|ryukyu|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sapo|sarl|sas|save|saxo|sbi|sbs|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scor|scot|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shopping|shouji|show|showtime|shriram|silk|sina|singles|site|ski|skin|sky|skype|sling|smart|smile|sncf|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|spot|spreadbetting|srl|srt|stada|staples|star|starhub|statebank|statefarm|statoil|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|sucks|supersport|supplies|supply|support|surf|surgery|suzuki|swatch|swiftcover|swiss|sydney|symantec|systems|tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tci|tdk|team|tech|technology|telecity|telefonica|temasek|tennis|teva|thd|theater|theatre|theguardian|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tjmaxx|tjx|tkmaxx|tmall|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|trade|trading
|training|travelchannel|travelers|travelersinsurance|trust|trv|tube|tui|tunes|tushu|tvs|ubank|ubs|uconnect|unicom|university|uno|uol|ups|vacations|vana|vanguard|vegas|ventures|verisign|versicherung|vet|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|vista|vistaprint|viva|vivo|vlaanderen|vodka|volkswagen|volvo|vote|voting|voto|voyage|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|कॉम|セール|佛山|慈善|集团|在线|大众汽车|点看|คอม|八卦|موقع|一号店|公益|公司|香格里拉|网站|移动|我爱你|москва|католик|онлайн|сайт|联通|קום|时尚|微博|淡马锡|ファッション|орг|नेट|ストア|삼성|商标|商店|商城|дети|ポイント|新闻|工行|家電|كوم|中文网|中信|娱乐|谷歌|電訊盈科|购物|クラウド|通販|网店|संगठन|餐厅|网络|ком|诺基亚|食品|飞利浦|手表|手机|ارامكو|العليان|اتصالات|بازار|موبايلي|ابوظبي|كاثوليك|همراه|닷컴|政府|شبكة|بيتك|عرب|机构|组织机构|健康|рус|珠宝|大拿|みんな|グーグル|世界|書籍|网址|닷넷|コム|天主教|游戏|vermögensberater|vermögensberatung|企业|信息|嘉里大酒店|嘉里|广东|政务|xperia|xyz|yachts|yahoo|yamaxun|yandex|yodobashi|yoga|yokohama|you|youtube|yun|zappos|zara|zero|zip|zippo|zone|zuerich'.split('|'); |
// Default tokenizer configuration; used by the Tokens constructor whenever
// the caller does not pass a config of its own.
var defaults = {
  // Markup dialects recognised while scanning (all off by default).
  html: false,
  wiki: false,      // TODO: check all cases
  markdown: false,  // TODO: check all cases

  // Special token kinds detected in plain text.
  hashtags: true,
  mentions: true,
  emails: true,

  // Link detection: "scheme:/..." prefixes, "www." prefixes and
  // "name.tld" pairs. `tlds` is a set (key -> true) of accepted TLDs.
  links: { protocols: true, www: true, tlds: {} }
};
/* TODO: add more named HTML entities */
// Named HTML entities (lower-cased name -> replacement character) that
// `append` decodes when `config.html` is on.
// `nbsp` is written as an explicit escape: a bare whitespace literal is
// indistinguishable from a plain space in source and is easily lost by
// editors; the entity denotes U+00A0 NO-BREAK SPACE.
var HTML_ENTITIES = { nbsp: '\u00A0', quot: '"', gt: '>', lt: '<', amp: '&' };
715 | | |
// Turn the flat TLD list into a lookup set on the default config,
// so that membership can be tested by key.
TLDs.forEach(function (tld) {
  defaults.links.tlds[tld] = true;
});
719 | | |
// Start splitting text into tokens.
// Returns a context; use the `done` method to retrieve the result.
//
// text   - initial text, passed straight to `append`.
// config - optional tokenizer options; `defaults` is used when omitted.
//
// May be called with or without `new`.
var Tokens = function(text, config) {
  if (!(this instanceof Tokens)) {
    // Plain-function call: forward BOTH arguments. Passing only `text`
    // here would silently replace the caller's config with the defaults.
    return new Tokens(text, config);
  }

  this.tokens = [];
  this.config = config || defaults;
  this.append(text);
  // Cursor for nextToken/prevToken; -1 means "before the first token".
  this.index = -1;
};
732 | | |
// Adds more text content.
//
// Scans `text` one character at a time and either extends the last token or
// pushes a new one onto `this.tokens`. Some branches also merge several
// already-emitted tokens into one (links, e-mails, numbers, entities,
// wiki links).
//
// text   - string to tokenize; null/undefined is treated as empty.
// config - optional options for this call only; defaults to `this.config`.
//
// Each token carries: type, optional subType, s (text), st/en (inclusive
// offsets of its first/last source character), length, idx, firstUpper and
// allUpper. Returns `this` so calls can be chained.
Tokens.prototype.append = function(text, config) {
  // TODO: get rid of 's' field (storing a copy of token)
  // st + length + en should be enough (check that they are always correct)
  config = config || this.config;
  text = (text == null) ? '' : String(text);

  // Own-property test for lookup tables: `key in obj` also matches names
  // inherited from Object.prototype ("constructor", "toString", ...).
  var hasOwn = Object.prototype.hasOwnProperty;

  // Offsets are counted across all append() calls, not per chunk; otherwise
  // a token that continues into the next chunk would get en < st.
  var base = this.offset || 0;

  for (var i = 0; i < text.length; i++) {
    var ch = text.charAt(i);
    var code = text.charCodeAt(i);
    var pos = base + i;

    var append = false;
    var last = this.tokens.length - 1;
    var token = this.tokens[last];

    // HTML entity decoding: when ';' closes a known entity, drop the tokens
    // that spelled it and continue as if the decoded character was read.
    if (config.html && ch == ';') {
      // Named entity: '&' WORD ';'
      if (last > 0 && token.type == 'WORD' && this.tokens[last - 1].s == '&') {
        var name = token.s.toLowerCase();
        // Own-property check: with `in`, "&constructor;" resolved to an
        // inherited function and crashed on charCodeAt below.
        if (hasOwn.call(HTML_ENTITIES, name)) {
          ch = HTML_ENTITIES[name];
          code = ch.charCodeAt(0);

          last -= 2;
          token = this.tokens[last];
          this.tokens.length = last + 1;
        }
      } else
      // Numeric entity: '&' '#' ('x' + hex digits | decimal digits) ';'
      if (last > 1 && (token.type == 'NUMBER' || (token.type == 'WORD' && token.s[0] == 'x')) && this.tokens[last - 1].s == '#' && this.tokens[last - 2].s == '&') {
        if (token.s[0] == 'x') {
          code = parseInt(token.s.substr(1), 16);
        } else {
          code = parseInt(token.s, 10);
        }
        ch = String.fromCharCode(code);

        last -= 3;
        token = this.tokens[last];
        this.tokens.length = last + 1;
      }
    }

    // Classify the character. Later checks override earlier ones.
    var charType = 'OTHER';
    var charUpper = (ch.toLocaleLowerCase() != ch);
    if (code >= 0x0400 && code <= 0x04FF) charType = 'CYRIL';
    if ((code >= 0x0041 && code <= 0x005A) || (code >= 0x0061 && code <= 0x007A) || (code >= 0x00C0 && code <= 0x024F)) charType = 'LATIN';
    if (code >= 0x0030 && code <= 0x0039) charType = 'DIGIT';
    if ((code <= 0x0020) || (code >= 0x0080 && code <= 0x00A0)) charType = 'SPACE';
    if ('‐-−‒–—―.…,:;?!¿¡()[]«»"\'’‘’“”/⁄'.indexOf(ch) > -1) charType = 'PUNCT';

    // Type a brand-new token would get if this character starts one.
    var tokenType = charType;
    var tokenSubType = false;
    if (charType == 'CYRIL' || charType == 'LATIN') {
      tokenType = 'WORD';
      tokenSubType = charType;
    } else
    if (charType == 'DIGIT') {
      tokenType = 'NUMBER';
    }

    var lineStart = !token || token.s[token.s.length - 1] == '\n';

    if (config.wiki) {
      if (lineStart) {
        if (':;*#~|'.indexOf(ch) > -1) {
          tokenType = 'MARKUP';
          tokenSubType = 'NEWLINE';
        }
      }
      if ('={[|]}'.indexOf(ch) > -1) {
        tokenType = 'MARKUP';
      }
    }

    if (config.markdown) {
      if (lineStart) {
        if ('=-#>+-'.indexOf(ch) > -1) {
          tokenType = 'MARKUP';
          tokenSubType = 'NEWLINE';
        }
      }
      if ('[]*~_`\\'.indexOf(ch) > -1) {
        tokenType = 'MARKUP';
      }
    }

    if (token) {
      // Wiki mode: a single apostrophe after a word that is not followed by
      // another apostrophe is part of the word, not markup.
      if (config.wiki && ch != '\'' && token.s == '\'' && last > 0 && this.tokens[last - 1].type == 'WORD') {
        var word = this.tokens[last - 1];
        word.s += token.s;
        word.en = token.en;
        // Keep `length` consistent with st/en (the field is `length`;
        // there is no `len` field on tokens).
        word.length = (word.en - word.st) + 1;

        last -= 1;
        this.tokens.length = last + 1;
        token = this.tokens[last];
      }

      // Preprocess last token: WORD '.' WORD(known TLD) followed by
      // punctuation or space is a link or an e-mail address.
      if (config.links && config.links.tlds &&
          (charType == 'PUNCT' || charType == 'SPACE') &&
          this.tokens.length > 2 &&
          this.tokens[last - 2].type == 'WORD' &&
          this.tokens[last - 1].s == '.' &&
          this.tokens[last].type == 'WORD' &&
          // Own-property check so that e.g. "a.constructor" is not a link.
          hasOwn.call(config.links.tlds, this.tokens[last].s)) {

        // The merged token must end where the TLD token ended.
        var linkEnd = token.en;

        // Merge all subdomains
        while (last >= 2 &&
               this.tokens[last - 2].type == 'WORD' &&
               (this.tokens[last - 1].s == '.' || this.tokens[last - 1].s == '@' || this.tokens[last - 1].s == ':')) {
          last -= 2;
          token = this.tokens[last];
          token.s += this.tokens[last + 1].s + this.tokens[last + 2].s;
          token.allUpper = token.allUpper && this.tokens[last + 1].allUpper && this.tokens[last + 2].allUpper;
        }

        // The current character usually starts a new token, so the merged
        // token's span has to be fixed here rather than at the loop end.
        token.en = linkEnd;
        token.length = (token.en - token.st) + 1;

        if (config.emails && token.s.indexOf('@') > -1 && token.s.indexOf(':') == -1) {
          // URL can contain a '@' but in that case it should be in form http://user@site.com or user:pass@site.com
          // So if URL has a '@' but no ':' in it, we assume it's a email
          token.type = 'EMAIL';
        } else {
          token.type = 'LINK';

          if (ch == '/') {
            append = true;
          }
        }
        this.tokens.length = last + 1;
      } else

      // Process next char (start new token or append to the previous one)
      if (token.type == 'LINK') {
        if (charType != 'SPACE' && ch != ',') {
          append = true;
        }
      } else
      if (token.type == 'EMAIL') {
        if (charType == 'CYRIL' || charType == 'LATIN' || ch == '.') {
          append = true;
        }
      } else
      if (token.type == 'HASHTAG' || token.type == 'MENTION') {
        if (charType == 'CYRIL' || charType == 'LATIN' || charType == 'DIGIT' || ch == '_' || (ch == '@' && token.s.indexOf('@') == -1)) {
          append = true;
        }
      } else
      // Inside an unfinished tag: swallow everything up to the closing '>'
      // (a '>' inside a quoted attribute value does not count).
      if (token.type == 'TAG' && (token.quote || token.s[token.s.length - 1] != '>')) {
        append = true;
        if (token.quote) {
          if (ch == token.quote && token.s[token.s.length - 1] != '\\') {
            delete token.quote;
          }
        } else
        if (ch == '"' || ch == '\'') {
          token.quote = ch;
        }
      } else
      // Script/style content: swallow everything up to an unquoted '<'.
      if (token.type == 'CONTENT') {
        append = true;
        if (token.quote) {
          if (ch == token.quote && token.s[token.s.length - 1] != '\\') {
            delete token.quote;
          }
        } else
        if (ch == '"' || ch == '\'') {
          token.quote = ch;
        } else
        if (ch == '<') {
          append = false;
        }
      } else
      if (token.type == 'TAG' && ch != '<' && token.s.substr(1, 6).toLowerCase() == 'script') {
        tokenType = 'CONTENT';
        tokenSubType = 'SCRIPT';
      } else
      if (token.type == 'TAG' && ch != '<' && token.s.substr(1, 5).toLowerCase() == 'style') {
        tokenType = 'CONTENT';
        tokenSubType = 'STYLE';
      } else
      if (config.html && token.s == '<' && (charType == 'LATIN' || ch == '!' || ch == '/')) {
        append = true;
        token.type = 'TAG';
        if (ch == '!') {
          token.subType = 'COMMENT';
        } else
        if (ch == '/') {
          token.subType = 'CLOSING';
        }
      } else
      // (A second `token.type == 'CONTENT'` branch used to sit here; it was
      // unreachable because CONTENT is fully handled above.)
      if (token.type == 'MARKUP' && token.subType == 'TEMPLATE' && (token.s[token.s.length - 1] != '}' || token.s[token.s.length - 2] != '}')) {
        append = true;
      } else
      // Markdown link target "(...)": swallow everything up to ')'.
      // The test must be on subType (type cannot be both MARKUP and LINK).
      // The '(' guard keeps wiki "[[...|" tokens, which also carry the
      // LINK subType, out of this branch.
      if (token.type == 'MARKUP' && token.subType == 'LINK' && token.s[0] == '(' && token.s[token.s.length - 1] != ')') {
        append = true;
      } else
      if (token.type == 'MARKUP' && token.s[0] == '`' && token.subType == 'NEWLINE' && charType == 'LATIN') {
        append = true;
      } else
      if (charType == 'CYRIL' || charType == 'LATIN') {
        if (token.type == 'WORD') {
          append = true;
          token.subType = (token.subType == charType) ? token.subType : 'MIXED';
        } else
        if (token.type == 'NUMBER') { // Digits + ending
          append = true;
          token.subType = (token.subType && token.subType != charType) ? 'MIXED' : charType;
        } else
        if (config.hashtags && token.s == '#') { // Hashtags
          append = true;
          token.type = 'HASHTAG';
        } else
        if (config.mentions && token.s == '@' && (last == 0 || this.tokens[last - 1].type == 'SPACE')) { // Mentions
          append = true;
          token.type = 'MENTION';
        } else
        if (charType == 'LATIN' && (token.s == '\'' || token.s == '’')) {
          append = true;
          token.type = 'WORD';
          token.subType = 'LATIN';
        } else
        if (token.s == '-') { // -цать (?), 3-й
          append = true;

          if (last > 0 && this.tokens[last - 1].type == 'NUMBER') {
            token = this.tokens[last - 1];
            token.s += this.tokens[last].s;

            this.tokens.length -= 1;
          }

          token.type = 'WORD';
          token.subType = charType;
        }
      } else
      if (charType == 'DIGIT') {
        if (token.type == 'WORD') {
          append = true;
          token.subType = 'MIXED';
        } else
        if (token.type == 'NUMBER') {
          append = true;
        } else
        if (token.s == '+' || token.s == '-') {
          append = true;

          if (last > 0 && this.tokens[last - 1].type == 'NUMBER') {
            token = this.tokens[last - 1];
            token.s += this.tokens[last].s;
            token.subType = 'RANGE';

            this.tokens.length -= 1;
          }

          token.type = 'NUMBER';
        } else
        if ((token.s == ',' || token.s == '.') && this.tokens.length > 1 && this.tokens[last - 1].type == 'NUMBER') {
          append = true;

          token = this.tokens[last - 1];
          token.s += this.tokens[last].s;

          this.tokens.length -= 1;
        }
      } else
      if (charType == 'SPACE') {
        if (token.type == 'SPACE') {
          append = true;
        }
      } else
      if (token.type == 'MARKUP' && token.s[0] == ch && '=-~:*#`\'>_'.indexOf(ch) > -1) {
        append = true;
      } else
      if (ch == '.') {
        if (config.links && config.links.www && token.s.toLocaleLowerCase() == 'www') { // Links without protocol but with www
          append = true;
          token.type = 'LINK';
        }
      } else
      if (config.wiki && ch == '\'') {
        if (token.s == '\'') {
          append = true;
          token.type = 'MARKUP';
        } else {
          tokenType = 'PUNCT';
        }
      } else
      if (ch == '-' || ch == '’' || ch == '\'') {
        if (token.type == 'WORD') {
          append = true;
        }
      } else
      if (ch == '/') {
        if (config.links && config.links.protocols &&
            this.tokens.length > 2 &&
            this.tokens[last - 2].type == 'WORD' &&
            this.tokens[last - 2].subType == 'LATIN' &&
            this.tokens[last - 1].s == ':' &&
            this.tokens[last].s == '/') { // Links (with protocols)
          append = true;

          token = this.tokens[last - 2];
          token.s += this.tokens[last - 1].s + this.tokens[last].s;
          token.allUpper = token.allUpper && this.tokens[last - 1].allUpper && this.tokens[last].allUpper;
          token.type = 'LINK';

          this.tokens.length -= 2;
        }
      } else
      // Entity that was not decoded above: keep it as one ENTITY token.
      if (config.html && ch == ';') {
        if (last > 0 && token.type == 'WORD' && this.tokens[last - 1].s == '&') {
          append = true;

          token = this.tokens[last - 1];
          token.s += this.tokens[last].s;
          // Combine with the merged-in token (tokens[last]), not with itself.
          token.allUpper = token.allUpper && this.tokens[last].allUpper;
          token.type = 'ENTITY';

          this.tokens.length -= 1;
        } else
        if (last > 1 && (token.type == 'WORD' || token.type == 'NUMBER') && this.tokens[last - 1].s == '#' && this.tokens[last - 2].s == '&') {
          append = true;

          token = this.tokens[last - 2];
          token.s += this.tokens[last - 1].s + this.tokens[last].s;
          token.allUpper = token.allUpper && this.tokens[last - 1].allUpper && this.tokens[last].allUpper;
          token.type = 'ENTITY';

          this.tokens.length -= 2;
        }
      } else
      if (config.markdown && ch == '[' && token.s == '!') {
        append = true;
        token.type = 'MARKUP';
      } else
      if (config.markdown && ch == '(' && token.s == ']') {
        tokenType = 'MARKUP';
        tokenSubType = 'LINK';
      } else
      if (config.wiki && ch == '{' && token.s == '{') {
        append = true;
        token.type = 'MARKUP';
        token.subType = 'TEMPLATE';
      } else
      if (config.wiki && ch == '[' && token.s == '[') {
        append = true;
      } else
      if (config.wiki && ch == ']' && token.s == ']') {
        append = true;
      } else
      if (config.wiki && ch == '|' && !lineStart) {
        // Look back for the "[[" that opens this link; give up at another
        // '|' or at a line break.
        var found = -1;
        var j;
        for (j = last - 1; j >= 0; j--) {
          if (this.tokens[j].s == '[[') {
            found = j;
            break;
          }
          if (this.tokens[j].s == '|' || this.tokens[j].s.indexOf('\n') > -1) {
            break;
          }
        }
        if (found > -1) {
          append = true;
          // Fold everything from "[[" up to here into the "[[" token.
          for (j = last - 1; j >= found; j--) {
            token = this.tokens[j];
            token.s += this.tokens[j + 1].s;
            token.allUpper = token.allUpper && this.tokens[j + 1].allUpper;
          }
          last = found;
          this.tokens.length = last + 1;
          token.subType = 'LINK';
        }
      }
    }

    if (append) {
      token.s += ch;
    } else {
      token = {
        type: tokenType,
        s: ch,
        st: pos,
        idx: this.tokens.length,

        firstUpper: charUpper,
        allUpper: charUpper
      };
      if (tokenSubType) {
        token.subType = tokenSubType;
      }
      this.tokens.push(token);
    }
    token.en = pos;
    token.length = (token.en - token.st) + 1;
    token.allUpper = token.allUpper && charUpper;
  }

  this.offset = base + text.length;
  return this;
};
1133 | | |
// Finalize tokenizing and return the list of tokens.
// For now it just returns tokens, in the future there could be some
// additional work.
//
// filter  - optional array of token types. When omitted, the internal
//           token array itself is returned (not a copy).
// exclude - truthy: return tokens whose type is NOT in `filter`;
//           falsy or omitted: return only tokens whose type IS in `filter`.
Tokens.prototype.done = function(filter, exclude) {
  if (!filter) {
    return this.tokens;
  }
  // Coerce to a real boolean: with `exclude` left undefined the comparison
  // below was false for every token and the result was always empty.
  exclude = !!exclude;
  var list = [];
  for (var i = 0; i < this.tokens.length; i++) {
    if ((filter.indexOf(this.tokens[i].type) == -1) == exclude) {
      list.push(this.tokens[i]);
    }
  }
  return list;
};
1148 | | |
// Counts tokens, optionally restricted by type.
//
// filter  - optional array of token types. When omitted, all tokens count.
// exclude - truthy: count tokens whose type is NOT in `filter`;
//           falsy or omitted: count only tokens whose type IS in `filter`.
Tokens.prototype.countTokens = function(filter, exclude) {
  // The guard used to test `skipSpace` / `skipPunct`, names that are not
  // defined anywhere in this function, so every call threw a ReferenceError.
  if (!filter) {
    return this.tokens.length;
  }
  // Coerce to a real boolean so an omitted `exclude` means "include".
  exclude = !!exclude;
  var count = 0;
  for (var i = 0; i < this.tokens.length; i++) {
    if ((filter.indexOf(this.tokens[i].type) == -1) == exclude) {
      count++;
    }
  }
  return count;
};
1161 | | |
// Returns the next token after the cursor (`this.index`), or null when
// there is none.
//
// moveIndex - truthy: move the cursor to the returned token (the cursor is
//             left untouched when null is returned).
// filter    - optional array of token types; when omitted, no token is
//             skipped.
// exclude   - truthy: skip tokens whose type IS in `filter`;
//             falsy or omitted: skip tokens whose type is NOT in `filter`.
Tokens.prototype.nextToken = function(moveIndex, filter, exclude) {
  // Coerce to a real boolean: with `exclude` left undefined the comparison
  // below never matched and `filter` was silently ignored.
  exclude = !!exclude;
  var index = this.index;
  index++;
  while (index < this.tokens.length && filter && (filter.indexOf(this.tokens[index].type) != -1) == exclude) {
    index++;
  }
  if (index < this.tokens.length) {
    if (moveIndex) {
      this.index = index;
    }
    return this.tokens[index];
  }
  return null;
};
1176 | | |
// Peeks at the first non-space token after the cursor without moving it.
// Returns that token if it is punctuation, `false` if it is some other
// kind of token, and whatever `nextToken` returned when nothing is found.
Tokens.prototype.punctAhead = function() {
  var upcoming = this.nextToken(false, ['SPACE'], true);
  if (!upcoming) {
    return upcoming;
  }
  return upcoming.type == 'PUNCT' ? upcoming : false;
};
1181 | | |
// Returns the closest token before the cursor (`this.index`), or null when
// there is none.
//
// moveIndex - truthy: move the cursor to the returned token (the cursor is
//             left untouched when null is returned).
// filter    - optional array of token types; when omitted, no token is
//             skipped.
// exclude   - truthy: skip tokens whose type IS in `filter`;
//             falsy or omitted: skip tokens whose type is NOT in `filter`.
Tokens.prototype.prevToken = function(moveIndex, filter, exclude) {
  // Coerce to a real boolean: with `exclude` left undefined the comparison
  // below never matched and `filter` was silently ignored.
  exclude = !!exclude;
  var index = this.index;
  index--;
  while (index >= 0 && filter && (filter.indexOf(this.tokens[index].type) != -1) == exclude) {
    index--;
  }
  if (index >= 0) {
    if (moveIndex) {
      this.index = index;
    }
    return this.tokens[index];
  }
  return null;
};
1196 | | |
// Peeks at the closest non-space token before the cursor without moving it.
// Returns that token if it is punctuation, `false` if it is some other
// kind of token, and whatever `prevToken` returned when nothing is found.
Tokens.prototype.punctBehind = function() {
  var preceding = this.prevToken(false, ['SPACE'], true);
  if (!preceding) {
    return preceding;
  }
  return preceding.type == 'PUNCT' ? preceding : false;
};
1201 | | |
// Tells whether `nextToken` would find a token after the cursor for the
// given `filter` / `exclude`; the cursor itself is not moved.
Tokens.prototype.hasTokensAhead = function(filter, exclude) {
  var upcoming = this.nextToken(false, filter, exclude);
  return upcoming !== null && upcoming !== undefined;
};
1205 | | |
// Tells whether `prevToken` would find a token before the cursor for the
// given `filter` / `exclude`; the cursor itself is not moved.
Tokens.prototype.hasTokensBehind = function(filter, exclude) {
  var preceding = this.prevToken(false, filter, exclude);
  return preceding !== null && preceding !== undefined;
};
1209 | | |
1210 | 1 | return Tokens; |
1211 | | })); |