Options
All
  • Public
  • Public/Protected
  • All
Menu

External module "lib/submod/PunctuationTokenizer"

Index

Variables

Let STOPWORD

STOPWORD: object = {} as {[key: string]: number,}

Type declaration

  • [key: string]: number

Let STOPWORD2

STOPWORD2: object = {} as {[key: number]: typeof STOPWORD,}

Type declaration

  • [key: number]: object
    • [key: string]: number

Let _STOPWORD

_STOPWORD: string[] = (' ,.;+-|/\\\'":?<>[]{}=!@#$%^&*()~`' +'。,、':∶;?‘’“”〝〞ˆˇ﹕︰﹔﹖﹑·¨….¸;!´?!~—ˉ|‖"〃`@﹫¡¿﹏﹋﹌︴々﹟#﹩$﹠&﹪%*﹡﹢﹦' +'﹤‐ ̄¯―﹨ˆ˜﹍﹎+=<­__-\ˇ~﹉﹊()〈〉‹›﹛﹜『』〖〗[]《》〔〕{}「」【】︵︷︿︹︽_﹁﹃︻︶︸' +'﹀︺︾ˉ﹂﹄︼+-×÷﹢﹣±/=≈≡≠∧∨∑∏∪∩∈⊙⌒⊥∥∠∽≌<>≤≥≮≯∧∨√﹙﹚[]﹛﹜∫∮∝∞⊙∏' +'┌┬┐┏┳┓╒╤╕─│├┼┤┣╋┫╞╪╡━┃└┴┘┗┻┛╘╧╛┄┆┅┇╭─╮┏━┓╔╦╗┈┊│╳│┃┃╠╬╣┉┋╰─╯┗━┛' +'╚╩╝╲╱┞┟┠┡┢┦┧┨┩┪╉╊┭┮┯┰┱┲┵┶┷┸╇╈┹┺┽┾┿╀╁╂╃╄╅╆' +'○◇□△▽☆●◆■▲▼★♠♥♦♣☼☺◘♀√☻◙♂×▁▂▃▄▅▆▇█⊙◎۞卍卐╱╲▁▏↖↗↑←↔◤◥╲╱▔▕↙↘↓→↕◣◢∷▒░℡™').split('') as any as string[]

Let segment

segment: Segment

Const type

type: "tokenizer" = "tokenizer"

Module type (模块类型)

Functions

init

  • init(_segment: any): void

matchStopword

  • matchStopword(text: string, cur?: number): IWord[]
  • Match the punctuation characters contained in the text and return the match details (匹配包含的标点符号，返回相关信息)

    Parameters

    • text: string

      The text to scan (文本)

    • Optional cur: number

      Start position (offset) within the text (开始位置)

    Returns IWord[]

    Return format: {w: '网址' (lit. "URL"), c: start position} — NOTE(review): the word "URL" here looks copy-pasted from a URL tokenizer's doc; this tokenizer matches punctuation, so `w` is presumably the matched punctuation string — confirm against the source.

split

Generated using TypeDoc