Home Reference Source Repository

js/util/_TextReader.js

'use strict'

import {UnescapeSJIS, UnescapeEUCJP, UnescapeJIS7, UnescapeJIS8, 
        UnescapeUnicode, UnescapeUTF7, UnescapeUTF8, UnescapeUTF16LE} from '../third_party/ecl'
/*global Buffer*/

// Token patterns used by TextReader. Each one is anchored at the start of
// the text being scanned.

// Optional sign, one or more digits, optional trailing ';'.
const _integerPattern = /^(-|\+)?\d+;?/
// Optional sign, optional digits on either side of a mandatory '.',
// optional trailing ';'.
const _floatPattern = /^(-|\+)?(\d)*\.(\d)*;?/
// One or more word characters (letters, digits, underscore).
const _wordPattern = /^\w+/
// Everything up to and including the next line feed.
const _linePattern = /^.*\n/

/**
 * TextReader class
 *
 * Reads tokens (integers, floats, words, lines, arbitrary patterns) from a
 * Buffer holding encoded text. The buffer is decoded lazily in chunks of
 * `_partialStep` bytes into `_partialText`; consumed characters are dropped
 * from the front of `_partialText`.
 *
 * Note: `_pos` is a byte cursor that is only advanced by `readData`; the
 * text-reading methods do not move it.
 * @access public
 */
export default class TextReader {
  /**
   * constructor
   * @access public
   * @constructor
   * @param {Buffer|ArrayBuffer} data - source bytes; anything that is not a Buffer is passed to Buffer.from
   * @param {string} encoding - text encoding of data
   */
  constructor(data, encoding = 'utf-8') {
    /**
     * Byte position used by readData / getAvailableDataLength.
     * @access private
     * @type {number}
     */
    this._pos = 0

    /**
     * Decoded text that has not been consumed yet.
     * @access private
     * @type {string}
     */
    this._partialText = ''

    /**
     * Byte offset in buffer up to which text has been decoded.
     * @access private
     * @type {number}
     */
    this._partialOffset = 0

    /**
     * Number of bytes decoded per refill.
     * @access private
     * @type {number}
     */
    this._partialStep = 200

    /**
     * A refill is attempted when fewer than this many characters remain buffered.
     * @access private
     * @type {number}
     */
    this._partialMinLength = 100

    /**
     * Not read or updated anywhere in this class.
     * @access private
     * @type {boolean}
     */
    this._eof = true

    /**
     *
     * @access public
     * @type {Buffer}
     */
    this.buffer = null

    if(data instanceof Buffer){
      this.buffer = data
    }else{
      this.buffer = Buffer.from(data)
    }

    /**
     *
     * @access public
     * @type {string}
     */
    this.encoding = encoding

    // prepare buffered text
    this._addPartialText()
  }

  /**
   * Drops the next `length` characters of decoded text.
   * @access public
   * @param {number} length - length of data to skip (in characters)
   * @param {boolean} noAssert - when true, _check is not called
   * @returns {void}
   */
  skip(length, noAssert = false) {
    this._moveIndex(length)
    if(!noAssert){
      this._check()
    }
  }

  /**
   * Reads and consumes up to `length` characters of decoded text.
   * Fewer characters are returned only when the end of the data is reached.
   * @access public
   * @param {number} length - length of data to read (in characters)
   * @param {?string} [encoding = null] - currently ignored; the reader's encoding is used
   * @returns {string} - the characters read
   */
  readString(length, encoding = null) {
    // make sure the requested span is decoded before cutting it out
    this._ensurePartialText(length)
    const str = this._partialText.substring(0, length)

    this._moveIndex(str.length)
    return str
  }

  /**
   * Skips whitespace, then reads a decimal integer token (optional sign,
   * digits, optional trailing ';').
   * @access public
   * @param {number} length - unused; kept for interface compatibility
   * @param {boolean} signed - unused; kept for interface compatibility
   * @returns {?number} - parsed value, or null if the text does not start with an integer
   */
  readInteger(length, signed) {
    const str = this._getString(_integerPattern)
    if(str === null){
      return null
    }
    return parseInt(str[0], 10)
  }

  /**
   * Same as readInteger; the byte width is not used for text data.
   * @access public
   * @returns {?number} -
   */
  readUnsignedByte() {
    return this.readInteger(1, false)
  }

  /**
   * Same as readInteger; the byte width is not used for text data.
   * @access public
   * @returns {?number} -
   */
  readUnsignedShort() {
    return this.readInteger(2, false)
  }

  /**
   * Same as readInteger; the byte width is not used for text data.
   * @access public
   * @returns {?number} -
   */
  readUnsignedInt() {
    return this.readInteger(4, false)
  }

  /**
   * Same as readInteger; the byte width is not used for text data.
   * @access public
   * @returns {?number} -
   */
  readInt() {
    return this.readInteger(4, true)
  }

  /**
   * Skips whitespace, then reads a decimal number token containing a '.'
   * (optional sign, optional trailing ';').
   * @access public
   * @returns {?number} - parsed value, or null if the text does not start with such a token
   */
  readFloat() {
    const str = this._getString(_floatPattern)
    if(str === null){
      return null
    }
    return parseFloat(str[0])
  }

  /**
   * Same as readFloat.
   * @access public
   * @returns {?number} -
   */
  readDouble() {
    return this.readFloat()
  }

  /**
   * Returns `length` raw bytes starting at the byte cursor and advances it.
   * The byte cursor is independent of the text consumed by the other read methods.
   * @access public
   * @param {number} length - number of bytes to read
   * @returns {Buffer} -
   */
  readData(length) {
    const start = this._pos
    this._pos += length
    return this.buffer.slice(start, this._pos)
  }

  /**
   * Skips whitespace, then reads a run of word characters.
   * @access public
   * @returns {?string} - the word, or null if the text does not start with one
   */
  readWord() {
    const str = this._getString(_wordPattern)
    return (str !== null ? str[0] : null)
  }

  /**
   * Skips whitespace, then reads up to and including the next line feed.
   * @access public
   * @returns {?string} - the line, or null if no line feed is found in the buffered text
   */
  readLine() {
    const str = this._getString(_linePattern)
    return (str !== null ? str[0] : null)
  }

  /**
   * Skips whitespace, then matches `pattern` against the buffered text and
   * consumes as many characters as the matched text is long.
   * @access public
   * @param {RegExp} pattern - pattern to match
   * @returns {?string[]} - result of String#match, or null if there is no match
   */
  readPattern(pattern) {
    return this._getString(pattern)
  }

  /**
   * Currently a no-op.
   * @access private
   * @returns {void}
   */
  _check() {
  }

  /**
   * Decodes a binary string with one of the ecl Unescape* functions.
   * Conversion stops at the first NUL character.
   * @access private
   * @param {string} data - binary string; each char code is treated as one byte
   * @param {?string} [encoding = null] - one of 'sjis', 'euc-jp', 'jis-7', 'jis-8', 'unicode', 'utf7', 'utf-8', 'utf-16'
   * @returns {string} - decoded text
   * @throws {Error} if encoding is not one of the supported names
   */
  _convert(data, encoding) {
    const length = data.length
    let escapeString = ''
    for(let i=0; i<length; i++){
      const charCode = data.charCodeAt(i)
      if(charCode === 0){
        break
      }
      else if(charCode < 16){
        // keep two hex digits per byte
        escapeString += '%0' + charCode.toString(16)
      }else{
        escapeString += '%' + charCode.toString(16)
      }
    }

    if(encoding === 'sjis'){
      return UnescapeSJIS(escapeString)
    }else if(encoding === 'euc-jp'){
      return UnescapeEUCJP(escapeString)
    }else if(encoding === 'jis-7'){
      return UnescapeJIS7(escapeString)
    }else if(encoding === 'jis-8'){
      return UnescapeJIS8(escapeString)
    }else if(encoding === 'unicode'){
      return UnescapeUnicode(escapeString)
    }else if(encoding === 'utf7'){
      return UnescapeUTF7(escapeString)
    }else if(encoding === 'utf-8'){
      return UnescapeUTF8(escapeString)
    }else if(encoding === 'utf-16'){
      return UnescapeUTF16LE(escapeString)
    }

    throw new Error(`unsupported encoding: ${encoding}`)
  }

  /**
   * Number of bytes between the byte cursor (see readData) and the end of the buffer.
   * @access public
   * @returns {number} -
   */
  getAvailableDataLength() {
    return this.buffer.length - this._pos
  }

  /**
   * Drops `len` characters from the front of the buffered text, decoding
   * more of the buffer first if fewer than `len` characters are buffered.
   * @access private
   * @param {number} len - number of characters to consume
   * @returns {void}
   */
  _moveIndex(len) {
    this._ensurePartialText(len)
    this._partialText = this._partialText.substring(len)
    if(this._partialText.length < this._partialMinLength){
      this._addPartialText()
    }
  }

  /**
   * Decodes further chunks until at least `minLength` characters are
   * buffered or the whole buffer has been decoded.
   * @access private
   * @param {number} minLength - wanted number of buffered characters
   * @returns {void}
   */
  _ensurePartialText(minLength) {
    while(this._partialText.length < minLength && this._partialOffset < this.buffer.length){
      this._addPartialText()
    }
  }

  /**
   * Consumes leading whitespace, including runs that extend past the
   * currently buffered text.
   * @access private
   * @returns {void}
   */
  _skipSpace() {
    let i = 0

    //  9: Horizontal Tab
    // 10: Line Feed
    // 11: Vertical Tab
    // 12: New Page
    // 13: Carriage Return
    // 32: Space
    for(;;){
      if(i >= this._partialText.length){
        // refill before looking at the next character
        this._ensurePartialText(i + 1)
        if(i >= this._partialText.length){
          break
        }
      }
      const code = this._partialText.charCodeAt(i)
      if(code !== 32 && !(9 <= code && code <= 13)){
        break
      }
      i++
    }
    if(i>0){
      this._moveIndex(i)
    }
  }

  /**
   * Decodes the next chunk of the buffer and appends it to the buffered text.
   * Does nothing when the whole buffer has already been decoded.
   * @access private
   * @returns {void}
   */
  _addPartialText() {
    const total = this.buffer.length
    if(this._partialOffset >= total){
      return
    }

    let newOffset = this._partialOffset + this._partialStep
    if(newOffset > total){
      newOffset = total
    }

    if(Buffer.isEncoding(this.encoding)){
      if(/^utf-?8$/i.test(this.encoding)){
        // Do not cut a multi-byte UTF-8 character in half: while the first
        // byte after the chunk is a continuation byte (10xxxxxx), pull it in.
        // A character has at most 3 continuation bytes.
        for(let n=0; n<3 && newOffset < total && (this.buffer[newOffset] & 0xC0) === 0x80; n++){
          newOffset++
        }
      }
      this._partialText += this.buffer.toString(this.encoding, this._partialOffset, newOffset)
    }else{
      // NOTE(review): multi-byte characters of these encodings can still be
      // split at a chunk boundary.
      const data = this.buffer.toString('binary', this._partialOffset, newOffset)
      this._partialText += this._convert(data, this.encoding)
    }
    this._partialOffset = newOffset
  }

  /**
   * Skips whitespace, matches `pattern` against the buffered text and
   * consumes as many characters as the matched text is long. Only text that
   * is currently buffered is searched.
   * @access private
   * @param {RegExp} pattern - pattern to match
   * @returns {?string[]} - result of String#match, or null if there is no match
   */
  _getString(pattern) {
    this._skipSpace()

    const str = this._partialText.match(pattern)
    if(str === null){
      return null
    }

    this._moveIndex(str[0].length)

    return str
  }
}