Source: util/UnknownDictionaryBuilder.js

/*
 * Copyright 2014 Takuya Asano
 * Copyright 2010-2014 Atilika Inc. and contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

"use strict";

var UnknownDictionary = require("../dict/UnknownDictionary.js");
var CharacterDefinition = require("../dict/CharacterDefinition.js");  // TODO Remove this dependency


/**
 * Builder for the unknown-word dictionary.
 *
 * Each instance starts with an empty `dictionary_entries` table. The table is
 * filled while dictionary entries are read: the key is the token info id
 * returned by the dictionary's `put`, the value is the first field of the
 * corresponding entry.
 *
 * @constructor
 */
function UnknownDictionaryBuilder() {
    // token_info_id -> first field of the entry that produced it
    var collected_entries = {};

    this.dictionary_entries = collected_entries;
}

/**
 * Build the unknown-word dictionary and attach its character definition.
 *
 * Steps, in order:
 *   1. `this.readDictionaryFile(unk_entries)` creates the dictionary (and is
 *      expected to fill `this.dictionary_entries`).
 *   2. `CharacterDefinition.readCharacterDefinition(char_text)` creates the
 *      character definition, which is handed to the dictionary.
 *   3. Every recorded entry is mapped from its character class id to its
 *      token info id via `addMapping`.
 *
 * @param {string} char_text contents of char.def
 * @param {*} unk_entries entries of unk.def, passed unchanged to
 *     `this.readDictionaryFile`
 * @returns {*} the dictionary object returned by `this.readDictionaryFile`
 */
UnknownDictionaryBuilder.prototype.build = function (char_text, unk_entries) {
    // TODO Create binary
    // TODO Use UnknownDictionary class
    var unk = this.readDictionaryFile(unk_entries);
    var char_def = CharacterDefinition.readCharacterDefinition(char_text); // Create CharacterDefinition (factory method)
    unk.characterDefinition(char_def);

    // Iterate own keys only: a bare for...in would also visit enumerable
    // properties inherited from Object.prototype and register bogus mappings.
    var entries = this.dictionary_entries;
    var token_info_ids = Object.keys(entries);

    for (var i = 0; i < token_info_ids.length; i++) {
        // Object keys are strings, so the id reaches addMapping as a string.
        var token_info_id = token_info_ids[i];
        var class_name = entries[token_info_id];

        // NOTE(review): the result of lookup() is not validated here; an
        // unknown class name is passed on to addMapping as whatever lookup()
        // returns for a miss.
        var class_id = char_def.invoke_definition_map.lookup(class_name);

        unk.addMapping(class_id, token_info_id);
    }

    return unk;
};


/**
 * Create an UnknownDictionary from parsed unk.def entries.
 *
 * This method was previously assigned to `prototype.build`, which replaced the
 * two-argument `build(char_text, unk_entries)` and left the
 * `readDictionaryFile` method that `build` calls undefined.
 *
 * Each entry is an array laid out as
 *   [surface_form, left_id, right_id, word_cost, ...feature fields]
 * Entries with fewer than four fields are skipped. The feature fields are
 * joined with "," before being stored.
 *
 * Side effect: for every stored entry, `this.dictionary_entries` receives
 * `token_info_id -> surface_form`, where `token_info_id` is the value returned
 * by the dictionary's `put`.
 *
 * @param {Array.<Array>} entries parsed entries of unk.def
 * @returns {UnknownDictionary} the populated dictionary, after `shrink()` has
 *     been called on its `dictionary` and `pos_buffer` members
 */
UnknownDictionaryBuilder.prototype.readDictionaryFile = function (entries) {
    var dictionary = new UnknownDictionary();

    for (var i = 0; i < entries.length; i++) {
        var entry = entries[i];

        // Need at least surface form, left id, right id and word cost.
        if (entry.length < 4) {
            continue;
        }

        var surface_form = entry[0];
        var left_id = entry[1];
        var right_id = entry[2];
        var word_cost = entry[3];
        var feature = entry.slice(4).join(",");  // TODO Optimize

        // Assertion: report (but still store) entries with non-numeric ids or
        // cost. The global isFinite is used on purpose: it coerces its
        // argument, so numeric strings pass, unlike with Number.isFinite.
        if (!isFinite(left_id) || !isFinite(right_id) || !isFinite(word_cost)) {
            console.log(entry);
        }

        var token_info_id = dictionary.put(left_id, right_id, word_cost, surface_form, feature);
        this.dictionary_entries[token_info_id] = surface_form;
    }

    // Remove last unused area
    dictionary.dictionary.shrink();
    dictionary.pos_buffer.shrink();

    return dictionary;
};


module.exports = UnknownDictionaryBuilder;