/*
* Copyright Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
"use strict";
var UnknownDictionary = require("../dict/UnknownDictionary.js");
var CharacterDefinition = require("../dict/CharacterDefinition.js"); // TODO Remove this dependency
/**
 * Builder for the unknown-word dictionary.
 *
 * A fresh instance starts with an empty `dictionary_entries` table. The table
 * is keyed by token info id and holds the surface form that was stored under
 * that id; `build` later reads each value back as a character class name.
 * @constructor
 */
function UnknownDictionaryBuilder() {
    var entries_by_token_info_id = {};
    this.dictionary_entries = entries_by_token_info_id;
}
/**
 * Build the unknown-word dictionary.
 *
 * The entries are loaded first (via `readDictionaryFile`), then the character
 * definition is created from `char_text` and attached to the dictionary.
 * Finally every recorded token info id is mapped to the class id that the
 * character definition reports for the name stored under that id.
 *
 * @param {string} char_text Contents of char.def
 * @param {Array} unk_entries Entries of unk.def, passed on to `readDictionaryFile`
 * @returns The dictionary returned by `readDictionaryFile`, with the character
 *     definition and the class id mappings added
 */
UnknownDictionaryBuilder.prototype.build = function (char_text, unk_entries) {
    // TODO Create binary
    // TODO Use UnknownDictionary class
    var unknown_dictionary = this.readDictionaryFile(unk_entries);

    // Create CharacterDefinition (factory method)
    var character_definition = CharacterDefinition.readCharacterDefinition(char_text);
    unknown_dictionary.characterDefinition(character_definition);

    var class_names = this.dictionary_entries;
    for (var id in class_names) {
        // NOTE: the looked-up class id is passed on without being validated.
        // `id` is the property key, so it is handed over as a string.
        unknown_dictionary.addMapping(
            character_definition.invoke_definition_map.lookup(class_names[id]),
            id
        );
    }

    return unknown_dictionary;
};
/**
 * Load unknown-word entries into a new UnknownDictionary.
 *
 * This function used to be assigned to `prototype.build` as well, which
 * overwrote the two-argument `build` defined above and left the
 * `readDictionaryFile` method that `build` calls undefined. It is now
 * registered under the name `build` expects.
 *
 * Each entry is an array laid out as
 * (0: surface form, 1: left id, 2: right id, 3: word cost, 4-: features).
 * Entries with fewer than four columns are skipped. For every stored entry
 * the returned token info id is recorded in `this.dictionary_entries`
 * together with the entry's surface form.
 *
 * @param {Array<Array>} entries Unknown-word entries
 * @returns {UnknownDictionary} The populated dictionary, with its
 *     `dictionary` and `pos_buffer` buffers shrunk
 */
UnknownDictionaryBuilder.prototype.readDictionaryFile = function (entries) {
    var dictionary = new UnknownDictionary();
    for (var i = 0; i < entries.length; i++) {
        var entry = entries[i];
        if (entry.length < 4) {
            // Not enough columns for surface form, ids and cost
            continue;
        }
        var surface_form = entry[0];
        var left_id = entry[1];
        var right_id = entry[2];
        var word_cost = entry[3];
        var feature = entry.slice(4).join(","); // TODO Optimize
        // Assertion: report (but still store) entries whose ids or cost are
        // not numeric. The global isFinite is intentional here: it coerces
        // its argument, so numeric strings are accepted.
        if (!isFinite(left_id) || !isFinite(right_id) || !isFinite(word_cost)) {
            console.log(entry);
        }
        var token_info_id = dictionary.put(left_id, right_id, word_cost, surface_form, feature);
        this.dictionary_entries[token_info_id] = surface_form;
    }
    // Remove last unused area
    dictionary.dictionary.shrink();
    dictionary.pos_buffer.shrink();
    return dictionary;
};
module.exports = UnknownDictionaryBuilder;