/**
* Copyright 2014 IBM Corp. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
'use strict';
var pick = require('object.pick');
var extend = require('extend');
var requestFactory = require('../lib/requestwrapper');
var util = require('util');
var BaseService = require('../lib/base_service');
/**
 * Client for version 1 of the Text to Speech service.
 *
 * All construction work is delegated to BaseService, which receives the
 * options object unchanged.
 *
 * @param {Object} options - handed straight to the BaseService constructor
 * @constructor
 */
function TextToSpeechV1(options) {
  BaseService.apply(this, [options]);
}
// Make TextToSpeechV1 inherit from BaseService. Keep this ahead of the
// prototype assignments below: older Node releases implement util.inherits by
// replacing the constructor's prototype object, which would discard anything
// assigned to it earlier.
util.inherits(TextToSpeechV1, BaseService);
// Service identifier and API version, shared by every instance via the prototype.
// NOTE(review): presumably read by BaseService (e.g. to locate credentials) - confirm in ../lib/base_service.
TextToSpeechV1.prototype.name = 'text_to_speech';
TextToSpeechV1.prototype.version = 'v1';
// Endpoint of the hosted service, stored as a static property of the constructor.
// NOTE(review): presumably the default base URL when the caller supplies none - confirm in ../lib/base_service.
TextToSpeechV1.URL = 'https://stream.watsonplatform.net/text-to-speech/api';
/**
 * Synthesizes speech from text.
 *
 * The text is sent as the JSON body of a POST request; `accept`, `voice` and
 * `customization_id` are sent as query parameters.
 *
 * @param {Object} params
 * @param {String} params.text - the text to synthesize (required, must be non-empty)
 * @param {String} [params.voice=en-US_MichaelVoice] - Call .voices() for a complete list
 * @param {String} [params.accept=audio/ogg; codecs=opus] - Supported formats are audio/ogg;codecs=opus, audio/wav, audio/flac, audio/l16, audio/basic
 * @param {Boolean} [params.X-Watson-Learning-Opt-Out] - forwarded as a request header when present
 * @param {String} [params.customization_id]
 * @param {Function} [callback] - receives an Error as its first argument when params.text is missing; otherwise it is handed to the request wrapper
 * @returns {*} the value returned by the request wrapper, or undefined when the validation error was delivered to callback
 * @throws {Error} when params.text is missing and no callback function was supplied
 */
TextToSpeechV1.prototype.synthesize = function(params, callback) {
  // Copy into a fresh object so the caller's params are never mutated by the default.
  params = extend({accept: 'audio/ogg; codecs=opus'}, params);
  if (!params.text) {
    var missing = new Error('Missing required parameters: text');
    if (typeof callback !== 'function') {
      // No callback to report to: surface the real cause instead of an
      // unrelated "callback is not a function" TypeError.
      throw missing;
    }
    callback(missing);
    return;
  }
  var parameters = {
    options: {
      method: 'POST',
      url: '/v1/synthesize',
      body: JSON.stringify(pick(params, ['text'])),
      qs: pick(params, ['accept', 'voice', 'customization_id']),
      headers: extend({
        'content-type': 'application/json'
      }, pick(params, ['X-Watson-Learning-Opt-Out'])),
      // NOTE(review): presumably so the audio is returned as raw binary rather
      // than a decoded string - confirm against ../lib/requestwrapper.
      encoding: null
    },
    defaultOptions: this._options
  };
  return requestFactory(parameters, callback);
};
// todo: add websocket support
// http://www.ibm.com/watson/developercloud/text-to-speech/api/v1/?curl#www_synthesize12
/**
 * Lists the voices available for speech synthesis.
 *
 * @param {Object} params - accepted for signature consistency; not read by this method
 * @param {Function} callback - handed to the request wrapper
 * @returns {*} the value returned by the request wrapper
 */
TextToSpeechV1.prototype.voices = function(params, callback) {
  var requestOptions = {
    method: 'GET',
    url: '/v1/voices',
    json: true
  };
  return requestFactory({
    options: requestOptions,
    defaultOptions: this._options
  }, callback);
};
/**
 * Fetches information about a single voice.
 *
 * `voice` fills the {voice} placeholder of the request path and is declared
 * as a required parameter; `customization_id` is sent as a query parameter.
 *
 * @param {Object} params
 * @param {String} params.voice - name of the voice to look up
 * @param {String} [params.customization_id]
 * @param {Function} callback - handed to the request wrapper
 * @returns {*} the value returned by the request wrapper
 */
TextToSpeechV1.prototype.voice = function(params, callback) {
  var pathValues = pick(params, ['voice']);
  var query = pick(params, ['customization_id']);
  return requestFactory({
    requiredParams: ['voice'],
    options: {
      method: 'GET',
      url: '/v1/voices/{voice}',
      path: pathValues,
      qs: query,
      json: true
    },
    defaultOptions: this._options
  }, callback);
};
/**
 * Looks up the phonetic pronunciation of a word.
 *
 * `text`, `voice`, `format` and `customization_id` are sent as query
 * parameters; `text` is declared as a required parameter.
 *
 * @param {Object} params
 * @param {String} params.text - a single word
 * @param {String} [params.format=ipa] - phoneme set used for the returned pronunciation: ipa or spr
 * @param {String} [params.voice] - Defaults to en-US_MichaelVoice unless a customization_id is specified. Do not specify both a voice and a customization_id
 * @param {String} [params.customization_id] - do not specify both a voice and a customization_id
 * @param {Function} callback - handed to the request wrapper
 * @returns {*} the value returned by the request wrapper
 */
TextToSpeechV1.prototype.pronunciation = function(params, callback) {
  var queryKeys = ['text', 'voice', 'format', 'customization_id'];
  var requestOptions = {
    method: 'GET',
    url: '/v1/pronunciation',
    qs: pick(params, queryKeys),
    json: true
  };
  return requestFactory({
    requiredParams: ['text'],
    options: requestOptions,
    defaultOptions: this._options
  }, callback);
};
/**
* new customization features
Summary of API calls
API Number Method API URL Description
API-01 POST /api/v1/customizations Create new custom model
API-02 GET /api/v1/customizations?language="en-US" List custom models for a language
API-03 DELETE /api/v1/customizations/{customization_id} Delete custom model
API-04 GET /api/v1/customizations/{customization_id}?<alphabet=spr> Query contents of custom model (optionally converts IPA to SPR phones)
API-05 POST /api/v1/customizations/{customization_id} Update contents of custom model, including adding one or more words
API-06 PUT /api/v1/customizations/{customization_id}/words/{word} Add a single word to custom model
API-07 POST /api/v1/customizations/{customization_id}/words Add one or more words to custom model
API-08 GET /api/v1/customizations/{customization_id}/words/{word} Query details for a word in custom model
API-09 GET /api/v1/customizations/{customization_id}/words List contents of words in custom model
API-10 DELETE /api/v1/customizations/{customization_id}/words/{word} Delete single word from custom model
API-11 GET /api/v1/pronunciation?text=aword&voice=voiceModel&format=ipa|spr Gets the IPA (or optionally the SPR) pronunciation of a given word for a given voice model (default=en-US_MichaelVoice)
TTS API Implementation
*/
/*
Create Custom Voice Model
Creates a new custom voice model for the specified language. Note: not all languages are supported (only US English for Beta release).
API:
POST /text-to-speech/api/v1/customizations
example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations
https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations
Request:
{
"name":"my custom voice",
"language":"en-US",
"description":"This is my first custom voice"
}
Input:
name -- string, not null. Voice model name;
language -- string, null/not null. If language is null, default is en-US .
description -- string, null/not null. Description of the custom voice model.
Output:
If success --
{
"customization_id":"74f4807e-b5ff-4866-824e-6bba1a84fe96"
}
Here 74f4807e-b5ff-4866-824e-6bba1a84fe96 is the UUID of new voice model.
If failed --
{
"code":400,
"error":"Invalid value for 'language'.",
"code_description":"Bad Request"
}
Http Status Code:
HTTP Code
Notes
200 Success
400 Invalid parameter 'parameter-name' in request.
400 Required parameter 'parameter-name' is missing.
400 This 'language' is currently not supported.
500 Service internal error
*/
/**
 * Creates a new custom voice model.
 *
 * Sends `name`, `language` and `description` as the JSON body of
 * POST /v1/customizations. No query parameters are sent.
 *
 * @param {Object} params
 * @param {String} params.name - name of the new voice model (required, must be non-empty)
 * @param {String} [params.language] - language of the new voice model
 * @param {String} [params.description] - description of the new voice model
 * @param {Boolean} [params.X-Watson-Learning-Opt-Out] - forwarded as a request header when present
 * @param {Function} [callback] - receives an Error as its first argument when params.name is missing; otherwise it is handed to the request wrapper
 * @returns {*} the value returned by the request wrapper, or undefined when the validation error was delivered to callback
 * @throws {Error} when params.name is missing and no callback function was supplied
 */
TextToSpeechV1.prototype.createCustomizations = function(params, callback) {
  var model = pick(params, ['name', 'language', 'description']);
  if (!model.name) {
    var missing = new Error('Missing required parameters: name');
    if (typeof callback !== 'function') {
      // No callback to report to, so fail with the real cause.
      throw missing;
    }
    callback(missing);
    return;
  }
  var parameters = {
    options: {
      method: 'POST',
      url: '/v1/customizations',
      // Passed as an object, not a pre-serialized string, because json: true is set.
      // NOTE(review): assumes ../lib/requestwrapper forwards these options to
      // the `request` library, which serializes object bodies itself - confirm.
      body: model,
      headers: extend({
        'content-type': 'application/json'
      }, pick(params, ['X-Watson-Learning-Opt-Out'])),
      json: true
    },
    defaultOptions: this._options
  };
  return requestFactory(parameters, callback);
};
/*
List Custom Voices
List all available custom voice models for a language (or all languages).
API:
GET /text-to-speech/api/v1/customizations?language="en-US"
example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations?language=en-US
https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations?language=en-US
Request: empty body
Input:
language -- string, null/not null. If it is null, API returns all custom voice models owned by the requester (based on bluemix-instance-id); if it's not null, returns custom voice models for this specified language id owned by the requester;
Output:
If success --
{
"customizations":[
{
"customization_id":"1fa3b971-45ad-4da5-a2a0-019d6eb24227",
"created":1432114231877,
"language":"en-US",
"owner":"GUID",
"name":"Lorem sit amet",
"description":"Lorem ipsum dolor sit amet, consectetur adipisicing elit. Beatae officiis similique consequuntur rem cumque rerum ex qui quo odio eligendi quia, consequatur quos suscipit id inventore itaque est aliquid! Temporibus reiciendis debitis tempore distinctio excepturi voluptate consequatur, iste illum dicta, voluptatibus alias doloribus quaerat fugiat eius odit magnam voluptatum officia."
}
]
}
Here 'customizations' is a list of custom voice models, 'owner' is a GUID which associates the user with the custom model, 'created' is Unix-time of when the model was created, 'customization_id' is a GUID associated with the custom model and 'language' is the language of the model.
If failed --
{
"code":400,
"error":"Invalid value for 'language'.",
"code_description":"Bad Request"
}
Http Status Code:
HTTP Code
Notes
200
Success
400
Invalid value for 'language'.
500
Service internal error
Testing case:
http://9.186.107.61:9080/text-to-speech/test/tts/ListDictionaries.html
Delete Custom Voice Model
Delete a custom voice model.
API:
DELETE /text-to-speech/api/v1/customizations/{customization_id}
example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96
https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96
Request: empty body
Input:
customization_id -- UUID string, not null.
Output:
If success --
Empty body
If failed --
{
"code":401,
"error":"Invalid customization_id (XXX) for user.",
"code_description":"Unauthorized"
}
Http Status Code:
HTTP Code
Notes
204
Success
400
Invalid value for 'customization_id'.
401 Invalid customization_id (xxx) for user
500
Service internal error
Testing case:
http://9.186.107.61:9080/text-to-speech/test/tts/DeleteDictionary.html
Query Custom Voice Model
List contents of custom voice model.
API:
GET /text-to-speech/api/v1/customizations/{customization_id}?<alphabet=spr>
example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96
https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96
Request: empty body
Input:
customization_id -- GUID string, not null.
alphabet -- an optional query parameter which tells system to return original pronunciation or if set to "spr", pronunciations after conversion to SPR phoneme set (this is needed by TTS engine). Only words with phonetic pronunciations are converted to SPR (so sounds-like remain as sounds-like)
Output:
If success --
{
"customization_id":"7524b592-aeb5-4ab3-a3a6-639dfc4eaf4b",
"created":1432021460585,
"language":"en-US",
"owner":"GUID",
"name":"Lorem ipsum dolor",
"last_modified":1439955282335,
"description":"Lorem ipsum dolor sit amet, consectetur adipisicing elit. Repellendus amet assumenda nostrum ea, nesciunt veniam voluptates. Sapiente labore accusamus illum placeat voluptatem, architecto repellat fugit blanditiis velit in esse vitae, necessitatibus ducimus nobis? Natus accusantium maiores corporis ipsam praesentium, totam iusto minus temporibus sapiente, aperiam, id ullam vero adipisci accusamus.",
"words": [
{"word":"gastroenteritis", "translation":"`[1gAstroEntxrYFXs]"},
{"word": "trinitroglycerin", "translation":"try nitro glycerin"},
{"word":"div.", "translation":"division"},
{"word": "proc.", "translation":"proceedings"}]
}
Here 'words' is a list of the words in the custom model, 'owner' is a GUID which associates the user with the custom model, 'created' is Unix-time of when the model was created, 'last_modified' is Unix-time of when the model was last modified, 'customization_id' is a GUID associated with the custom model and 'language' is the language of the model.
If failed --
{
"code":401,
"error":"Invalid customization_id (XXX) for user.",
"code_description":"Unauthorized"
}
Http Status Code:
HTTP Code
Notes
200
Success
400
Invalid value for 'customization_id'.
401 Invalid customization_id (xxx) for user.
500
Service internal error
304 Not modified
Testing case:
http://9.186.107.61:9080/text-to-speech/test/tts/QueryDictionary.html
Update Custom Voice
Update custom voice information (name or description). Can also be used to add one or more words to the dictionary.
API:
POST /text-to-speech/api/v1/customizations/{customization_id}
example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96
https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96
Request:
{
"name":"new voice name",
"description":"This is my new custom voice",
"words": [
{"word":"gastroenteritis", "translation":"<phoneme alphabet="ibm" ph="1gAstroEntxrYFXs"></phoneme>"},
{"word":"tomato", "translation":"<phoneme alphabet="ipa" ph="təmˈɑto"></phoneme>"},
{"word": "trinitroglycerin", "translation":"try nitro glycerin"},
{"word":"div.", "translation":"division"},
{"word": "proc.", "translation":"proceedings"}]
}
Input:
customization_id -- UUID string, not null.
name -- string, null/not null. If it's null, voice name field will not be updated.
description -- string, null/not null. If it's null, description field will not be updated.
words -- a JSON array composed of 'word' objects. If array is empty, word entries will not be updated.
word -- a JSON object composed of "word":"word_string" and "translation":"pronunciation|sounds-like string". The format for the pronunciation is based on the SSML format for representing the phonetic string of a word. That is, "<phoneme alphabet="ipa" ph="təmˈɑto"></phoneme>" for adding an IPA pronunciation or "<phoneme alphabet="ibm" ph="1gAstroEntxrYFXs"></phoneme>" for adding an SPR pronunciation. SPR is the native IBM phone set.
Output:
If success --
Empty body
If failed --
{
"code":401,
"error":"Invalid customization_id (XXX) for user.",
"code_description":"Unauthorized"
}
Http Status Code:
HTTP Code
Notes
201
Success
400 Invalid parameter 'parameter-name' in request.
400
Invalid value for 'customization_id'.
400 In SSML: <phoneme alphabet="ipa" ph="XXX"></phoneme>, attribute 'ph' is not a standard IPA format.
401 Invalid customization_id (xxx) for user.
500
Service internal error
Testing case:
http://9.186.107.61:9080/text-to-speech/test/tts/UpdateDictionary.html
Update Custom Voice Word
Adds a single word to a given customization_id
API:
PUT /text-to-speech/api/v1/customizations/{customization_id}/words/{word}
example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword
https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword
Request:
{
"translation":"pronunciation|sounds-like"
}
Input:
word -- string, containing word to add
translation - a valid IPA or SPR pronunciation and/or one or more words which when combined sound like the word being added - the pronunciation should be in SSML format, e.g. "<phoneme alphabet="ipa" ph="təmˈɑto"></phoneme>" for adding an IPA pronunciation or "<phoneme alphabet="ibm" ph="1gAstroEntxrYFXs"></phoneme>" for adding an SPR pronunciation. SPR is the native IBM phone set.
Output:
If success --
Empty body
If failed --
{
"code":401|400|400,
"error":"Invalid customization_id (XXX) for user" | "Invalid request" | "In SSML: <phoneme alphabet="ipa" ph="XXX"></phoneme>, attribute 'ph' is not a standard IPA format.",
"code_description":"Unauthorized"|"Bad Request"|"Bad Request"
}
Http Status Code:
HTTP Code
Notes
201
Success
400 Invalid parameter 'parameter-name' in request.
400
Invalid value for 'customization_id'.
400 In SSML: <phoneme alphabet="ipa" ph="XXX"></phoneme>, attribute 'ph' is not a standard IPA format.
401
Invalid customization_id (xxx) for user.
500
Service internal error
Testing case:
http://9.186.107.61:9080/text-to-speech/test/tts/UpdateDictionaryWord.html
Update Custom Voice Words
Adds one or more words and their associated pronunciation to a given customization_id
API:
POST /text-to-speech/api/v1/customizations/{customization_id}/words
example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words
https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words
Request:
{
"words": [
{"word":"gastroenteritis", "translation":"<phoneme alphabet="ibm" ph="1gAstroEntxrYFXs"></phoneme>"},
{"word": "trinitroglycerin", "translation":"try nitro glycerin"},
{"word":"div.", "translation":"division"},
{"word": "proc.", "translation":"proceedings"}]
}
Input:
words -- JSON array of JSON tuples containing word and translation objects
word -- string,containing word to add
translation - valid IPA pronunciation and/or one or more words which when combined sound like the word being added
Output:
If success --
Empty body
If failed --
{
"code":401|400|400,
"error":"Invalid customization_id (XXX) for user" | "Invalid request" | "In SSML: <phoneme alphabet="ipa" ph="XXX"></phoneme>, attribute 'ph' is not a standard IPA format.",
"code_description":"Unauthorized"|"Bad Request"|"Bad Request"
}
Http Status Code:
HTTP Code
Notes
201
Success
400
Invalid value for 'customization_id'.
400 Invalid parameter 'parameter-name' in request.
400 In SSML: <phoneme alphabet="ipa" ph="XXX"></phoneme>, attribute 'ph' is not a standard IPA format.
401
Invalid customization_id (xxx) for user.
500
Service internal error
Testing case:
http://9.186.107.61:9080/text-to-speech/test/tts/UpdateDictionaryWords.html
Get Word
Get details for a single word belonging to customization_id
API:
GET /text-to-speech/api/v1/customizations/{customization_id}/words/{word}
example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword
https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword
Request:
{
}
Input:
word -- string, containing the word to look up
Output:
If success --
{
"translation":"pronunciation|sounds-like"
}
If failed --
{
"code":400,
"error":"Word: <aword> not found in customization_id: XXX",
"code_description":"Bad Request"
}
Http Status Code:
HTTP Code
Notes
200
Success
400
Invalid value for 'customization_id'.
400 Word: xxx not found in customization_id: xxx-xxx
401
Invalid customization_id (xxx) for user.
500
Service internal error
Testing case:
http://9.186.107.61:9080/text-to-speech/test/tts/GetWord.html
Get Words
List all the words associated with a customization_id
API:
GET /text-to-speech/api/v1/customizations/{customization_id}/words
example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words
https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words
Request:
{
}
Input:
Output:
If success --
{
"words": [
{"word": "aword", "translation":"pronunciation|sounds-like"},
{"word": "anotherword", "translation":"pronunciation|sounds-like"},
...
]
}
If failed --
{
"code":401,
"error":"Invalid customization_id (XXX) for user.",
"code_description":"Unauthorized"
}
Http Status Code:
HTTP Code
Notes
200
Success
400
Invalid value for 'customization_id'.
401
Invalid customization_id (xxx) for user.
500
Service internal error
Testing case:
http://9.186.107.61:9080/text-to-speech/test/tts/GetWords.html
Delete Word
Deletes a single word belonging to customization_id
API:
DELETE /text-to-speech/api/v1/customizations/{customization_id}/words/{word}
example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword
https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword
Request:
{
}
Input:
word -- string,containing word to delete
Output:
If success --
Empty body
If failed --
{
"code":401,
"error":"Invalid customization_id (XXX) for user.",
"code_description":"Unauthorized"
}
Http Status Code:
HTTP Code
Notes
204
No Content (means success but no content being returned)
400
Invalid value for 'customization_id'.
400 Word: xxx not found in customization_id: xxx-xxx
401
Invalid customization_id (xxx) for user.
500
Service internal error
Testing case:
http://9.186.107.61:9080/text-to-speech/test/tts/DeleteWord.html
Get Pronunciation
Get the pronunciation for a word. By default, the pronunciation is returned in IPA phone set.
API:
GET /text-to-speech/api/v1/pronunciation?voice=xx-XX_VoiceModel&text="aword"&format="ipa|spr"
example: http://9.186.107.61:9080/speechct/text-to-speech/api/v1/pronunciation?voice=en-US_LisaVoice&text=aword
https://stream-d.watsonplatform.net/text-to-speech/api/v1/pronunciation?voice=en-US_LisaVoice&text=aword
Request:
{
}
Input:
text -- string, contains word for which pronunciation is requested
voice -- string, null or not null. If it is null, assumes the request is for the default voice; if it's not null, returns pronunciation using the phoneme set specified in the 'format' parameter for the given 'voice'
format -- specifies which phoneme set to use to return the pronunciation, either "ipa" or "spr" (default is "ipa")
Output:
If success --
{
"pronunciation":"pronunciation"
}
If failed --
{
"code":404,
"error":"Model <XXX> not found",
"code_description":"Not Found"
}
Cassandra database structure
Model Name: ttsmodel
Column Name Data Type Key Type Comments
userid varchar PARTITION KEY string comes from Bluemix
dictid uuid PRIMARY KEY, INDEX a unique UUID string
createtime timestamp PRIMARY KEY sort by desc
dictname varchar created by user
lang int language id
description text
dictionary list<text> the content words of dictionary
lastmodified timestamp used for If-Modified-Since
CQL:
CREATE KEYSPACE speechct WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 2 };
use speechct;
CREATE TABLE ttsmodel (userid varchar, dictid uuid, createtime timestamp, dictname varchar, lang int, description text, dictionary list<text>, lastmodified timestamp, PRIMARY KEY (userid,createtime,dictid)) WITH CLUSTERING ORDER BY (createtime DESC);
create index dictid_index on ttsmodel(dictid);
create index lang_index on ttsmodel(lang);
*/
// The TextToSpeechV1 constructor is this module's export.
module.exports = TextToSpeechV1;