Source: services/speech_to_text/v1.js

/**
 * Copyright 2014 IBM Corp. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

'use strict';

var extend         = require('extend');
var helper         = require('../../lib/helper');
var cookie         = require('cookie');
var pick           = require('object.pick');
var url            = require('url');
var https          = require('https');
var http           = require('http');
var isStream       = require('isstream');
var requestFactory = require('../../lib/requestwrapper');
var qs             = require('querystring');
var Duplex         = require('stream').Duplex;
var util           = require('util');
var WebSocketClient = require('websocket').client;
var pkg         = require('../../package.json');

/**
 * Parses a chunk of response text into a JavaScript value.
 *
 * A chunk may hold several JSON objects written back to back
 * (e.g. `{...}{...}`, optionally separated by whitespace). In that case
 * every object is parsed and only the last one is returned.
 *
 * @param {String} chunk Raw response text.
 * @returns {*} The parsed value, or the last object when the chunk holds
 *   several concatenated objects.
 * @throws {SyntaxError} If the chunk cannot be parsed as JSON. Both callers
 *   in this file catch it and report the raw text through their callback.
 */
function formatChunk(chunk) {
  var parseError;

  // Try the chunk as one JSON document first, so that a '}{' sequence inside
  // a string value is never mistaken for a boundary between two objects.
  try {
    return JSON.parse(chunk);
  } catch (e) {
    parseError = e;
  }

  // Not valid on its own: only retry when it looks like concatenated objects.
  if (typeof chunk !== 'string' || !/}\s*{/.test(chunk)) {
    throw parseError;
  }

  // Turn '{..}{..}' into '[{..},{..}]'. A parse failure here is propagated
  // instead of returning the bracket-wrapped text as if it were a result.
  // NOTE: a '}{' inside a string value of a concatenated payload is still
  // rewritten by this fallback.
  var objects = JSON.parse('[' + chunk.replace(/}\s*{/g, '},{') + ']');
  return objects[objects.length - 1];
}

/**
 * Speech to Text API wrapper.
 *
 * @param {Object} [options] Service options. They are merged over the default
 *   service URL and stored on the instance as `_options`, which the methods
 *   below pass to the request wrapper or read directly (`url`, `api_key`,
 *   `silent`).
 * @constructor
 */
function SpeechToText(options) {
  // User-provided values take precedence over the default endpoint.
  this._options = extend({
    url: 'https://stream.watsonplatform.net/speech-to-text/api'
  }, options);
}
/**
 * Recognizes the given audio stream with a single HTTP request.
 *
 * Despite its name this method does not open a WebSocket: it accepts the same
 * parameters and performs the same HTTP POST as recognize(), so it delegates
 * to it. Use createRecognizeStream() for streaming over WebSockets.
 *
 * @param {Object} params
 * @param {Stream} params.audio Audio to be recognized (a Node.js stream).
 * @param {String} params.content_type Content-type of the audio.
 * @param {String} [params.session_id] Session to run the recognition in.
 * @param {Function} callback Receives an Error when validation fails;
 *   otherwise it is handed to the request wrapper.
 * @returns {*} Whatever recognize() returns.
 */
SpeechToText.prototype.recognizeWs = function(params, callback) {
  return this.recognize(params, callback);
};

/**
 * Sends an audio stream to the recognize endpoint with an HTTP POST.
 *
 * @param {Object} params
 * @param {Stream} params.audio Audio to be recognized; must be a Node.js stream.
 * @param {String} params.content_type Content-type of the audio.
 * @param {String} [params.session_id] When set, the request goes to
 *   /v1/sessions/{session_id}/recognize instead of /v1/recognize.
 * @param {Function} callback Called with an Error when a required parameter
 *   is missing or `audio` is not a stream; otherwise it is handed to the
 *   request wrapper.
 * @returns {*} The result of piping the audio into the request, or undefined
 *   when validation fails.
 */
SpeechToText.prototype.recognize = function(params, callback) {
  var missing = helper.getMissingParams(params, ['audio', 'content_type']);
  if (missing) {
    callback(new Error('Missing required parameters: ' + missing.join(', ')));
    return;
  }

  if (!isStream(params.audio)) {
    callback(new Error('audio is not a standard Node.js Stream'));
    return;
  }

  var sessionPath = params.session_id ? '/sessions/' + params.session_id : '';

  var requestOptions = {
    method: 'POST',
    url: '/v1' + sessionPath + '/recognize',
    headers: {
      'Content-Type': params.content_type
    },
    json: true,
    // only the recognition settings are forwarded as query string parameters
    qs: pick(params, ['continuous', 'max_alternatives', 'timestamps',
      'word_confidence','inactivity_timeout', 'model'])
  };

  // If the audio source emits a 'response' event, overwrite its content-type
  // header with the one supplied by the caller.
  function overrideContentType(response) {
    response.headers['content-type'] = params.content_type;
  }

  var source = params.audio.on('response', overrideContentType);
  return source.pipe(requestFactory({
    options: requestOptions,
    defaultOptions: this._options
  }, callback));
};

/**
 * Opens a chunked HTTP/HTTPS POST to /v1/sessions/{session_id}/recognize and
 * returns the request so the caller can write audio chunks to it.
 * The request is NOT ended here; the caller ends it when the audio is over.
 *
 * @param {Object} params
 * @param {String} params.session_id The session id.
 * @param {String} params.cookie_session Value sent as the SESSIONID cookie.
 * @param {String} params.content_type The Content-type e.g. audio/l16; rate=48000
 * @param {boolean} [params.continuous] Adds ?continuous=true when enabled.
 * @param {Function} callback Called with (null, result) once the response has
 *   ended and been parsed, with the raw response text as first argument when
 *   parsing throws, or with the error emitted by the request.
 * @returns {*} The client request, or undefined when parameters are missing.
 */
SpeechToText.prototype.recognizeLive = function(params, callback) {
  var required = ['session_id', 'content_type', 'cookie_session'];
  var missing = helper.getMissingParams(params, required);

  if (missing) {
    callback(new Error('Missing required parameters: ' + missing.join(', ')));
    return;
  }

  var endpoint = url.parse(
    [this._options.url, '/v1/sessions/', params.session_id, '/recognize'].join('')
  );

  // loose comparison kept on purpose: values such as 1 also enable the flag
  var query = params.continuous == true ? '?continuous=true' : '';

  var requestOptions = {
    agent: false,
    host: endpoint.hostname,
    port: endpoint.port,
    path: endpoint.pathname + query,
    method: 'POST',
    headers: {
      'Authorization': 'Basic ' + this._options.api_key,
      'Transfer-Encoding': 'chunked',
      'cookie': 'SESSIONID=' + params.cookie_session,
      'Content-type': params.content_type
    }
  };

  // plain http only when the configured URL asks for it, https otherwise
  var transport = endpoint.protocol.match('http:') ? http : https;

  var request = transport.request(requestOptions, function(response) {
    var received = '';
    response.setEncoding('utf-8');

    // accumulate the whole response before parsing it
    response.on('data', function(chunk) {
      received += chunk;
    });

    response.on('end', function() {
      var parsed;
      try {
        parsed = formatChunk(received);
      } catch (e) {
        // could not parse: hand the raw text back as the error argument
        callback(received);
        return;
      }
      callback(null, parsed);
    });
  });

  request.on('error', function(error) {
    callback(error);
  });

  return request;
};

/**
 * Observes the results of an upcoming or ongoing recognition task in the
 * session by sending a GET to /v1/sessions/{session_id}/observe_result.
 * This request has to be started before the POST on recognize finishes,
 * otherwise it waits for the next recognition.
 *
 * @param {Object} params
 * @param {String} params.session_id Session used in the recognition.
 * @param {String} params.cookie_session Value sent as the SESSIONID cookie.
 * @param {boolean} [params.interim_results] If true,
 * interim results will be returned. Default: false.
 * @param {Function} callback Called once per received data chunk with
 *   (null, result), with the raw chunk as first argument when parsing throws,
 *   or with the error emitted by the request.
 * @returns {*} The client request (already ended), or undefined when
 *   parameters are missing.
 */
SpeechToText.prototype.observeResult = function(params, callback) {
  var missing = helper.getMissingParams(params, ['session_id', 'cookie_session']);
  if (missing) {
    callback(new Error('Missing required parameters: ' + missing.join(', ')));
    return;
  }

  var endpoint = url.parse(
    [this._options.url, '/v1/sessions/', params.session_id, '/observe_result'].join('')
  );

  // loose comparison kept on purpose: values such as 1 also enable the flag
  var query = params.interim_results == true ? '?interim_results=true' : '';

  var requestOptions = {
    agent: false,
    host: endpoint.hostname,
    port: endpoint.port,
    path: endpoint.pathname + query,
    method: 'GET',
    headers: {
      'Authorization': 'Basic ' + this._options.api_key,
      'cookie': 'SESSIONID=' + params.cookie_session,
      'Accept': 'application/json'
    }
  };

  // plain http only when the configured URL asks for it, https otherwise
  var transport = endpoint.protocol.match('http:') ? http : https;

  var request = transport.request(requestOptions, function(response) {
    response.setEncoding('utf-8');

    // every chunk is parsed and reported on its own
    response.on('data', function(chunk) {
      var parsed;
      try {
        parsed = formatChunk(chunk);
      } catch (e) {
        // could not parse: hand the raw chunk back as the error argument
        callback(chunk);
        return;
      }
      callback(null, parsed);
    });
  });

  request.on('error', function(error) {
    callback(error);
  });

  request.end();

  return request;
};

/**
 * Gets the state of the engine to check whether recognize is available,
 * i.e. whether the session is ready to accept a new recognition task.
 * The returned state has to be 'initialized' to be able to do recognize POST.
 *
 * @param {Object} params
 * @param {String} params.session_id Session used in the recognition.
 * @param {Function} callback Called with an Error when session_id is missing;
 *   otherwise it is handed to the request wrapper.
 * @returns {*} The request wrapper's return value, or undefined when
 *   session_id is missing.
 */
SpeechToText.prototype.getRecognizeStatus = function(params, callback) {
  var missing = helper.getMissingParams(params, ['session_id']);
  if (missing) {
    callback(new Error('Missing required parameters: ' + missing.join(', ')));
    return;
  }

  var pathParams = params || {};

  return requestFactory({
    options: {
      method: 'GET',
      url: '/v1/sessions/' + pathParams.session_id + '/recognize',
      path: pathParams,
      json: true
    },
    defaultOptions: this._options
  }, callback);
};

/**
 * Lists the available models with a GET on /v1/models.
 *
 * @param {Object} params Forwarded to the request wrapper as the `path` option.
 * @param {Function} callback Handed to the request wrapper.
 * @returns {*} The request wrapper's return value.
 */
SpeechToText.prototype.getModels = function(params, callback) {
  var requestOptions = {
    method: 'GET',
    url: '/v1/models',
    path: params,
    json: true
  };

  return requestFactory({
    options: requestOptions,
    defaultOptions: this._options
  }, callback);
};

/**
 * Gets information about a model with a GET on /v1/models/{model_id}.
 *
 * @param {Object} params
 * @param {String} params.model_id The desired model. Declared to the request
 *   wrapper as a required parameter.
 * @param {Function} callback Handed to the request wrapper.
 * @returns {*} The request wrapper's return value.
 */
SpeechToText.prototype.getModel = function(params, callback) {
  var pathParams = params || {};

  return requestFactory({
    options: {
      method: 'GET',
      url: '/v1/models/' + pathParams.model_id,
      path: pathParams,
      json: true
    },
    requiredParams: ['model_id'],
    defaultOptions: this._options
  }, callback);
};

/**
 * Creates a session with a POST on /v1/sessions.
 * A Set-Cookie header is returned with a cookie that must be used for
 * each request using this session; its SESSIONID value is added to the
 * response body as `cookie_session`.
 * The session expires after 15 minutes of inactivity.
 *
 * @param {Object} params Sent as query string parameters (e.g. `model`,
 *   the model to use during the session).
 * @param {Function} callback Called with (error, body, response). The error
 *   is set when the request fails, when the response carries no SESSIONID
 *   cookie, or when the body is not an object the cookie can be attached to.
 * @returns {*} The request wrapper's return value.
 */
SpeechToText.prototype.createSession = function(params, callback) {
  var parameters = {
    options: {
      method: 'POST',
      url: '/v1/sessions',
      json: true,
      qs: params
    },
    defaultOptions: this._options
  };

  // Returns the SESSIONID value found in the Set-Cookie headers, if any.
  function findSessionCookie(response) {
    var headers = (response && response.headers) || {};
    var setCookie = headers['set-cookie'] || [];
    if (!Array.isArray(setCookie)) {
      setCookie = [setCookie];
    }
    // the session cookie is not guaranteed to be the first one
    for (var i = 0; i < setCookie.length; i++) {
      var parsed = cookie.parse(String(setCookie[i]));
      if (parsed.SESSIONID) {
        return parsed.SESSIONID;
      }
    }
  }

  // Add the cookie_session to the response
  function addSessionId(cb) {
    return function(error, body, response) {
      if (error) {
        cb(error, body, response);
        return;
      }

      // Report a missing cookie through the callback instead of throwing
      // from inside the response handler.
      var sessionCookie = findSessionCookie(response);
      if (!sessionCookie) {
        cb(new Error('The create session response did not include a SESSIONID cookie'),
          body, response);
        return;
      }

      if (!body || typeof body !== 'object') {
        cb(new Error('The create session response did not include a JSON body'),
          body, response);
        return;
      }

      body.cookie_session = sessionCookie;
      cb(error, body, response);
    };
  }

  return requestFactory(parameters, addSessionId(callback));
};

/**
 * Deletes the specified session with a DELETE on /v1/sessions/{session_id}.
 *
 * @param {Object} params
 * @param {String} params.session_id Session id.
 * @param {Function} callback Called with an Error when session_id is missing;
 *   otherwise it is handed to the request wrapper.
 * @returns {*} The request wrapper's return value, or undefined when
 *   session_id is missing.
 */
SpeechToText.prototype.deleteSession = function(params, callback) {
  var missing = helper.getMissingParams(params, ['session_id']);
  if (missing) {
    callback(new Error('Missing required parameters: ' + missing.join(', ')));
    return;
  }

  var requestOptions = {
    method: 'DELETE',
    url: '/v1/sessions/' + params.session_id,
    json: true
  };

  return requestFactory({
    options: requestOptions,
    defaultOptions: this._options
  }, callback);
};


/**
 * Duplex stream that runs a recognition over a WebSocket connection.
 *
 * Data written to the stream is sent to the service as binary frames (once
 * the service reports that it is listening); the transcript of every final
 * result is pushed to the readable side.
 *
 * Besides the standard stream events it emits:
 *  - 'connect' (connection) once the socket is open and the start message sent
 *  - 'listening' the first time the service reports the 'listening' state
 *  - 'results' (data) for every message carrying results, interim or final
 *  - 'connection-close' (reasonCode, description) when the socket closes
 *  - 'error' for connection failures and unexpected or invalid messages
 *
 * @param {Object} options Also passed to the Duplex constructor.
 * @param {String} options.base_url Service URL; a leading http is replaced
 *   with ws to build the socket URL.
 * @param {Object} [options.headers] Headers for the WebSocket handshake.
 * @param {String} [options.model='en-US_BroadbandModel'] Sent in the query
 *   string, together with 'X-Watson-Learning-Opt-Out' and 'watson-token'
 *   when present.
 * @param {String} [options.content-type='audio/wav'] Sent in the start
 *   message, together with continuous, max_alternatives, timestamps,
 *   word_confidence, inactivity_timeout and interim_results when present.
 * @constructor
 */
function RecognizeStream(options){
  Duplex.call(this, options);

  var queryParams = extend({model: 'en-US_BroadbandModel'}, pick(options, ['model', 'X-Watson-Learning-Opt-Out', 'watson-token']));

  var openingMessage = extend({
    // todo: confirm the mixed underscores/hyphens and/or get it fixed
    action: 'start',
    'content-type': 'audio/wav', // todo: try to determine content-type from the file extension if available
    'continuous': false,
    'interim_results': true
  }, pick(options, ['continuous', 'max_alternatives', 'timestamps',
    'word_confidence', 'inactivity_timeout', 'content-type', 'interim_results']));

  var closingMessage = {action: 'stop'};

  // named socketUrl so that it does not shadow the `url` module
  var socketUrl = options.base_url.replace(/^http/, 'ws') + '/v1/recognize?' + qs.stringify(queryParams);

  this.listening = false;

  var client = this.client = new WebSocketClient();
  var self = this;

  // when the input stops, let the service know that we're done
  self.on('finish', function() {
    if (self.connection) {
      self.connection.sendUTF(JSON.stringify(closingMessage));
    } else {
      // not connected yet: send the stop message as soon as we are
      self.once('connect', function () {
        self.connection.sendUTF(JSON.stringify(closingMessage));
      });
    }
  });

  // Emits an 'error' with the offending frame attached as `err.raw`.
  function emitError(msg, frame, err) {
    if (err) {
      err.message = msg + ' ' + err.message;
    } else {
      err = new Error(msg);
    }
    err.raw = frame;
    self.emit('error', err);
  }

  this.client.on('connectFailed', function(error) {
    self.emit('error', error);
  });

  this.client.on('connect', function(connection) {
    self.connection = connection;

    connection.on('error', function(error) {
      self.listening = false;
      self.emit('error', error);
    });

    connection.on('close', function(reasonCode, description) {
      self.listening = false;
      self.push(null); // end the readable side
      self.emit('connection-close', reasonCode, description);
    });

    connection.on('message', function(frame) {
      if (frame.type !== 'utf8') {
        return emitError('Unexpected binary data received from server', frame);
      }

      var data;
      try {
        data = JSON.parse(frame.utf8Data);
      } catch (jsonEx) {
        return emitError('Invalid JSON received from service:', frame, jsonEx);
      }

      if (data.error) {
        emitError(data.error, frame);
      } else if(data.state === 'listening') {
        // this is emitted both when the server is ready for audio, and after we send the close message to indicate that it's done processing
        if (!self.listening) {
          self.listening = true;
          self.emit('listening');
        } else {
          connection.close();
        }
      } else if (data.results) {
        self.emit('results', data);
        // note: currently there is always exactly 1 entry in the results array. However, this may change in the future.
        // Guard against an empty results array or empty alternatives so that
        // such a message cannot throw from inside the socket handler.
        var result = data.results[0];
        var best = result && result.final && result.alternatives && result.alternatives[0];
        if (best) {
          self.push(best.transcript, 'utf8'); // this is the "data" event that can be easily piped to other streams
        }
      } else {
        emitError('Unrecognised message from server', frame);
      }
    });

    connection.sendUTF(JSON.stringify(openingMessage));

    self.emit('connect', connection);
  });

  //requestUrl, protocols, origin, headers, extraRequestOptions
  client.connect(socketUrl, null, null, options.headers, null);
}
util.inherits(RecognizeStream, Duplex);


/**
 * Readable-side hook of the Duplex stream; intentionally does nothing.
 * Data reaches the readable side through push() calls made by the
 * constructor's connection 'message' and 'close' handlers.
 *
 * @param {Number} size Ignored.
 */
RecognizeStream.prototype._read = function(size) {
  // there's no easy way to control reads from the underlying library
  // so, the best we can do here is a no-op
};

/**
 * Writable-side hook of the Duplex stream: sends a chunk to the service as a
 * binary frame. While the stream is not in the listening state the chunk is
 * held back until the next 'listening' event.
 *
 * @param {*} chunk Data to send.
 * @param {String} encoding Ignored.
 * @param {Function} callback Passed to the connection's sendBytes().
 */
RecognizeStream.prototype._write = function(chunk, encoding, callback) {
  var stream = this;

  function send() {
    stream.connection.sendBytes(chunk, callback);
  }

  if (!stream.listening) {
    // not ready for audio yet: defer until the service says it is listening
    stream.once('listening', send);
    return;
  }

  send();
};

/**
 * Replaces recognizeLive & friends with a single 2-way stream over websockets
 *
 * @param {Object} [params] Options for the RecognizeStream. The object is
 *   copied, so the caller's object is left untouched.
 * @param {String} [params.content_type] Alias for 'content-type'; used only
 *   when 'content-type' is not set.
 * @param {*} [params.X-WDC-PL-OPT-OUT] Alias for 'X-Watson-Learning-Opt-Out';
 *   used only when the latter is not set.
 * @param {Object} [params.headers] Extra handshake headers; they override the
 *   default user-agent and authorization headers.
 * @returns {RecognizeStream}
 */
SpeechToText.prototype.createRecognizeStream = function(params) {
  // shallow copy: the additions below must not leak into the caller's object
  var streamParams = extend({}, params);
  streamParams.base_url = this._options.url;

  // todo: apply these corrections to other methods (?)
  if (streamParams.content_type && !streamParams['content-type']) {
    streamParams['content-type'] = streamParams.content_type;
  }

  if (streamParams['X-WDC-PL-OPT-OUT'] && !streamParams['X-Watson-Learning-Opt-Out']) {
    streamParams['X-Watson-Learning-Opt-Out'] = streamParams['X-WDC-PL-OPT-OUT'];
  }

  // builds a fresh headers object, so the caller's headers stay untouched too
  streamParams.headers = extend({
    'user-agent': pkg.name + '-nodejs-'+ pkg.version,
    authorization:  'Basic ' + this._options.api_key
  }, streamParams.headers);

  return new RecognizeStream(streamParams);
};

// Wrap each deprecated method so that calling it logs a deprecation notice
// before running the original implementation. The notice is skipped when
// `silent` is set either on the call's params or on the service options.
['recognizeLive', 'observeResult'].forEach(function(methodName) {
  var implementation = SpeechToText.prototype[methodName];

  SpeechToText.prototype[methodName] = function deprecated(params) {
    var silenced = (params || {}).silent || this._options.silent;

    if (!silenced) {
      // logged as an Error object so the output includes a stack trace
      console.log(new Error('The ' + methodName + '() method is deprecated and will be removed from a future version of the watson-developer-cloud SDK. ' +
        'Please use createRecognizeStream() instead.\n(Set {silent: true} to hide this message.)'));
    }

    // forward every argument (including the callback) with the same receiver
    return implementation.apply(this, arguments);
  };
});

module.exports = SpeechToText;