freeswitch/src/switch_core_speech.c

318 lines
8.7 KiB
C

/*
* FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
* Copyright (C) 2005-2014, Anthony Minessale II <anthm@freeswitch.org>
*
* Version: MPL 1.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
*
* The Initial Developer of the Original Code is
* Anthony Minessale II <anthm@freeswitch.org>
* Portions created by the Initial Developer are Copyright (C)
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Anthony Minessale II <anthm@freeswitch.org>
* Michael Jerris <mike@jerris.com>
* Paul D. Tinsley <pdt at jackhammer.org>
* Christopher M. Rienzo <chris@rienzo.com>
*
*
* switch_core_speech.c -- Main Core Library (speech functions)
*
*/
#include <switch.h>
#include "private/switch_core_pvt.h"
/*
 * Open a speech (TTS) handle on the named speech module.
 *
 * sh          - handle to initialise; must not be NULL.
 * module_name - name of the speech module, optionally followed by ":param".
 *               The part after the first ':' is stored in sh->param and the
 *               part before it is used to look up the module.
 * voice_name  - passed through untouched to the module's speech_open().
 * rate        - stored as sh->rate, sh->samplerate and sh->native_rate.
 * interval    - used with rate to compute sh->samples.
 * channels    - stored as sh->channels (sh->real_channels is always set to 1).
 * flags       - must not be NULL; copied into sh->flags and handed to the module.
 * pool        - memory pool to use, or NULL to have one created here. A pool
 *               created here is marked with SWITCH_SPEECH_FLAG_FREE_POOL.
 *
 * Returns SWITCH_STATUS_FALSE on bad arguments, SWITCH_STATUS_GENERR when the
 * module cannot be found, the pool-creation status if that fails, otherwise
 * whatever the module's speech_open() returns.
 *
 * On failure of the module's speech_open() the interface reference is dropped
 * and, if the pool was created by this function, the pool is destroyed so it
 * does not leak (switch_core_speech_close() refuses to act on a handle that
 * never got SWITCH_SPEECH_FLAG_OPEN).
 */
SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *sh,
														const char *module_name,
														const char *voice_name,
														unsigned int rate, unsigned int interval, unsigned int channels,
														switch_speech_flag_t *flags, switch_memory_pool_t *pool)
{
	switch_status_t status;
	char buf[256] = "";
	char *param = NULL;
	int own_pool = 0;			/* set only when the pool is created below */

	if (!sh || !flags || zstr(module_name)) {
		return SWITCH_STATUS_FALSE;
	}

	/* "module:param" -> work on a local copy so the caller's string is not modified */
	if (strchr(module_name, ':')) {
		switch_set_string(buf, module_name);
		if ((param = strchr(buf, ':'))) {
			*param++ = '\0';
			module_name = buf;
		}
	}

	if ((sh->speech_interface = switch_loadable_module_get_speech_interface(module_name)) == 0) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid speech module [%s]!\n", module_name);
		return SWITCH_STATUS_GENERR;
	}

	sh->flags = *flags;

	if (pool) {
		sh->memory_pool = pool;
	} else {
		if ((status = switch_core_new_memory_pool(&sh->memory_pool)) != SWITCH_STATUS_SUCCESS) {
			UNPROTECT_INTERFACE(sh->speech_interface);
			return status;
		}
		switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
		own_pool = 1;
	}

	sh->engine = switch_core_strdup(sh->memory_pool, module_name);
	if (param) {
		sh->param = switch_core_strdup(sh->memory_pool, param);
	}

	sh->rate = rate;
	sh->name = switch_core_strdup(sh->memory_pool, module_name);
	sh->samples = switch_samples_per_packet(rate, interval);
	sh->samplerate = rate;
	sh->native_rate = rate;
	sh->channels = channels;
	sh->real_channels = 1;

	if ((status = sh->speech_interface->speech_open(sh, voice_name, rate, channels, flags)) == SWITCH_STATUS_SUCCESS) {
		switch_set_flag(sh, SWITCH_SPEECH_FLAG_OPEN);
	} else {
		UNPROTECT_INTERFACE(sh->speech_interface);

		if (own_pool) {
			/* These strings live in the pool that is about to go away. */
			sh->engine = NULL;
			sh->param = NULL;
			sh->name = NULL;
			switch_core_destroy_memory_pool(&sh->memory_pool);
			switch_clear_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
		}
	}

	return status;
}
/*
 * Hand a piece of text to the speech module for synthesis.
 *
 * The text may start with a parameter list of the form
 *   {name1=val1,name2=val2,name3=val3}text to speak
 * Each name=value pair is applied through switch_core_speech_text_param_tts()
 * before the remaining text is passed to the module's speech_feed_tts().
 *
 * Returns SWITCH_STATUS_FALSE when the text is empty, consists only of
 * spaces, or has an opening '{' for which no end is found; otherwise the
 * status returned by the module.
 */
SWITCH_DECLARE(switch_status_t) switch_core_speech_feed_tts(switch_speech_handle_t *sh, const char *text, switch_speech_flag_t *flags)
{
	switch_status_t result = SWITCH_STATUS_FALSE;
	char *copy = NULL;			/* private, writable duplicate of text */
	char *cursor = NULL;		/* current position inside copy */
	char *params = NULL;		/* contents of the leading {...}, if any */

	switch_assert(sh != NULL);

	if (zstr(text)) {
		return SWITCH_STATUS_FALSE;
	}

	copy = strdup(text);

	/* skip leading spaces */
	for (cursor = copy; cursor && *cursor == ' '; cursor++) {
		/* nothing */
	}

	if (zstr(cursor)) {
		goto done;
	}

	/* peel off the {...} parameter section */
	if (*cursor == '{') {
		params = cursor + 1;
		cursor = switch_find_end_paren(cursor, '{', '}');

		if (zstr(cursor)) {
			goto done;
		}

		/* terminate the parameter list and step onto the text proper */
		*cursor = '\0';
		cursor++;
	}

	/* apply each name=value pair */
	if (!zstr(params)) {
		char *pairs[256] = { 0 };
		int total = switch_separate_string(params, ',', pairs, (sizeof(pairs) / sizeof(pairs[0])));
		int idx = 0;

		while (idx < total && pairs[idx]) {
			char *kv[2] = { 0 };

			/* only well-formed "name=value" entries are forwarded */
			if (switch_separate_string(pairs[idx], '=', kv, (sizeof(kv) / sizeof(kv[0]))) == 2) {
				switch_core_speech_text_param_tts(sh, kv[0], kv[1]);
			}

			++idx;
		}
	}

	result = sh->speech_interface->speech_feed_tts(sh, cursor, flags);

  done:
	switch_safe_free(copy);
	return result;
}
/*
 * Ask the speech module to flush its TTS state.
 * Does nothing when the module provides no speech_flush_tts callback.
 */
SWITCH_DECLARE(void) switch_core_speech_flush_tts(switch_speech_handle_t *sh)
{
	switch_assert(sh != NULL);

	if (!sh->speech_interface->speech_flush_tts) {
		/* callback not provided by this module */
		return;
	}

	sh->speech_interface->speech_flush_tts(sh);
}
/*
 * Forward a string-valued TTS parameter (param = val) to the speech module.
 * Does nothing when the module provides no speech_text_param_tts callback.
 */
SWITCH_DECLARE(void) switch_core_speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val)
{
	switch_assert(sh != NULL);

	if (!sh->speech_interface->speech_text_param_tts) {
		/* callback not provided by this module */
		return;
	}

	sh->speech_interface->speech_text_param_tts(sh, param, val);
}
/*
 * Forward an integer-valued TTS parameter (param = val) to the speech module.
 * Does nothing when the module provides no speech_numeric_param_tts callback.
 */
SWITCH_DECLARE(void) switch_core_speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val)
{
	switch_assert(sh != NULL);

	if (!sh->speech_interface->speech_numeric_param_tts) {
		/* callback not provided by this module */
		return;
	}

	sh->speech_interface->speech_numeric_param_tts(sh, param, val);
}
/*
 * Forward a floating-point TTS parameter (param = val) to the speech module.
 * Does nothing when the module provides no speech_float_param_tts callback.
 */
SWITCH_DECLARE(void) switch_core_speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val)
{
	switch_assert(sh != NULL);

	if (!sh->speech_interface->speech_float_param_tts) {
		/* callback not provided by this module */
		return;
	}

	sh->speech_interface->speech_float_param_tts(sh, param, val);
}
/*
 * Read synthesized data from the speech module into `data`.
 *
 * sh      - open speech handle; must not be NULL.
 * data    - destination buffer; it is treated as int16_t samples by the
 *           resample and channel-mux steps below.
 * datalen - in: requested length; out: length actually placed in `data`.
 *           The "/ 2" and "* 2" conversions below imply the unit is bytes of
 *           16-bit samples.
 * flags   - passed through to the module's speech_read_tts().
 *
 * Returns SWITCH_STATUS_SUCCESS when data was produced, SWITCH_STATUS_BREAK
 * (with *datalen = 0) once the module has reported a non-success status and
 * any staged data has been drained, and SWITCH_STATUS_GENERR if the
 * resampler cannot be created.
 *
 * NOTE(review): sh->channels is used as a divisor below; nothing in this
 * function guards against it being 0 - confirm callers never open with 0.
 */
SWITCH_DECLARE(switch_status_t) switch_core_speech_read_tts(switch_speech_handle_t *sh, void *data, switch_size_t *datalen, switch_speech_flag_t *flags)
{
switch_status_t status;
/* want and orig_len both start as the caller's requested length and are never modified */
switch_size_t want, orig_len = *datalen;
switch_assert(sh != NULL);
want = *datalen;
top:
/* Serve from the staging buffer when it holds a full request, or when the
 * module is finished and whatever is left must be drained. */
if (sh->buffer && (switch_buffer_inuse(sh->buffer) >= orig_len || switch_test_flag(sh, SWITCH_SPEECH_FLAG_DONE))) {
if ((*datalen = switch_buffer_read(sh->buffer, data, orig_len))) {
status = SWITCH_STATUS_SUCCESS;
goto done;
}
}
/* Module already finished and nothing was left to drain: clear the DONE flag
 * and report end of data. */
if (switch_test_flag(sh, SWITCH_SPEECH_FLAG_DONE)) {
switch_clear_flag(sh, SWITCH_SPEECH_FLAG_DONE);
*datalen = 0;
return SWITCH_STATUS_BREAK;
}
more:
/* Request a per-channel share of the caller's length from the module; the
 * mux step at `done` expands the result to sh->channels again. */
*datalen = orig_len / sh->channels;
if ((status = sh->speech_interface->speech_read_tts(sh, data, datalen, flags)) != SWITCH_STATUS_SUCCESS) {
/* Any non-success status marks the stream as done; go back to drain the buffer. */
switch_set_flag(sh, SWITCH_SPEECH_FLAG_DONE);
goto top;
}
/* Resample only when both rates are known and differ. */
if (sh->native_rate && sh->samplerate && sh->native_rate != sh->samplerate) {
if (!sh->resampler) {
/* The resampler is created on first use and kept on the handle. */
if (switch_resample_create(&sh->resampler,
sh->native_rate, sh->samplerate, (uint32_t) orig_len / sh->channels, SWITCH_RESAMPLE_QUALITY, 1) != SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Unable to create resampler!\n");
return SWITCH_STATUS_GENERR;
}
}
/* *datalen / 2: length converted to a count of 16-bit samples. */
switch_resample_process(sh->resampler, data, (uint32_t)(*datalen / 2));
/* If the resampled output is shorter than requested or longer than the
 * caller's length, stage it through sh->buffer instead of copying directly. */
if (sh->resampler->to_len < want / 2 || sh->resampler->to_len > orig_len / 2) {
if (!sh->buffer) {
int factor = sh->resampler->to_len * sh->samplerate / 1000;
switch_buffer_create_dynamic(&sh->buffer, factor, factor, 0);
switch_assert(sh->buffer);
}
/* Grow the scratch buffer (allocated from the handle's pool) when needed. */
if (!sh->dbuf || sh->dbuflen < sh->resampler->to_len * 2) {
sh->dbuflen = sh->resampler->to_len * 2;
sh->dbuf = switch_core_alloc(sh->memory_pool, sh->dbuflen);
}
/* NOTE(review): this compares to_len against dbuflen while the copy below
 * uses to_len * 2 - the size check above is the one that actually covers
 * the copy; confirm the intended bound. */
switch_assert(sh->resampler->to_len <= sh->dbuflen);
memcpy((int16_t *) sh->dbuf, sh->resampler->to, sh->resampler->to_len * 2);
switch_buffer_write(sh->buffer, sh->dbuf, sh->resampler->to_len * 2);
/* Not enough staged yet: pull more from the module. */
if (switch_buffer_inuse(sh->buffer) < want) {
*datalen = want;
goto more;
}
*datalen = switch_buffer_read(sh->buffer, data, orig_len);
status = SWITCH_STATUS_SUCCESS;
} else {
/* Resampled output fits the request: copy it straight to the caller. */
memcpy(data, sh->resampler->to, sh->resampler->to_len * 2);
*datalen = sh->resampler->to_len * 2;
status = SWITCH_STATUS_SUCCESS;
}
}
done:
/* Expand the data in place to sh->channels when that differs from
 * real_channels, and report the expanded length.
 * NOTE(review): the buffer-backed paths above read up to orig_len before this
 * expansion multiplies the length by sh->channels - confirm the caller's
 * buffer is large enough in that case. */
if (sh->channels != sh->real_channels) {
uint32_t rlen = *datalen / 2;
switch_mux_channels((int16_t *) data, rlen, 1, sh->channels);
*datalen = rlen * 2 * sh->channels;
}
return status;
}
/*
 * Close a speech handle previously opened with switch_core_speech_open().
 *
 * sh    - speech handle; must not be NULL.
 * flags - passed through to the module's speech_close().
 *
 * Returns SWITCH_STATUS_FALSE, without touching the module, when the handle
 * does not carry SWITCH_SPEECH_FLAG_OPEN (never opened, open failed, or
 * already closed). Otherwise returns the status of the module's
 * speech_close() after releasing the staging buffer, the resampler, the
 * interface reference and, when SWITCH_SPEECH_FLAG_FREE_POOL is set, the
 * memory pool.
 *
 * The OPEN check is done before calling into the module: this function drops
 * the interface reference (and possibly the pool) on a successful close, so
 * calling speech_close() again on such a handle would use released resources.
 */
SWITCH_DECLARE(switch_status_t) switch_core_speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags)
{
	switch_status_t status;

	switch_assert(sh != NULL);

	if (!switch_test_flag(sh, SWITCH_SPEECH_FLAG_OPEN)) {
		return SWITCH_STATUS_FALSE;
	}

	status = sh->speech_interface->speech_close(sh, flags);

	if (sh->buffer) {
		switch_buffer_destroy(&sh->buffer);
	}

	switch_resample_destroy(&sh->resampler);

	UNPROTECT_INTERFACE(sh->speech_interface);

	if (switch_test_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL)) {
		switch_core_destroy_memory_pool(&sh->memory_pool);
	}

	switch_clear_flag(sh, SWITCH_SPEECH_FLAG_OPEN);

	return status;
}
/* For Emacs:
* Local Variables:
* mode:c
* indent-tabs-mode:t
* tab-width:4
* c-basic-offset:4
* End:
* For VIM:
* vim:set softtabstop=4 shiftwidth=4 tabstop=4 noet:
*/