osmo-cc-router/src/router/audio.c

447 lines
13 KiB
C

/* audio handling
*
* (C) 2020 by Andreas Eversberg <jolly@eversberg.eu>
* All Rights Reserved
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Audio flow diagram:
*
* This diagram shows the audio processing. The function for each processing
* segment is given by the names ending with "()".
*
*
* receive_originator() receive_terminator()
* | |
* | |
* \|/ \|/
* +-------+ +-------+
* |jitter | |jitter |
* |save | |save |
* +-------+ +-------+
*
*
* clock()
* |
* |
* ORIG \|/ TERM
* +---------+----------+
* | |
* \|/ \|/
* +-------+ +-------+
* |jitter | |jitter |
* |load | |load |
* +-------+ +-------+
* | |
* \|/ \|/
* +-------+ +-------+
* |decode | |decode |
* | | | |
* +-------+ +-------+
* | |
* \|/ \|/
* +-------+ +-------+
* |int to | |int to |
* |samples| |samples|
* +-------+ +-------+
* | |
* +------+ | | +------+
* | |/ | | \| |
* | DTMF |---| |---| DTMF |
* | |\ | | /| |
* +------+ | | +------+
* | |
* +------+ | | +------+
* | WAVE | | | | WAVE |
* | |_ | | _| |
* | PLAY | \ | | / | PLAY |
* +------+ \| |/ +------+
* | |
* | +------+ |
* |\ | WAVE | /|
* | \____| |____/ |
* | |RECORD| |
* | +------+ |
* | |
* send_terminator() send_originator()
* | |
* \|/ \|/
* +-------+ +-------+
* | TX- | | RX- |
* |COMPRES| |COMPRES|
* +-------+ +-------+
* | |
* \|/ \|/
* +-------+ +-------+
* | TX- | | RX- |
* | GAIN | | GAIN |
* +-------+ +-------+
* | |
* \|/ \|/
* +-------+ +-------+
* |samples| |samples|
* |to int | |to int |
* +-------+ +-------+
* | |
* \|/ \|/
* +-------+ +-------+
* |encode | |encode |
* | | | |
* +-------+ +-------+
* | |
* | |
* \|/ \|/
*
* RTP RTP
*
* The clock triggers a read from each jitter buffer and replaces the audio
* with the wave, if playing. It also records what is sent to originator and
* terminator, if recording. This way the wave is included in the recording.
*
*/
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <osmocom/core/select.h>
#include <osmocom/cc/session.h>
#include <osmocom/cc/rtp.h>
#include "../liblogging/logging.h"
#include "call.h"
#include "audio.h"
/* Convert a gain in dB into a linear amplitude factor: 10^(db / 20).
 * The argument is parenthesized so that expressions such as
 * db2level(a + b) are converted as a whole. */
#define db2level(db) pow(10, (double)(db) / 20.0)
/* Initialisation hook for the audio handling; there is currently nothing
 * to set up, so the body is empty. */
void audio_init(void)
{
	/* intentionally left empty */
}
/*
 * Scale a buffer of samples in place.
 *
 * samples: buffer that is modified
 * length:  number of samples to process
 * gain:    gain in dB, converted to a linear factor via db2level()
 */
static void gain_samples(sample_t *samples, int length, double gain)
{
	const double factor = db2level(gain);
	int n = 0;

	while (n < length) {
		samples[n] *= factor;
		n++;
	}
}
void receive_originator(struct osmo_cc_session_codec *codec, uint8_t marker, uint16_t sequence, uint32_t timestamp, uint32_t ssrc, uint8_t *payload, int payload_len)
{
call_relation_t *relation = codec->media->session->priv;
call_t *call = relation->call;
jitter_frame_t *jf;
/* telephone-events */
if (codec->decoder == decode_te) {
uint8_t *data;
int len;
struct telephone_event *te;
codec->decoder(payload, payload_len, &data, &len, relation);
te = (struct telephone_event *)data;
rx_telephone_event(relation, marker, te, 0);
free(data);
return;
}
/* store to originator jitter buffer */
jf = jitter_frame_alloc(codec->decoder, relation, payload, payload_len, marker, sequence, timestamp, ssrc);
if (!jf)
return;
jitter_save(&call->orig_dejitter, jf);
}
void receive_terminator(struct osmo_cc_session_codec *codec, uint8_t marker, uint16_t sequence, uint32_t timestamp, uint32_t ssrc, uint8_t *payload, int payload_len)
{
call_relation_t *relation = codec->media->session->priv;
call_t *call = relation->call;
jitter_frame_t *jf;
/* ignore data from forking call */
if (call->forking)
return;
/* telephone-events */
if (codec->decoder == decode_te) {
uint8_t *data;
int len;
struct telephone_event *te;
codec->decoder(payload, payload_len, &data, &len, relation);
te = (struct telephone_event *)data;
rx_telephone_event(relation, marker, te, 1);
free(data);
return;
}
/* store to terminator jitter buffer */
jf = jitter_frame_alloc(codec->decoder, relation, payload, payload_len, marker, sequence, timestamp, ssrc);
if (!jf)
return;
jitter_save(&call->term_dejitter, jf);
}
/*
 * Send a block of samples towards the originator.
 *
 * The samples are modified in place: the RX compressor and the RX gain of
 * the call are applied when enabled. The result is converted to 16 bit
 * integers, encoded with the relation's codec and sent via RTP.
 *
 * Nothing happens if the relation has no codec or if len is not positive.
 */
static void send_originator(call_relation_t *relation, sample_t *samples, int len)
{
	call_t *call = relation->call;
	/* preset, because an encoder may return without writing its outputs */
	uint8_t *payload = NULL;
	int payload_len = 0;

	/* guard before creating the variable length array below */
	if (!relation->codec || len <= 0)
		return;

	int16_t spl[len];

	/* compress */
	if (call->rx_compress)
		sendevolumenregler(&call->rx_compressor, samples, len);

	/* adjust gain */
	if (call->rx_gain)
		gain_samples(samples, len, call->rx_gain);

	/* convert samples to int16 */
	samples_to_int16_speech(spl, samples, len);

	/* encode and send via RTP; skip sending if nothing was encoded */
	relation->codec->encoder((uint8_t *)spl, len * (int)sizeof(*spl), &payload, &payload_len, relation);
	if (payload)
		osmo_cc_rtp_send(relation->codec, payload, payload_len, 0, 1, len);
	free(payload);
}
/*
 * Send a block of samples towards the terminator.
 *
 * The samples are modified in place: the TX compressor and the TX gain of
 * the call are applied when enabled. The result is converted to 16 bit
 * integers, encoded with the relation's codec and sent via RTP.
 *
 * Nothing happens if the relation has no codec or if len is not positive.
 */
static void send_terminator(call_relation_t *relation, sample_t *samples, int len)
{
	call_t *call = relation->call;
	/* preset, because an encoder may return without writing its outputs */
	uint8_t *payload = NULL;
	int payload_len = 0;

	/* guard before creating the variable length array below */
	if (!relation->codec || len <= 0)
		return;

	int16_t spl[len];

	/* compress */
	if (call->tx_compress)
		sendevolumenregler(&call->tx_compressor, samples, len);

	/* adjust gain */
	if (call->tx_gain)
		gain_samples(samples, len, call->tx_gain);

	/* convert samples to int16 */
	samples_to_int16_speech(spl, samples, len);

	/* encode and send via RTP; skip sending if nothing was encoded */
	relation->codec->encoder((uint8_t *)spl, len * (int)sizeof(*spl), &payload, &payload_len, relation);
	if (payload)
		osmo_cc_rtp_send(relation->codec, payload, payload_len, 0, 1, len);
	free(payload);
}
/*
 * Encode a telephone-event and send it via RTP on the given relation.
 *
 * The event is only sent if a telephone-event codec exists on the relation
 * and an audio codec is present. The packet is sent with the current
 * tx_sequence and tx_timestamp of the audio codec's media.
 */
void tx_telephone_event(call_relation_t *relation, uint8_t marker, struct telephone_event *te)
{
	/* preset, because the encoder may return without writing its outputs */
	uint8_t *payload = NULL;
	int payload_len = 0;

	/* only if codec was negotiated */
	if (!relation->telephone_event)
		return;

	/* there should be a codec at this point; just to be safe */
	if (!relation->codec)
		return;

	/* encode and send via RTP */
	relation->telephone_event->encoder((uint8_t *)te, sizeof(*te), &payload, &payload_len, relation);
	if (payload)
		osmo_cc_rtp_send_ts(relation->telephone_event, payload, payload_len, marker, relation->codec->media->tx_sequence, relation->codec->media->tx_timestamp);

	/* The encoder allocated the payload, so release it here, as the audio
	 * senders in this file do after sending.
	 * NOTE(review): assumes osmo_cc_rtp_send_ts() does not keep the
	 * pointer, like osmo_cc_rtp_send() - confirm. */
	free(payload);
}
/*
 * Read the next 'len' samples of a wave playback into 'buffer' (mono).
 *
 * play:           playback state to read from
 * buffer:         receives 'len' samples
 * len:            number of samples to produce
 * play_loop:      if set, the playback is re-created from 'play_filename'
 *                 when its end is reached and reading continues
 * play_filename:  file used to restart a looping playback
 * play_deviation: passed to wave_create_playback() on restart
 *
 * If fewer than 'len' samples could be read, the rest is filled with
 * silence. Two channel playbacks are mixed down by averaging both channels.
 *
 * Returns 1 if a non-looping playback reached its end (the playback is
 * destroyed in that case), otherwise 0.
 */
int _play_wave(wave_play_t *play, sample_t *buffer, int len, int play_loop, const char *play_filename, double play_deviation)
{
	sample_t wbuffer[len], wbuffer2[len], *waves[2];
	int got = 0;
	int finished = 0;
	int rc;
	int i;

read_again:
	waves[0] = wbuffer + got;
	waves[1] = wbuffer2 + got;
	rc = wave_read(play, waves, len - got);
	got += rc;

	/* we have a short read (hit the end) or nothing to play left (hit the end without short read) */
	if (!play->left) {
		wave_destroy_playback(play);
		if (play_loop) {
			int samplerate = 0, channels = 0;

			rc = wave_create_playback(play, play_filename, &samplerate, &channels, play_deviation);
			if (rc >= 0)
				goto read_again;
		} else {
			/* notify routing about finished playback */
			finished = 1;
		}
	}

	/* in case we do not get all samples filled, append silence;
	 * both channel buffers are cleared at the same index */
	for (i = got; i < len; i++) {
		wbuffer[i] = 0.0;
		wbuffer2[i] = 0.0;
	}

	/* convert stereo to mono */
	if (play->channels == 2) {
		for (i = 0; i < len; i++)
			buffer[i] = (wbuffer[i] + wbuffer2[i]) / 2.0;
	} else {
		for (i = 0; i < len; i++)
			buffer[i] = wbuffer[i];
	}

	return finished;
}
void call_clock(int len)
{
call_t *call, *call_next;
call_relation_t *relation;
int16_t spl[len];
sample_t orig_samples[len], term_samples[len], *samples[2] = { orig_samples, term_samples };
int rc;
for (call = call_list; call; call = call_next) {
call_next = call->next;
relation = call->relation_list;
/* do we have no RTP proxy ? */
if (!relation->cc_session || !relation->codec)
continue;
/* process originating audio */
jitter_load_samples(&call->orig_dejitter, (uint8_t *)spl, len, sizeof(*spl), jitter_conceal_s16, NULL);
/* convert to samples */
int16_to_samples_speech(orig_samples, spl, len);
/* dtmf decoding */
if (relation->dtmf_dec_enable)
dtmf_decode(&relation->dtmf_dec, orig_samples, len);
if (!call->forking && relation->next) {
/* process terminating audio */
jitter_load_samples(&call->term_dejitter, (uint8_t *)spl, len, sizeof(*spl), jitter_conceal_s16, NULL);
/* convert to samples */
int16_to_samples_speech(term_samples, spl, len);
/* dtmf decoding */
if (relation->next->dtmf_dec_enable)
dtmf_decode(&relation->next->dtmf_dec, term_samples, len);
} else
memset(term_samples, 0, len * sizeof(sample_t));
/* play (overload data from jitter buffer) */
if (call->orig_play.fp) {
rc = _play_wave(&call->orig_play, term_samples, len, call->orig_play_loop, call->orig_play_filename, call->orig_play_deviation);
if (call->routing.routing && rc)
routing_send(&call->routing, "wave-finished");
}
if (call->term_play.fp) {
rc = _play_wave(&call->term_play, orig_samples, len, call->term_play_loop, call->term_play_filename, call->term_play_deviation);
if (call->routing.routing && rc)
routing_send(&call->routing, "called-wave-finished");
}
/* record */
if (call->rec.fp)
wave_write(&call->rec, samples, len);
/* forward audio */
send_originator(relation, term_samples, len);
if (!call->forking && relation->next)
send_terminator(relation->next, orig_samples, len);
}
}
/*
 * Encode 16 bit samples in host byte order into L16 (network byte order).
 *
 * src_data: samples in host byte order; read byte-wise, so no particular
 *           alignment is required
 * src_len:  length of src_data in bytes; an odd trailing byte is ignored
 * dst_data: receives a buffer allocated with malloc() that the caller must
 *           free(), or NULL on failure
 * dst_len:  receives the length of *dst_data in bytes, or 0 on failure
 * priv:     unused
 */
void encode_l16(uint8_t *src_data, int src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
{
	int count = src_len / 2, i;
	uint8_t *dst;

	/* report "nothing encoded" unless everything below succeeds */
	*dst_data = NULL;
	*dst_len = 0;

	if (src_len < 0)
		return;

	/* never request 0 bytes, malloc(0) may legitimately return NULL */
	dst = malloc(count ? (size_t)count * 2 : 1);
	if (!dst)
		return;

	for (i = 0; i < count; i++) {
		uint16_t host;

		memcpy(&host, src_data + 2 * i, sizeof(host));
		dst[2 * i] = (uint8_t)(host >> 8);
		dst[2 * i + 1] = (uint8_t)(host & 0xff);
	}

	*dst_data = dst;
	*dst_len = count * 2;
}
/*
 * Decode L16 (network byte order) into 16 bit samples in host byte order.
 *
 * src_data: payload in network byte order; read byte-wise, so no particular
 *           alignment is required
 * src_len:  length of src_data in bytes; an odd trailing byte is ignored
 * dst_data: receives a buffer allocated with malloc() that the caller must
 *           free(), or NULL on failure
 * dst_len:  receives the length of *dst_data in bytes, or 0 on failure
 * priv:     unused
 */
void decode_l16(uint8_t *src_data, int src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
{
	int count = src_len / 2, i;
	uint8_t *dst;

	/* report "nothing decoded" unless everything below succeeds */
	*dst_data = NULL;
	*dst_len = 0;

	if (src_len < 0)
		return;

	/* never request 0 bytes, malloc(0) may legitimately return NULL */
	dst = malloc(count ? (size_t)count * 2 : 1);
	if (!dst)
		return;

	for (i = 0; i < count; i++) {
		uint16_t host = (uint16_t)((src_data[2 * i] << 8) | src_data[2 * i + 1]);

		memcpy(dst + 2 * i, &host, sizeof(host));
	}

	*dst_data = dst;
	*dst_len = count * 2;
}
/*
 * Encode a struct telephone_event into its 4 byte payload.
 *
 * Layout written: byte 0 = event; byte 1 = e (bit 7), r (bit 6) and volume
 * (low 6 bits, limited to 0x3f); bytes 2..3 = duration, high byte first.
 *
 * src_data: pointer to a struct telephone_event
 * src_len:  must equal sizeof(struct telephone_event)
 * dst_data: receives a 4 byte buffer that the caller must free(), or NULL
 *           if src_len does not match or allocation fails
 * dst_len:  receives 4, or 0 on failure
 * priv:     unused
 */
void encode_te(uint8_t *src_data, int src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
{
	struct telephone_event *te = (void *)src_data;
	uint8_t *dst;

	/* report "nothing encoded" unless everything below succeeds */
	*dst_data = NULL;
	*dst_len = 0;

	if (src_len != (int)sizeof(*te))
		return;

	dst = calloc(1, 4);
	if (!dst)
		return;

	dst[0] = te->event;
	dst[1] = te->e << 7;
	dst[1] |= te->r << 6;
	dst[1] |= (te->volume < 0x3f) ? te->volume : 0x3f;
	dst[2] |= te->duration >> 8;
	dst[3] |= te->duration;

	*dst_data = dst;
	*dst_len = 4;
}
/*
 * Decode a telephone-event payload into a struct telephone_event.
 *
 * Layout read: byte 0 = event; byte 1 = e (bit 7), r (bit 6) and volume
 * (low 6 bits); bytes 2..3 = duration, high byte first.
 *
 * src_data: received payload
 * src_len:  length of the payload; at least 4 bytes are required
 * dst_data: receives an allocated struct telephone_event that the caller
 *           must free(), or NULL if the payload is too short or allocation
 *           fails
 * dst_len:  receives sizeof(struct telephone_event), or 0 on failure
 * priv:     unused
 */
void decode_te(uint8_t *src_data, int src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
{
	uint8_t *src = src_data;
	struct telephone_event *te;

	/* report "nothing decoded" unless everything below succeeds, so that
	 * callers never see stale or uninitialized outputs */
	*dst_data = NULL;
	*dst_len = 0;

	if (src_len < 4)
		return;

	te = calloc(1, sizeof(*te));
	if (!te)
		return;

	te->event = src[0];
	te->e = src[1] >> 7;
	te->r = (src[1] >> 6) & 0x1;
	te->volume = src[1] & 0x3f;
	te->duration = (src[2] << 8) | src[3];

	*dst_data = (uint8_t *)te;
	*dst_len = sizeof(*te);
}