447 lines
13 KiB
C
447 lines
13 KiB
C
/* audio handling
|
|
*
|
|
* (C) 2020 by Andreas Eversberg <jolly@eversberg.eu>
|
|
* All Rights Reserved
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
/*
|
|
* Audio flow diagram:
|
|
*
|
|
* This diagram shows the audio processing. The function for each processing
|
|
* segment is given by the names ending with "()".
|
|
*
|
|
*
|
|
* receive_originator() receive_terminator()
|
|
* | |
|
|
* | |
|
|
* \|/ \|/
|
|
* +-------+ +-------+
|
|
* |jitter | |jitter |
|
|
* |save | |save |
|
|
* +-------+ +-------+
|
|
*
|
|
*
|
|
* clock()
|
|
* |
|
|
* |
|
|
* ORIG \|/ TERM
|
|
* +---------+----------+
|
|
* | |
|
|
* \|/ \|/
|
|
* +-------+ +-------+
|
|
* |jitter | |jitter |
|
|
* |load | |load |
|
|
* +-------+ +-------+
|
|
* | |
|
|
* \|/ \|/
|
|
* +-------+ +-------+
|
|
* |decode | |decode |
|
|
* | | | |
|
|
* +-------+ +-------+
|
|
* | |
|
|
* \|/ \|/
|
|
* +-------+ +-------+
|
|
* |int to | |int to |
|
|
* |samples| |samples|
|
|
* +-------+ +-------+
|
|
* | |
|
|
* +------+ | | +------+
|
|
* | |/ | | \| |
|
|
* | DTMF |---| |---| DTMF |
|
|
* | |\ | | /| |
|
|
* +------+ | | +------+
|
|
* | |
|
|
* +------+ | | +------+
|
|
* | WAVE | | | | WAVE |
|
|
* | |_ | | _| |
|
|
* | PLAY | \ | | / | PLAY |
|
|
* +------+ \| |/ +------+
|
|
* | |
|
|
* | +------+ |
|
|
* |\ | WAVE | /|
|
|
* | \____| |____/ |
|
|
* | |RECORD| |
|
|
* | +------+ |
|
|
* | |
|
|
* send_terminator() send_originator()
|
|
* | |
|
|
* \|/ \|/
|
|
* +-------+ +-------+
|
|
* | TX- | | RX- |
|
|
* |COMPRES| |COMPRES|
|
|
* +-------+ +-------+
|
|
* | |
|
|
* \|/ \|/
|
|
* +-------+ +-------+
|
|
* | TX- | | RX- |
|
|
* | GAIN | | GAIN |
|
|
* +-------+ +-------+
|
|
* | |
|
|
* \|/ \|/
|
|
* +-------+ +-------+
|
|
* |samples| |samples|
|
|
* |to int | |to int |
|
|
* +-------+ +-------+
|
|
* | |
|
|
* \|/ \|/
|
|
* +-------+ +-------+
|
|
* |encode | |encode |
|
|
* | | | |
|
|
* +-------+ +-------+
|
|
* | |
|
|
* | |
|
|
* \|/ \|/
|
|
*
|
|
* RTP RTP
|
|
*
|
|
* The clock triggers read from jitter buffer and replaces it with wave,
|
|
* if playing. It also records what is sent to originator and terminator,
|
|
* if recording. This way the wave is included in the recording.
|
|
*
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <math.h>
|
|
#include <sys/types.h>
|
|
#include <arpa/inet.h>
|
|
#include <osmocom/core/select.h>
|
|
#include <osmocom/cc/session.h>
|
|
#include <osmocom/cc/rtp.h>
|
|
#include "../liblogging/logging.h"
|
|
#include "call.h"
|
|
#include "audio.h"
|
|
|
|
/* Convert a level given in dB into a linear amplitude factor: 10^(db/20).
 * The argument is parenthesized so that expressions such as
 * db2level(a + b) are converted as a whole. */
#define db2level(db) pow(10.0, (double)(db) / 20.0)
|
|
|
|
/* Initialization hook of the audio code; there is currently nothing to set up. */
void audio_init(void)
{
	/* intentionally empty */
	return;
}
|
|
|
|
/*
 * Apply a gain to a block of samples, in place.
 *
 * samples: buffer to scale
 * length:  number of samples in the buffer
 * gain:    gain in dB; converted to a linear factor with db2level()
 */
static void gain_samples(sample_t *samples, int length, double gain)
{
	const double factor = db2level(gain);
	int n;

	/* the factor is computed once and applied to every sample */
	for (n = 0; n < length; n++)
		samples[n] *= factor;
}
|
|
|
|
void receive_originator(struct osmo_cc_session_codec *codec, uint8_t marker, uint16_t sequence, uint32_t timestamp, uint32_t ssrc, uint8_t *payload, int payload_len)
|
|
{
|
|
call_relation_t *relation = codec->media->session->priv;
|
|
call_t *call = relation->call;
|
|
jitter_frame_t *jf;
|
|
|
|
/* telephone-events */
|
|
if (codec->decoder == decode_te) {
|
|
uint8_t *data;
|
|
int len;
|
|
struct telephone_event *te;
|
|
codec->decoder(payload, payload_len, &data, &len, relation);
|
|
te = (struct telephone_event *)data;
|
|
rx_telephone_event(relation, marker, te, 0);
|
|
free(data);
|
|
return;
|
|
}
|
|
|
|
/* store to originator jitter buffer */
|
|
jf = jitter_frame_alloc(codec->decoder, relation, payload, payload_len, marker, sequence, timestamp, ssrc);
|
|
if (!jf)
|
|
return;
|
|
jitter_save(&call->orig_dejitter, jf);
|
|
}
|
|
|
|
void receive_terminator(struct osmo_cc_session_codec *codec, uint8_t marker, uint16_t sequence, uint32_t timestamp, uint32_t ssrc, uint8_t *payload, int payload_len)
|
|
{
|
|
call_relation_t *relation = codec->media->session->priv;
|
|
call_t *call = relation->call;
|
|
jitter_frame_t *jf;
|
|
|
|
/* ignore data from forking call */
|
|
if (call->forking)
|
|
return;
|
|
|
|
/* telephone-events */
|
|
if (codec->decoder == decode_te) {
|
|
uint8_t *data;
|
|
int len;
|
|
struct telephone_event *te;
|
|
codec->decoder(payload, payload_len, &data, &len, relation);
|
|
te = (struct telephone_event *)data;
|
|
rx_telephone_event(relation, marker, te, 1);
|
|
free(data);
|
|
return;
|
|
}
|
|
|
|
/* store to terminator jitter buffer */
|
|
jf = jitter_frame_alloc(codec->decoder, relation, payload, payload_len, marker, sequence, timestamp, ssrc);
|
|
if (!jf)
|
|
return;
|
|
jitter_save(&call->term_dejitter, jf);
|
|
}
|
|
|
|
/*
 * Send a block of samples towards the originator.
 *
 * relation: relation to send on; nothing is sent if it has no codec
 * samples:  samples to send; modified in place by compressor and gain
 * len:      number of samples
 *
 * Processing order: optional compressor (rx_compress), optional gain
 * (rx_gain, in dB), conversion to int16, encoding with the relation's
 * codec and transmission via osmo_cc_rtp_send().
 */
static void send_originator(call_relation_t *relation, sample_t *samples, int len)
{
	call_t *call = relation->call;
	/* defined up front: an encoder may return without setting its outputs */
	uint8_t *payload = NULL;
	int payload_len = 0;

	/* validate before the variable length array below gets its size */
	if (!relation->codec || len <= 0)
		return;

	int16_t spl[len];

	/* compress */
	if (call->rx_compress)
		sendevolumenregler(&call->rx_compressor, samples, len);

	/* adjust gain */
	if (call->rx_gain)
		gain_samples(samples, len, call->rx_gain);

	/* convert samples to int16 */
	samples_to_int16_speech(spl, samples, len);

	/* encode and send via RTP */
	relation->codec->encoder((uint8_t *)spl, len * 2, &payload, &payload_len, relation);
	/* encoder failed: there is nothing to send and nothing to free */
	if (!payload)
		return;
	osmo_cc_rtp_send(relation->codec, payload, payload_len, 0, 1, len);
	free(payload);
}
|
|
|
|
/*
 * Send a block of samples towards the terminator.
 *
 * relation: relation to send on; nothing is sent if it has no codec
 * samples:  samples to send; modified in place by compressor and gain
 * len:      number of samples
 *
 * Processing order: optional compressor (tx_compress), optional gain
 * (tx_gain, in dB), conversion to int16, encoding with the relation's
 * codec and transmission via osmo_cc_rtp_send().
 */
static void send_terminator(call_relation_t *relation, sample_t *samples, int len)
{
	call_t *call = relation->call;
	/* defined up front: an encoder may return without setting its outputs */
	uint8_t *payload = NULL;
	int payload_len = 0;

	/* validate before the variable length array below gets its size */
	if (!relation->codec || len <= 0)
		return;

	int16_t spl[len];

	/* compress */
	if (call->tx_compress)
		sendevolumenregler(&call->tx_compressor, samples, len);

	/* adjust gain */
	if (call->tx_gain)
		gain_samples(samples, len, call->tx_gain);

	/* convert samples to int16 */
	samples_to_int16_speech(spl, samples, len);

	/* encode and send via RTP */
	relation->codec->encoder((uint8_t *)spl, len * 2, &payload, &payload_len, relation);
	/* encoder failed: there is nothing to send and nothing to free */
	if (!payload)
		return;
	osmo_cc_rtp_send(relation->codec, payload, payload_len, 0, 1, len);
	free(payload);
}
|
|
|
|
/*
 * Encode a telephone-event and send it on the given relation.
 *
 * relation: relation to send on; nothing is sent unless both a
 *           telephone-event codec and a regular codec are set
 * marker:   marker value passed on to osmo_cc_rtp_send_ts()
 * te:       event to encode
 *
 * The event is sent with the current tx_sequence and tx_timestamp of the
 * regular codec's media.
 */
void tx_telephone_event(call_relation_t *relation, uint8_t marker, struct telephone_event *te)
{
	/* defined up front: the encoder may return without setting its outputs */
	uint8_t *payload = NULL;
	int payload_len = 0;

	/* only if codec was negotiated */
	if (!relation->telephone_event)
		return;

	/* there should be a codec at this point; just to be safe */
	if (!relation->codec)
		return;

	/* encode and send via RTP */
	relation->telephone_event->encoder((uint8_t *)te, sizeof(*te), &payload, &payload_len, relation);
	if (!payload)
		return;
	osmo_cc_rtp_send_ts(relation->telephone_event, payload, payload_len, marker, relation->codec->media->tx_sequence, relation->codec->media->tx_timestamp);
	/* The encoder allocated the payload; release it after sending, as
	 * send_originator()/send_terminator() do after osmo_cc_rtp_send().
	 * NOTE(review): assumes osmo_cc_rtp_send_ts() does not keep the pointer. */
	free(payload);
}
|
|
|
|
/*
 * Fill a buffer with samples from a wave playback.
 *
 * play:           playback state to read from
 * buffer:         receives len mono samples
 * len:            number of samples to produce
 * play_loop:      if set, the file is re-opened when its end is reached
 * play_filename:  file to re-open when looping
 * play_deviation: passed to wave_create_playback() when re-opening
 *
 * Returns 1 if the end of the file was reached without looping, else 0.
 * Samples that could not be read are filled with silence.
 */
int _play_wave(wave_play_t *play, sample_t *buffer, int len, int play_loop, const char *play_filename, double play_deviation)
{
	sample_t wbuffer[len], wbuffer2[len], *waves[2];
	int got = 0;
	int finished = 0;
	int rc;
	int i;

read_again:
	waves[0] = wbuffer + got;
	waves[1] = wbuffer2 + got;
	rc = wave_read(play, waves, len - got);
	got += rc;
	/* we have a short read (hit the end) or nothing to play left (hit the end without short read) */
	if (!play->left) {
		wave_destroy_playback(play);
		if (play_loop) {
			int samplerate = 0, channels = 0;

			/* NOTE(review): a file without any samples would be re-opened
			 * again and again here - confirm that this cannot happen */
			rc = wave_create_playback(play, play_filename, &samplerate, &channels, play_deviation);
			if (rc >= 0)
				goto read_again;
		} else {
			/* notify routing about finished playback */
			finished = 1;
		}
	}
	/* in case we do not get all samples filled, append silence;
	 * both channels are padded at the same index, which is advanced once */
	while (got < len) {
		wbuffer[got] = 0.0;
		wbuffer2[got] = 0.0;
		got++;
	}
	/* convert stereo to mono */
	if (play->channels == 2) {
		for (i = 0; i < len; i++)
			buffer[i] = (wbuffer[i] + wbuffer2[i]) / 2.0;
	} else {
		for (i = 0; i < len; i++)
			buffer[i] = wbuffer[i];
	}

	return finished;
}
|
|
|
|
void call_clock(int len)
|
|
{
|
|
call_t *call, *call_next;
|
|
call_relation_t *relation;
|
|
int16_t spl[len];
|
|
sample_t orig_samples[len], term_samples[len], *samples[2] = { orig_samples, term_samples };
|
|
int rc;
|
|
|
|
for (call = call_list; call; call = call_next) {
|
|
call_next = call->next;
|
|
|
|
relation = call->relation_list;
|
|
/* do we have no RTP proxy ? */
|
|
if (!relation->cc_session || !relation->codec)
|
|
continue;
|
|
|
|
/* process originating audio */
|
|
jitter_load_samples(&call->orig_dejitter, (uint8_t *)spl, len, sizeof(*spl), jitter_conceal_s16, NULL);
|
|
|
|
/* convert to samples */
|
|
int16_to_samples_speech(orig_samples, spl, len);
|
|
|
|
/* dtmf decoding */
|
|
if (relation->dtmf_dec_enable)
|
|
dtmf_decode(&relation->dtmf_dec, orig_samples, len);
|
|
|
|
if (!call->forking && relation->next) {
|
|
/* process terminating audio */
|
|
jitter_load_samples(&call->term_dejitter, (uint8_t *)spl, len, sizeof(*spl), jitter_conceal_s16, NULL);
|
|
|
|
/* convert to samples */
|
|
int16_to_samples_speech(term_samples, spl, len);
|
|
|
|
/* dtmf decoding */
|
|
if (relation->next->dtmf_dec_enable)
|
|
dtmf_decode(&relation->next->dtmf_dec, term_samples, len);
|
|
} else
|
|
memset(term_samples, 0, len * sizeof(sample_t));
|
|
|
|
/* play (overload data from jitter buffer) */
|
|
if (call->orig_play.fp) {
|
|
rc = _play_wave(&call->orig_play, term_samples, len, call->orig_play_loop, call->orig_play_filename, call->orig_play_deviation);
|
|
if (call->routing.routing && rc)
|
|
routing_send(&call->routing, "wave-finished");
|
|
}
|
|
if (call->term_play.fp) {
|
|
rc = _play_wave(&call->term_play, orig_samples, len, call->term_play_loop, call->term_play_filename, call->term_play_deviation);
|
|
if (call->routing.routing && rc)
|
|
routing_send(&call->routing, "called-wave-finished");
|
|
}
|
|
|
|
/* record */
|
|
if (call->rec.fp)
|
|
wave_write(&call->rec, samples, len);
|
|
|
|
/* forward audio */
|
|
send_originator(relation, term_samples, len);
|
|
if (!call->forking && relation->next)
|
|
send_terminator(relation->next, orig_samples, len);
|
|
}
|
|
}
|
|
|
|
/*
 * Encode 16 bit samples from host byte order to network byte order.
 *
 * src_data: samples in host byte order
 * src_len:  length of src_data in bytes; an odd trailing byte is ignored
 * dst_data: receives a malloc()ed buffer that the caller must free(),
 *           or NULL if there is nothing to encode or allocation failed
 * dst_len:  receives the length of that buffer in bytes, or 0
 * priv:     unused
 */
void encode_l16(uint8_t *src_data, int src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
{
	int count = src_len / 2, i;
	uint8_t *dst;

	/* define the outputs on every path, so callers never see garbage */
	*dst_data = NULL;
	*dst_len = 0;

	if (count <= 0)
		return;

	dst = malloc((size_t)count * 2);
	if (!dst)
		return;
	for (i = 0; i < count; i++) {
		uint16_t host, net;

		/* copy through memcpy: the byte buffers need not be 16 bit aligned */
		memcpy(&host, src_data + i * 2, sizeof(host));
		net = htons(host);
		memcpy(dst + i * 2, &net, sizeof(net));
	}
	*dst_data = dst;
	*dst_len = count * 2;
}
|
|
|
|
/*
 * Decode 16 bit samples from network byte order to host byte order.
 *
 * src_data: samples in network byte order
 * src_len:  length of src_data in bytes; an odd trailing byte is ignored
 * dst_data: receives a malloc()ed buffer that the caller must free(),
 *           or NULL if there is nothing to decode or allocation failed
 * dst_len:  receives the length of that buffer in bytes, or 0
 * priv:     unused
 */
void decode_l16(uint8_t *src_data, int src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
{
	int count = src_len / 2, i;
	uint8_t *dst;

	/* define the outputs on every path, so callers never see garbage */
	*dst_data = NULL;
	*dst_len = 0;

	if (count <= 0)
		return;

	dst = malloc((size_t)count * 2);
	if (!dst)
		return;
	for (i = 0; i < count; i++) {
		uint16_t net, host;

		/* copy through memcpy: a received payload need not be 16 bit aligned */
		memcpy(&net, src_data + i * 2, sizeof(net));
		host = ntohs(net);
		memcpy(dst + i * 2, &host, sizeof(host));
	}
	*dst_data = dst;
	*dst_len = count * 2;
}
|
|
|
|
void encode_te(uint8_t __attribute__((unused)) *src_data, int __attribute__((unused)) src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
|
|
{
|
|
struct telephone_event *te = (void *)src_data;
|
|
uint8_t *dst;
|
|
|
|
if (src_len != sizeof(*te))
|
|
return;
|
|
|
|
dst = calloc(1, 4);
|
|
if (!dst)
|
|
return;
|
|
dst[0] = te->event;
|
|
dst[1] = te->e << 7;
|
|
dst[1] |= te->r << 6;
|
|
dst[1] |= (te->volume < 0x3f) ? te->volume : 0x3f;
|
|
dst[2] |= te->duration >> 8;
|
|
dst[3] |= te->duration;
|
|
*dst_data = dst;
|
|
*dst_len = 4;
|
|
}
|
|
|
|
void decode_te(uint8_t *src_data, int src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
|
|
{
|
|
uint8_t *src = src_data;
|
|
struct telephone_event *te;
|
|
|
|
if (src_len < 4)
|
|
return;
|
|
|
|
te = calloc(1, sizeof(*te));
|
|
if (!te)
|
|
return;
|
|
te->event = src[0];
|
|
te->e = src[1] >> 7;
|
|
te->r = (src[1] >> 6) & 0x1;
|
|
te->volume = src[1] & 0x3f;
|
|
te->duration = (src[2] << 8) | src[3];
|
|
*dst_data = (uint8_t *)te;
|
|
*dst_len = sizeof(*te);
|
|
}
|
|
|