osmo-cc-router/src/router/audio.c

/* audio handling
 *
 * (C) 2020 by Andreas Eversberg <jolly@eversberg.eu>
 * All Rights Reserved
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Audio flow diagram:
 *
 * This diagram shows the audio processing. The function for each processing
 * segment is given by the name ending with "()".
 *
 *
 *   receive_originator() receive_terminator()
 *            |                    |
 *            |                    |
 *           \|/                  \|/
 *        +-------+            +-------+
 *        |jitter |            |jitter |
 *        |save   |            |save   |
 *        +-------+            +-------+
 *
 *
 *                    clock()
 *                      |
 *                      |
 *          ORIG       \|/        TERM
 *            +---------+----------+
 *            |                    |
 *           \|/                  \|/
 *        +-------+            +-------+
 *        |jitter |            |jitter |
 *        |load   |            |load   |
 *        +-------+            +-------+
 *            |                    |
 *           \|/                  \|/
 *        +-------+            +-------+
 *        |decode |            |decode |
 *        |       |            |       |
 *        +-------+            +-------+
 *            |                    |
 *           \|/                  \|/
 *        +-------+            +-------+
 *        |int to |            |int to |
 *        |samples|            |samples|
 *        +-------+            +-------+
 *            |                    |
 * +------+   |                    |   +------+
 * |      |/  |                    |  \|      |
 * | DTMF |---|                    |---| DTMF |
 * |      |\  |                    |  /|      |
 * +------+   |                    |   +------+
 *            |                    |
 * +------+   |                    |   +------+
 * | WAVE |   |                    |   | WAVE |
 * |      |_  |                    |  _|      |
 * | PLAY | \ |                    | / | PLAY |
 * +------+  \|                    |/  +------+
 *            |                    |
 *            |      +------+      |
 *            |\     | WAVE |     /|
 *            | \____|      |____/ |
 *            |      |RECORD|      |
 *            |      +------+      |
 *            |                    |
 *    send_terminator()    send_originator()
 *            |                    |
 *           \|/                  \|/
 *        +-------+            +-------+
 *        |  TX-  |            |  RX-  |
 *        |COMPRES|            |COMPRES|
 *        +-------+            +-------+
 *            |                    |
 *           \|/                  \|/
 *        +-------+            +-------+
 *        |  TX-  |            |  RX-  |
 *        | GAIN  |            | GAIN  |
 *        +-------+            +-------+
 *            |                    |
 *           \|/                  \|/
 *        +-------+            +-------+
 *        |samples|            |samples|
 *        |to int |            |to int |
 *        +-------+            +-------+
 *            |                    |
 *           \|/                  \|/
 *        +-------+            +-------+
 *        |encode |            |encode |
 *        |       |            |       |
 *        +-------+            +-------+
 *            |                    |
 *            |                    |
 *           \|/                  \|/
 *
 *           RTP                  RTP
 *
 * The clock triggers a read from the jitter buffers and replaces the audio
 * with the wave, if playing. It also records what is sent to originator and
 * terminator, if recording. This way the wave is included in the recording.
 *
 */

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <osmocom/core/select.h>
#include <osmocom/cc/session.h>
#include <osmocom/cc/rtp.h>
#include "../liblogging/logging.h"
#include "call.h"
#include "audio.h"

/* convert a level in dB into a linear factor: 10^(db/20); the argument is
 * parenthesized so that expressions like db2level(a + b) expand correctly */
#define db2level(db)    pow(10.0, (double)(db) / 20.0)

/* Initialization hook of the audio code; it currently has nothing to set up. */
void audio_init(void)
{
}

/* Scale 'length' samples in place by a gain given in dB.
 * The dB value is converted to a linear factor once, before the loop. */
static void gain_samples(sample_t *samples, int length, double gain)
{
	const double factor = db2level(gain);
	int idx;

	for (idx = 0; idx < length; idx++)
		samples[idx] *= factor;
}

/* Handle an RTP payload received on the originating side.
 *
 * Telephone-event payloads are decoded right away and handed to
 * rx_telephone_event(); every other payload is wrapped into a jitter frame
 * and stored in the call's originator jitter buffer.
 *
 * codec:       codec the payload was received with; its session's private
 *              pointer is the call relation
 * marker, sequence, timestamp, ssrc: RTP header values of the packet
 * payload, payload_len: the received payload
 */
void receive_originator(struct osmo_cc_session_codec *codec, uint8_t marker, uint16_t sequence, uint32_t timestamp, uint32_t ssrc, uint8_t *payload, int payload_len)
{
	call_relation_t *relation = codec->media->session->priv;
	call_t *call = relation->call;
	jitter_frame_t *jf;

	/* telephone-events */
	if (codec->decoder == decode_te) {
		/* The decoder may produce no event (payload too short or out
		 * of memory); preset the outputs so that case is detectable
		 * instead of using and freeing an uninitialized pointer. */
		uint8_t *data = NULL;
		int len = 0;

		codec->decoder(payload, payload_len, &data, &len, relation);
		if (!data)
			return;
		/* last argument 0: presumably marks the originator side */
		rx_telephone_event(relation, marker, (struct telephone_event *)data, 0);
		free(data);
		return;
	}

	/* store to originator jitter buffer */
	jf = jitter_frame_alloc(codec->decoder, relation, payload, payload_len, marker, sequence, timestamp, ssrc);
	if (!jf)
		return;
	jitter_save(&call->orig_dejitter, jf);
}

/* Handle an RTP payload received on the terminating side.
 *
 * Payloads are dropped while the call is forking. Telephone-event payloads
 * are decoded right away and handed to rx_telephone_event(); every other
 * payload is wrapped into a jitter frame and stored in the call's terminator
 * jitter buffer.
 *
 * codec:       codec the payload was received with; its session's private
 *              pointer is the call relation
 * marker, sequence, timestamp, ssrc: RTP header values of the packet
 * payload, payload_len: the received payload
 */
void receive_terminator(struct osmo_cc_session_codec *codec, uint8_t marker, uint16_t sequence, uint32_t timestamp, uint32_t ssrc, uint8_t *payload, int payload_len)
{
	call_relation_t *relation = codec->media->session->priv;
	call_t *call = relation->call;
	jitter_frame_t *jf;

	/* ignore data from forking call */
	if (call->forking)
		return;

	/* telephone-events */
	if (codec->decoder == decode_te) {
		/* The decoder may produce no event (payload too short or out
		 * of memory); preset the outputs so that case is detectable
		 * instead of using and freeing an uninitialized pointer. */
		uint8_t *data = NULL;
		int len = 0;

		codec->decoder(payload, payload_len, &data, &len, relation);
		if (!data)
			return;
		/* last argument 1: presumably marks the terminator side */
		rx_telephone_event(relation, marker, (struct telephone_event *)data, 1);
		free(data);
		return;
	}

	/* store to terminator jitter buffer */
	jf = jitter_frame_alloc(codec->decoder, relation, payload, payload_len, marker, sequence, timestamp, ssrc);
	if (!jf)
		return;
	jitter_save(&call->term_dejitter, jf);
}

/* Send 'len' samples towards the originator of the call.
 *
 * The samples are modified in place: they are optionally compressed and
 * gain-adjusted using the call's rx settings, then converted to 16 bit
 * integers, encoded with the relation's codec and sent via RTP.
 * Nothing is sent if the relation has no codec or encoding fails.
 */
static void send_originator(call_relation_t *relation, sample_t *samples, int len)
{
	int16_t spl[len];
	call_t *call = relation->call;
	/* preset, so an encoder that bails out without output is detectable */
	uint8_t *payload = NULL;
	int payload_len = 0;

	if (!relation->codec)
		return;

	/* compress */
	if (call->rx_compress)
		sendevolumenregler(&call->rx_compressor, samples, len);

	/* adjust gain */
	if (call->rx_gain)
		gain_samples(samples, len, call->rx_gain);

	/* convert samples to int16 */
	samples_to_int16_speech(spl, samples, len);

	/* encode and send via RTP */
	relation->codec->encoder((uint8_t *)spl, len * 2, &payload, &payload_len, relation);
	if (!payload)
		return;
	osmo_cc_rtp_send(relation->codec, payload, payload_len, 0, 1, len);
	free(payload);
}

/* Send 'len' samples towards the terminator of the call.
 *
 * The samples are modified in place: they are optionally compressed and
 * gain-adjusted using the call's tx settings, then converted to 16 bit
 * integers, encoded with the relation's codec and sent via RTP.
 * Nothing is sent if the relation has no codec or encoding fails.
 */
static void send_terminator(call_relation_t *relation, sample_t *samples, int len)
{
	int16_t spl[len];
	call_t *call = relation->call;
	/* preset, so an encoder that bails out without output is detectable */
	uint8_t *payload = NULL;
	int payload_len = 0;

	if (!relation->codec)
		return;

	/* compress */
	if (call->tx_compress)
		sendevolumenregler(&call->tx_compressor, samples, len);

	/* adjust gain */
	if (call->tx_gain)
		gain_samples(samples, len, call->tx_gain);

	/* convert samples to int16 */
	samples_to_int16_speech(spl, samples, len);

	/* encode and send via RTP */
	relation->codec->encoder((uint8_t *)spl, len * 2, &payload, &payload_len, relation);
	if (!payload)
		return;
	osmo_cc_rtp_send(relation->codec, payload, payload_len, 0, 1, len);
	free(payload);
}

/* Encode a telephone-event and send it via RTP on the given relation.
 *
 * The event is sent with the telephone-event codec, using the current tx
 * sequence number and tx timestamp of the audio codec's media stream.
 * Nothing is sent if no telephone-event codec was negotiated, if the
 * relation has no audio codec, or if encoding fails.
 */
void tx_telephone_event(call_relation_t *relation, uint8_t marker, struct telephone_event *te)
{
	/* preset, so an encoder that bails out without output is detectable */
	uint8_t *payload = NULL;
	int payload_len = 0;

	/* only if codec was negotiated */
	if (!relation->telephone_event)
		return;

	/* there should be a codec at this point; just to be safe */
	if (!relation->codec)
		return;

	/* encode and send via RTP */
	relation->telephone_event->encoder((uint8_t *)te, sizeof(*te), &payload, &payload_len, relation);
	if (!payload)
		return;
	osmo_cc_rtp_send_ts(relation->telephone_event, payload, payload_len, marker, relation->codec->media->tx_sequence, relation->codec->media->tx_timestamp);
	/* the encoder allocated the payload; release it after sending, as the
	 * audio send paths in this file do, otherwise every event leaks it */
	free(payload);
}

/* Read 'len' samples of wave playback into 'buffer' as mono.
 *
 * play:           playback state to read from
 * buffer:         receives 'len' samples
 * len:            number of samples to produce
 * play_loop:      if set, the file is re-opened when its end is reached and
 *                 reading continues from its start
 * play_filename:  file to re-open when looping
 * play_deviation: passed through to wave_create_playback() when looping
 *
 * If fewer than 'len' samples could be read, the rest is filled with silence.
 * A two-channel file is mixed down by averaging both channels; otherwise the
 * first channel is used.
 *
 * Returns 1 if the end of the file was reached and playback is not looped
 * (the playback is destroyed in that case), otherwise 0.
 */
int _play_wave(wave_play_t *play, sample_t *buffer, int len, int play_loop, const char *play_filename, double play_deviation)
{
	sample_t wbuffer[len], wbuffer2[len], *waves[2];
	int got = 0;
	int finished = 0;
	int rc;
	int i;

read_again:
	/* continue filling behind what was already read */
	waves[0] = wbuffer + got;
	waves[1] = wbuffer2 + got;
	rc = wave_read(play, waves, len - got);
	got += rc;
	/* we have a short read (hit the end) or nothing to play left (hit the end without short read) */
	if (!play->left) {
		wave_destroy_playback(play);
		if (play_loop) {
			int samplerate = 0, channels = 0;

			/* NOTE(review): a file without any samples would presumably
			 * make this retry forever - confirm wave_read() semantics */
			rc = wave_create_playback(play, play_filename, &samplerate, &channels, play_deviation);
			if (rc >= 0)
				goto read_again;
		} else {
			/* notify routing about finished playback */
			finished = 1;
		}
	}
	/* in case we do not get all samples filled, append silence;
	 * both channel buffers are padded at the same index, advancing once */
	while (got < len) {
		wbuffer[got] = 0.0;
		wbuffer2[got] = 0.0;
		got++;
	}
	/* convert stereo to mono */
	if (play->channels == 2) {
		for (i = 0; i < len; i++)
			buffer[i] = (wbuffer[i] + wbuffer2[i]) / 2.0;
	} else {
		for (i = 0; i < len; i++)
			buffer[i] = wbuffer[i];
	}

	return finished;
}

void call_clock(int len)
{
	call_t *call, *call_next;
	call_relation_t *relation;
	int16_t spl[len];
	sample_t orig_samples[len], term_samples[len], *samples[2] = { orig_samples, term_samples };
	int rc;

	for (call = call_list; call; call = call_next) {
		call_next = call->next;

		relation = call->relation_list;
		/* do we have no RTP proxy ? */
		if (!relation->cc_session || !relation->codec)
			continue;

		/* process originating audio */
		jitter_load_samples(&call->orig_dejitter, (uint8_t *)spl, len, sizeof(*spl), jitter_conceal_s16, NULL);

		/* convert to samples */
		int16_to_samples_speech(orig_samples, spl, len);

		/* dtmf decoding */
		if (relation->dtmf_dec_enable)
			dtmf_decode(&relation->dtmf_dec, orig_samples, len);

		if (!call->forking && relation->next) {
			/* process terminating audio */
			jitter_load_samples(&call->term_dejitter, (uint8_t *)spl, len, sizeof(*spl), jitter_conceal_s16, NULL);

			/* convert to samples */
			int16_to_samples_speech(term_samples, spl, len);

			/* dtmf decoding */
			if (relation->next->dtmf_dec_enable)
				dtmf_decode(&relation->next->dtmf_dec, term_samples, len);
		} else
			memset(term_samples, 0, len * sizeof(sample_t));

		/* play (overload data from jitter buffer) */
		if (call->orig_play.fp) {
			rc = _play_wave(&call->orig_play, term_samples, len, call->orig_play_loop, call->orig_play_filename, call->orig_play_deviation);
			if (call->routing.routing && rc)
				routing_send(&call->routing, "wave-finished");
		}
		if (call->term_play.fp) {
			rc = _play_wave(&call->term_play, orig_samples, len, call->term_play_loop, call->term_play_filename, call->term_play_deviation);
			if (call->routing.routing && rc)
				routing_send(&call->routing, "called-wave-finished");
		}

		/* record */
		if (call->rec.fp)
			wave_write(&call->rec, samples, len);

		/* forward audio */
		send_originator(relation, term_samples, len);
		if (!call->forking && relation->next)
			send_terminator(relation->next, orig_samples, len);
	}
}

/* Encode 16 bit samples in host byte order into network byte order (L16).
 *
 * src_data, src_len: input samples and their size in bytes; an odd trailing
 *                    byte is ignored
 * dst_data, dst_len: receive a newly allocated buffer, which the caller must
 *                    free(), and its size in bytes
 *
 * On failure (negative length or out of memory) *dst_data is set to NULL and
 * *dst_len to 0, so callers never see uninitialized outputs.
 */
void encode_l16(uint8_t *src_data, int src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
{
	uint16_t *dst;
	int len = src_len / 2, i;

	*dst_data = NULL;
	*dst_len = 0;
	if (src_len < 0)
		return;

	/* never request 0 bytes, so NULL always means failure */
	dst = malloc(len ? (size_t)len * sizeof(*dst) : 1);
	if (!dst)
		return;
	for (i = 0; i < len; i++) {
		uint16_t spl;

		/* the source is a byte buffer and need not be 16 bit aligned */
		memcpy(&spl, src_data + i * 2, sizeof(spl));
		dst[i] = htons(spl);
	}
	*dst_data = (uint8_t *)dst;
	*dst_len = len * 2;
}

/* Decode 16 bit samples in network byte order (L16) into host byte order.
 *
 * src_data, src_len: input payload and its size in bytes; an odd trailing
 *                    byte is ignored
 * dst_data, dst_len: receive a newly allocated buffer, which the caller must
 *                    free(), and its size in bytes
 *
 * On failure (negative length or out of memory) *dst_data is set to NULL and
 * *dst_len to 0, so callers never see uninitialized outputs.
 */
void decode_l16(uint8_t *src_data, int src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
{
	uint16_t *dst;
	int len = src_len / 2, i;

	*dst_data = NULL;
	*dst_len = 0;
	if (src_len < 0)
		return;

	/* never request 0 bytes, so NULL always means failure */
	dst = malloc(len ? (size_t)len * sizeof(*dst) : 1);
	if (!dst)
		return;
	for (i = 0; i < len; i++) {
		uint16_t spl;

		/* the payload is a byte buffer and need not be 16 bit aligned */
		memcpy(&spl, src_data + i * 2, sizeof(spl));
		dst[i] = ntohs(spl);
	}
	*dst_data = (uint8_t *)dst;
	*dst_len = len * 2;
}

/* Encode a struct telephone_event into a 4 byte telephone-event payload.
 *
 * src_data, src_len: the event; src_len must equal sizeof(struct telephone_event)
 * dst_data, dst_len: receive a newly allocated 4 byte buffer, which the
 *                    caller must free(), and its size
 *
 * Layout written: byte 0 event, byte 1 E flag (bit 7), R flag (bit 6) and
 * volume (bits 0-5, clamped to 0x3f), bytes 2-3 duration, high byte first.
 *
 * On failure (size mismatch or out of memory) *dst_data is set to NULL and
 * *dst_len to 0, so callers never see uninitialized outputs.
 */
void encode_te(uint8_t __attribute__((unused)) *src_data, int __attribute__((unused)) src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
{
	struct telephone_event *te = (void *)src_data;
	uint8_t *dst;

	*dst_data = NULL;
	*dst_len = 0;

	if (src_len != sizeof(*te))
		return;

	dst = calloc(1, 4);
	if (!dst)
		return;
	dst[0] = te->event;
	dst[1] = te->e << 7;
	dst[1] |= te->r << 6;
	dst[1] |= (te->volume < 0x3f) ? te->volume : 0x3f;
	dst[2] = te->duration >> 8;
	dst[3] = te->duration;
	*dst_data = dst;
	*dst_len = 4;
}

/* Decode a telephone-event payload into a struct telephone_event.
 *
 * src_data, src_len: the payload; at least 4 bytes are required
 * dst_data, dst_len: receive a newly allocated struct telephone_event, which
 *                    the caller must free(), and its size
 *
 * Layout read: byte 0 event, byte 1 E flag (bit 7), R flag (bit 6) and
 * volume (bits 0-5), bytes 2-3 duration, high byte first.
 *
 * On failure (payload too short or out of memory) *dst_data is set to NULL
 * and *dst_len to 0, so callers never see uninitialized outputs.
 */
void decode_te(uint8_t *src_data, int src_len, uint8_t **dst_data, int *dst_len, void __attribute__((unused)) *priv)
{
	uint8_t *src = src_data;
	struct telephone_event *te;

	*dst_data = NULL;
	*dst_len = 0;

	if (src_len < 4)
		return;

	te = calloc(1, sizeof(*te));
	if (!te)
		return;
	te->event = src[0];
	te->e = src[1] >> 7;
	te->r = (src[1] >> 6) & 0x1;
	te->volume = src[1] & 0x3f;
	te->duration = (src[2] << 8) | src[3];
	*dst_data = (uint8_t *)te;
	*dst_len = sizeof(*te);
}