RTP Player: Visual waveform shows what is really played

Visual waveform is derived from decoded audio. When audio is decoded
incorrectly, waveform now shows it.
E.g. on issue 14401 is now audio play aligned with waveform, but it
exhibits that decoded audio is incorrect - about two times longer than
pcap!

Changes:
- samplefile_ renamed to sample_file_
- tempfile_ is renamed to temp_file_
- decode() is separated to decodeAudio and decodeVisual
- Frame info stores frame len and frame_num for every frame. We must hold
it per frame as it may change in time. Info is stored in separate temp file
as waveform samples.
This commit is contained in:
Jirka Novak 2021-03-20 19:05:42 +01:00 committed by Wireshark GitLab Utility
parent fdbe68e3f0
commit a123578fd5
2 changed files with 116 additions and 45 deletions

View File

@ -65,8 +65,10 @@ RtpAudioStream::RtpAudioStream(QObject *parent, rtpstream_info_t *rtpstream, boo
speex_resampler_skip_zeros(visual_resampler_);
QString tempname = QString("%1/wireshark_rtp_stream").arg(QDir::tempPath());
samplefile_ = new QTemporaryFile(tempname, this);
samplefile_->open();
sample_file_ = new QTemporaryFile(tempname, this);
sample_file_->open();
sample_file_frame_ = new QTemporaryFile(tempname, this);
sample_file_frame_->open();
// RTP_STREAM_DEBUG("Writing to %s", tempname.toUtf8().constData());
}
@ -83,8 +85,9 @@ RtpAudioStream::~RtpAudioStream()
if (audio_resampler_) speex_resampler_destroy (audio_resampler_);
speex_resampler_destroy (visual_resampler_);
rtpstream_id_free(&id_);
if (samplefile_) delete samplefile_;
// tempfile_ is released by audio_output_
if (sample_file_) delete sample_file_;
if (sample_file_frame_) delete sample_file_frame_;
// temp_file_ is released by audio_output_
if (audio_output_) delete audio_output_;
}
@ -154,10 +157,9 @@ void RtpAudioStream::reset(double global_start_time)
if (audio_resampler_) {
speex_resampler_reset_mem(audio_resampler_);
}
if (visual_resampler_) {
speex_resampler_reset_mem(visual_resampler_);
}
samplefile_->seek(0);
speex_resampler_reset_mem(visual_resampler_);
sample_file_->seek(0);
sample_file_frame_->seek(0);
}
AudioRouting RtpAudioStream::getAudioRouting()
@ -179,12 +181,20 @@ void RtpAudioStream::decode(QAudioDeviceInfo out_device)
{
if (rtp_packets_.size() < 1) return;
decodeAudio(out_device);
speex_resampler_reset_mem(visual_resampler_);
decodeVisual();
}
void RtpAudioStream::decodeAudio(QAudioDeviceInfo out_device)
{
// gtk/rtp_player.c:decode_rtp_stream
// XXX This is more messy than it should be.
gsize resample_buff_len = 0x1000;
SAMPLE *resample_buff = (SAMPLE *) g_malloc(resample_buff_len);
spx_uint32_t cur_in_rate = 0, visual_out_rate = 0;
spx_uint32_t cur_in_rate = 0;
char *write_buff = NULL;
qint64 write_bytes = 0;
unsigned channels = 0;
@ -200,13 +210,14 @@ void RtpAudioStream::decode(QAudioDeviceInfo out_device)
size_t decoded_bytes_prev = 0;
_rtp_packet_frame frame_info;
for (int cur_packet = 0; cur_packet < rtp_packets_.size(); cur_packet++) {
SAMPLE *decode_buff = NULL;
// XXX The GTK+ UI updates a progress bar here.
rtp_packet_t *rtp_packet = rtp_packets_[cur_packet];
stop_rel_time_ = start_rel_time_ + rtp_packet->arrive_offset;
speex_resampler_get_rate(visual_resampler_, &cur_in_rate, &visual_out_rate);
QString payload_name;
if (rtp_packet->info->info_payload_type_str) {
@ -261,10 +272,10 @@ void RtpAudioStream::decode(QAudioDeviceInfo out_device)
RTP_STREAM_DEBUG("Audio sample rate is %u", audio_out_rate_);
// Prepend silence to match our sibling streams.
samplefile_->seek(0);
qint64 prepend_samples = (start_rel_time_ - global_start_rel_time_) * audio_out_rate_;
if (prepend_samples > 0) {
writeSilence(prepend_samples);
sample_file_->seek(0);
prepend_samples_ = (start_rel_time_ - global_start_rel_time_) * audio_out_rate_;
if (prepend_samples_ > 0) {
writeSilence(prepend_samples_);
}
}
@ -302,8 +313,8 @@ void RtpAudioStream::decode(QAudioDeviceInfo out_device)
silence_timestamps_.append(stop_rel_time_);
decoded_bytes_prev = 0;
/* defined start_timestamp to avoid overflow in timestamp. TODO: handle the timestamp correctly */
/* XXX: if timestamps (RTP) are missing/ignored try use packet arrive time only (see also "rtp_time") */
/* defined start_timestamp to avoid overflow in timestamp. TODO: handle the timestamp correctly */
/* XXX: if timestamps (RTP) are missing/ignored try use packet arrive time only (see also "rtp_time") */
start_timestamp = rtp_packet->info->info_timestamp;
start_rtp_time = 0;
start_time = rtp_packet->arrive_offset;
@ -342,7 +353,7 @@ void RtpAudioStream::decode(QAudioDeviceInfo out_device)
arrive_time_prev = arrive_time;
}
// Write samples to our file.
// Prepare samples to write
write_buff = (char *) decode_buff;
write_bytes = decoded_bytes;
@ -359,7 +370,6 @@ void RtpAudioStream::decode(QAudioDeviceInfo out_device)
// Adjust rates if needed.
if (sample_rate != cur_in_rate) {
speex_resampler_set_rate(audio_resampler_, sample_rate, audio_out_rate);
speex_resampler_set_rate(visual_resampler_, sample_rate, visual_out_rate);
RTP_STREAM_DEBUG("Changed input rate from %u to %u Hz. Out is %u.", cur_in_rate, sample_rate, audio_out_rate_);
}
}
@ -377,23 +387,12 @@ void RtpAudioStream::decode(QAudioDeviceInfo out_device)
}
// Write the decoded, possibly-resampled audio to our temp file.
samplefile_->write(write_buff, write_bytes);
sample_file_->write(write_buff, write_bytes);
// Collect our visual samples.
spx_uint32_t in_len = (spx_uint32_t)rtp_packet->info->info_payload_len;
spx_uint32_t out_len = (visual_out_rate * in_len / sample_rate) + (visual_out_rate % sample_rate != 0);
if (out_len * SAMPLE_BYTES > resample_buff_len) {
while ((out_len * SAMPLE_BYTES > resample_buff_len))
resample_buff_len *= 2;
resample_buff = (SAMPLE *) g_realloc(resample_buff, resample_buff_len);
}
speex_resampler_process_int(visual_resampler_, 0, decode_buff, &in_len, resample_buff, &out_len);
for (unsigned i = 0; i < out_len; i++) {
packet_timestamps_[stop_rel_time_ + (double) i / visual_out_rate] = rtp_packet->frame_num;
if (qAbs(resample_buff[i]) > max_sample_val_) max_sample_val_ = qAbs(resample_buff[i]);
visual_samples_.append(resample_buff[i]);
}
// Record frame info to separate file
frame_info.len = write_bytes;
frame_info.frame_num = rtp_packet->frame_num;
sample_file_frame_->write((char *)&frame_info, sizeof(frame_info));
// Finally, write the resampled audio to our temp file and clean up.
g_free(decode_buff);
@ -401,6 +400,67 @@ void RtpAudioStream::decode(QAudioDeviceInfo out_device)
g_free(resample_buff);
}
// We preallocate buffer, 320 samples is enough for most scenarios
#define VISUAL_BUFF_LEN (320)
#define VISUAL_BUFF_BYTES (SAMPLE_BYTES * VISUAL_BUFF_LEN)
void RtpAudioStream::decodeVisual()
{
spx_uint32_t cur_in_rate = 0, visual_out_rate = 0;
guint64 read_bytes = 0;
spx_uint32_t read_len = 0;
gint32 read_buff_bytes = VISUAL_BUFF_BYTES;
SAMPLE *read_buff = (SAMPLE *) g_malloc(read_buff_bytes);
gint32 resample_buff_bytes = VISUAL_BUFF_BYTES;
SAMPLE *resample_buff = (SAMPLE *) g_malloc(resample_buff_bytes);
unsigned int sample_no = 0;
_rtp_packet_frame frame_info;
spx_uint32_t out_len;
speex_resampler_get_rate(visual_resampler_, &cur_in_rate, &visual_out_rate);
// Skip silence at begin of the stream
sample_file_->seek(prepend_samples_ * SAMPLE_BYTES);
sample_file_frame_->seek(0);
// Loop over every frame record
while(sample_file_frame_->read((char *)&frame_info, sizeof(frame_info))) {
// Resize buffer when needed
if (frame_info.len > read_buff_bytes) {
while ((frame_info.len > read_buff_bytes)) {
read_buff_bytes *= 2;
}
read_buff = (SAMPLE *) g_realloc(read_buff, read_buff_bytes);
}
read_bytes = sample_file_->read((char *)read_buff, frame_info.len);
read_len = (spx_uint32_t)read_bytes / SAMPLE_BYTES;
out_len = (visual_out_rate * read_len / audio_out_rate_) + (visual_out_rate % audio_out_rate_ != 0);
// Resize buffer when needed
if (out_len * SAMPLE_BYTES > (spx_uint32_t)resample_buff_bytes) {
while ((out_len * SAMPLE_BYTES > (spx_uint32_t)resample_buff_bytes)) {
resample_buff_bytes *= 2;
}
resample_buff = (SAMPLE *) g_realloc(resample_buff, resample_buff_bytes);
}
// Resample
speex_resampler_process_int(visual_resampler_, 0, read_buff, &read_len, resample_buff, &out_len);
// Create timestamp and visual sample
for (unsigned i = 0; i < out_len; i++) {
double time = start_rel_time_ + (double) sample_no / visual_out_rate;
packet_timestamps_[time] = frame_info.frame_num;
if (qAbs(resample_buff[i]) > max_sample_val_) max_sample_val_ = qAbs(resample_buff[i]);
visual_samples_.append(resample_buff[i]);
sample_no++;
}
}
g_free(resample_buff);
g_free(read_buff);
}
const QStringList RtpAudioStream::payloadNames() const
{
QStringList payload_names = payload_names_.values();
@ -587,8 +647,8 @@ bool RtpAudioStream::prepareForPlay(QAudioDeviceInfo out_device)
format.setCodec("audio/pcm");
// RTP_STREAM_DEBUG("playing %s %d samples @ %u Hz",
// samplefile_->fileName().toUtf8().constData(),
// (int) samplefile_->size(), audio_out_rate_);
// sample_file_->fileName().toUtf8().constData(),
// (int) sample_file_->size(), audio_out_rate_);
if (!out_device.isFormatSupported(format)) {
QString playback_error = tr("%1 does not support PCM at %2. Preferred format is %3")
@ -601,7 +661,7 @@ bool RtpAudioStream::prepareForPlay(QAudioDeviceInfo out_device)
start_pos = (qint64)(start_play_time_ * SAMPLE_BYTES * audio_out_rate_);
// Round to SAMPLE_BYTES boundary
start_pos = (start_pos / SAMPLE_BYTES) * SAMPLE_BYTES;
size = samplefile_->size();
size = sample_file_->size();
if (stereo_required_) {
// There is 2x more samples for stereo
start_pos *= 2;
@ -611,15 +671,15 @@ bool RtpAudioStream::prepareForPlay(QAudioDeviceInfo out_device)
int buffer_size;
// Start and stop audio with no connection to UI and store buffer size
tempfile_ = new AudioRoutingFilter(samplefile_, stereo_required_, audio_routing_);
tempfile_->seek(start_pos);
temp_file_ = new AudioRoutingFilter(sample_file_, stereo_required_, audio_routing_);
temp_file_->seek(start_pos);
audio_output_ = new QAudioOutput(out_device, format, this);
audio_output_->start(tempfile_);
audio_output_->start(temp_file_);
buffer_size = audio_output_->bufferSize();
audio_output_->stop();
// Start audio again with trippled buffer size
tempfile_->seek(start_pos);
temp_file_->seek(start_pos);
audio_output_->setBufferSize(buffer_size*3);
audio_output_->setNotifyInterval(100); // ~15 fps
connect(audio_output_, SIGNAL(stateChanged(QAudio::State)), this, SLOT(outputStateChanged(QAudio::State)));
@ -635,7 +695,7 @@ bool RtpAudioStream::prepareForPlay(QAudioDeviceInfo out_device)
void RtpAudioStream::startPlaying()
{
audio_output_->start(tempfile_);
audio_output_->start(temp_file_);
// QTBUG-6548 StoppedState is not always emitted on error, force a cleanup
// in case playback fails immediately.
if (audio_output_ && audio_output_->state() == QAudio::StoppedState) {
@ -675,7 +735,7 @@ void RtpAudioStream::writeSilence(qint64 samples)
char *silence_buff = (char *) g_malloc0(silence_bytes);
RTP_STREAM_DEBUG("Writing " G_GUINT64_FORMAT " silence samples", samples);
samplefile_->write(silence_buff, silence_bytes);
sample_file_->write(silence_buff, silence_bytes);
g_free(silence_buff);
// Silence is inserted to audio file only.

View File

@ -36,6 +36,12 @@ class QTemporaryFile;
struct _rtp_info;
struct _rtp_sample;
// Structure used for storing frame num during visual waveform decoding
typedef struct {
qint64 len;
guint32 frame_num;
} _rtp_packet_frame;
class RtpAudioStream : public QObject
{
Q_OBJECT
@ -150,8 +156,9 @@ private:
rtpstream_id_t id_;
QVector<struct _rtp_packet *>rtp_packets_;
QTemporaryFile *samplefile_;
QIODevice *tempfile_;
QTemporaryFile *sample_file_; // Stores waveform samples
QTemporaryFile *sample_file_frame_; // Stores _rtp_packet_frame per packet
QIODevice *temp_file_;
struct _GHashTable *decoders_hash_;
// TODO: It is not used
//QList<const rtpstream_info_t *>rtpstreams_;
@ -159,6 +166,7 @@ private:
double start_abs_offset_;
double start_rel_time_;
double stop_rel_time_;
qint64 prepend_samples_; // Count of silence samples to match other streams
AudioRouting audio_routing_;
bool stereo_required_;
quint32 audio_out_rate_;
@ -183,6 +191,9 @@ private:
const QString formatDescription(const QAudioFormat & format);
QString currentOutputDevice();
void decodeAudio(QAudioDeviceInfo out_device);
void decodeVisual();
private slots:
void outputStateChanged(QAudio::State new_state);
};