From ff500be2c838875e6df7e57713e3c77ef94ad88e Mon Sep 17 00:00:00 2001
From: Daniel Karbach
Date: Thu, 10 Oct 2024 10:38:15 +0200
Subject: [PATCH] clean up media source

---
Review notes (annotation only, placed in the free-text area between the
"---" separator and the diffstat; nothing below the diffstat was altered):

 * Moves the per-stream decode state out of app::Source into two new
   header-only classes, app::AudioReceiver and app::VideoReceiver. Both are
   constructed from the shared ffmpeg::InputContext and expose
   GetStreamIndex(), IsEOF(), Ready(), Send(packet), Flush() and Receive().
 * AudioReceiver decodes, resamples to the PCM F32LE encoder settings
   (2 channels, 48000 Hz, AV_SAMPLE_FMT_FLT) and appends an
   AudioFrameSnapshot to its buffer for every received frame; Mix() adds the
   overlapping part of each buffered snapshot into the caller's plane, and
   DiscardExpired() pops snapshots whose end counter is below the given head.
 * VideoReceiver decodes, scales to AV_PIX_FMT_BGRA at the decoder's own
   width/height and sets its clock from the frame's packet timestamp;
   GetSurface() wraps plane 0 of the output frame in a cairo::Surface.
 * Source keeps only the InputContext, the Packet and the two receivers.
   SeekAudio/SeekVideo, HasSeenAudio/HasSeenVideo, IsEOF, GetVideoSurface,
   Mix and PullPacket now forward to the receivers. Both seek loops test
   IsEOF() before the clock comparison (previously after it).
 * Removed from Source with no replacement in this patch:
   CurrentAudioFrame(), DropAudioFrame(), the local `written` counter in
   Mix() and the local `in_clock` in ReceiveAudio(). Whether anything outside
   these three files used the two removed methods is not visible here.
 * NOTE(review): in this copy every angle-bracket #include target is empty
   and `reinterpret_cast` / `std::list` carry no template arguments; that
   looks like markup stripping in transit - TODO confirm against the commit.
 * NOTE(review): line breaks were laid out to agree with the hunk line
   counts; the indentation characters and the exact position of blank
   context lines are a best-effort layout - verify before applying.

 src/app/AudioReceiver.h | 145 ++++++++++++++++++++++++++++++++
 src/app/Source.h        | 182 ++++++----------------------------------
 src/app/VideoReceiver.h |  98 ++++++++++++++++++++++
 3 files changed, 268 insertions(+), 157 deletions(-)
 create mode 100644 src/app/AudioReceiver.h
 create mode 100644 src/app/VideoReceiver.h

diff --git a/src/app/AudioReceiver.h b/src/app/AudioReceiver.h
new file mode 100644
index 0000000..50bb340
--- /dev/null
+++ b/src/app/AudioReceiver.h
@@ -0,0 +1,145 @@
+#ifndef TEST_APP_AUDIORECEIVER_H_
+#define TEST_APP_AUDIORECEIVER_H_
+
+#include
+extern "C" {
+#include
+}
+
+#include "AudioFrameSnapshot.h"
+#include "Clock.h"
+#include "../ffmpeg/Decoder.h"
+#include "../ffmpeg/Encoder.h"
+#include "../ffmpeg/Frame.h"
+#include "../ffmpeg/InputContext.h"
+#include "../ffmpeg/Packet.h"
+#include "../ffmpeg/Resampler.h"
+
+namespace app {
+
+class AudioReceiver {
+
+public:
+    explicit AudioReceiver(ffmpeg::InputContext &input)
+    : stream(input.FindAudioStream())
+    , decoder(stream.GetCodecId())
+    , encoder(AV_CODEC_ID_PCM_F32LE)
+    , ready(false) {
+        decoder.ReadParameters(stream.GetParameters());
+        decoder.SetTimeBase(stream.GetTimeBase());
+        decoder.Open();
+        encoder.SetDefaultChannelLayout(2);
+        encoder.SetSampleRate(48000);
+        encoder.SetSampleFormat(AV_SAMPLE_FMT_FLT);
+        encoder.Open();
+        resampler.SetOpt("in_channel_count", decoder.GetChannelLayout().nb_channels);
+        resampler.SetOpt("in_sample_rate", decoder.GetSampleRate());
+        resampler.SetOpt("in_sample_fmt", decoder.GetSampleFormat());
+        resampler.SetOpt("out_channel_count", encoder.GetChannelLayout().nb_channels);
+        resampler.SetOpt("out_sample_rate", encoder.GetSampleRate());
+        resampler.SetOpt("out_sample_fmt", encoder.GetSampleFormat());
+        resampler.Init();
+        if (encoder.GetFrameSize() > 0) {
+            output_frame.AllocateAudio(encoder.GetFrameSize(), encoder.GetSampleFormat(), encoder.GetChannelLayout());
+        } else {
+            output_frame.AllocateAudio(decoder.GetFrameSize(), encoder.GetSampleFormat(), encoder.GetChannelLayout());
+        }
+        clock = Clock(encoder.GetTimeBase());
+    }
+    ~AudioReceiver() {
+    }
+
+    AudioReceiver(const AudioReceiver &) = delete;
+    AudioReceiver &operator =(const AudioReceiver &) = delete;
+
+public:
+    void DiscardExpired(int64_t sample_head) {
+        while (!buffer.empty() && buffer.front().GetEndTime().GetCounter() < sample_head) {
+            buffer.pop_front();
+        }
+    }
+
+    int64_t GetSampleEnd() const {
+        return clock.GetCounter() + encoder.GetFrameSize();
+    }
+
+    int GetStreamIndex() const {
+        return stream.GetIndex();
+    }
+
+    bool IsEOF() const {
+        return decoder.IsEOF();
+    }
+
+    bool Ready() const {
+        return ready;
+    }
+
+    void Send(const ffmpeg::Packet &packet) {
+        decoder.SendPacket(packet);
+        while (Receive()) {
+        }
+    }
+
+    void Flush() {
+        decoder.Flush();
+        while (Receive()) {
+        }
+    }
+
+    bool Receive() {
+        bool res = decoder.ReceiveFrame(input_frame);
+        if (res) {
+            ready = true;
+            int converted = resampler.Convert(encoder, input_frame, output_frame);
+            Buffer(converted);
+            clock.Advance(converted);
+        }
+        return res;
+    }
+
+    void Buffer(int size) {
+        const float *plane = reinterpret_cast(output_frame.GetDataPlane(0));
+        int channels = encoder.GetChannelLayout().nb_channels;
+        Clock time = clock.Snapshot();
+        buffer.emplace_back(plane, channels, size, time);
+    }
+
+    void Mix(const Clock &clock, float *plane, int channels, int frame_size) const {
+        int64_t out_begin = clock.GetCounter();
+        int64_t out_end = out_begin + frame_size;
+        for (const AudioFrameSnapshot &frame : buffer) {
+            int64_t frame_begin = frame.GetStartTime().GetCounter();
+            int64_t frame_end = frame_begin + frame.GetSize();
+            if (frame_begin >= out_end) continue;
+            if (frame_end < out_begin) continue;
+            int64_t src_offset = std::max(int64_t(0), out_begin - frame_begin);
+            int64_t dst_offset = std::max(int64_t(0), frame_begin - out_begin);
+            int64_t start = std::max(out_begin, frame_begin);
+            int64_t end = std::min(out_end, frame_end);
+            int64_t size = end - start;
+            int chans = std::min(channels, frame.GetChannels());
+            for (int64_t sample = 0; sample < size; ++sample) {
+                for (int channel = 0; channel < chans; ++channel) {
+                    plane[(sample + dst_offset) * channels + channel] += frame.GetSample(sample + src_offset, channel);
+                }
+            }
+        }
+    }
+
+private:
+    ffmpeg::Stream stream;
+    ffmpeg::Decoder decoder;
+    ffmpeg::Frame input_frame;
+    ffmpeg::Encoder encoder;
+    ffmpeg::Resampler resampler;
+    ffmpeg::Frame output_frame;
+    std::list buffer;
+    Clock clock;
+    bool ready;
+
+};
+
+}
+
+#endif
diff --git a/src/app/Source.h b/src/app/Source.h
index 7a868b8..8893798 100644
--- a/src/app/Source.h
+++ b/src/app/Source.h
@@ -1,30 +1,20 @@
 #ifndef TEST_APP_SOURCE_H_
 #define TEST_APP_SOURCE_H_
 
-#include
 #include
 #include
-#include
-#include
 extern "C" {
-#include
+#include
 #include
-#include
-#include
-#include
+#include
 }
 
-#include "AudioFrameSnapshot.h"
+#include "AudioReceiver.h"
 #include "Clock.h"
+#include "VideoReceiver.h"
 #include "../cairo/Surface.h"
-#include "../ffmpeg/Decoder.h"
-#include "../ffmpeg/Encoder.h"
-#include "../ffmpeg/Frame.h"
 #include "../ffmpeg/InputContext.h"
 #include "../ffmpeg/Packet.h"
-#include "../ffmpeg/Resampler.h"
-#include "../ffmpeg/Scaler.h"
-#include "../ffmpeg/Stream.h"
 
 namespace app {
 
@@ -33,45 +23,8 @@ class Source {
 public:
     explicit Source(const char *url)
     : input(url)
-    , audio_stream(input.FindAudioStream())
-    , video_stream(input.FindVideoStream())
-    , audio_decoder(audio_stream.GetCodecId())
-    , video_decoder(video_stream.GetCodecId())
-    , audio_encoder(AV_CODEC_ID_PCM_F32LE)
-    , seen_audio(false)
-    , seen_video(false) {
-        audio_decoder.ReadParameters(audio_stream.GetParameters());
-        audio_decoder.SetTimeBase(audio_stream.GetTimeBase());
-        audio_decoder.Open();
-        video_decoder.ReadParameters(video_stream.GetParameters());
-        video_decoder.SetTimeBase(video_stream.GetTimeBase());
-        video_decoder.Open();
-        audio_encoder.SetDefaultChannelLayout(2);
-        audio_encoder.SetSampleRate(48000);
-        audio_encoder.SetSampleFormat(AV_SAMPLE_FMT_FLT);
-        audio_encoder.Open();
-        resampler.SetOpt("in_channel_count", audio_decoder.GetChannelLayout().nb_channels);
-        resampler.SetOpt("in_sample_rate", audio_decoder.GetSampleRate());
-        resampler.SetOpt("in_sample_fmt", audio_decoder.GetSampleFormat());
-        resampler.SetOpt("out_channel_count", audio_encoder.GetChannelLayout().nb_channels);
-        resampler.SetOpt("out_sample_rate", audio_encoder.GetSampleRate());
-        resampler.SetOpt("out_sample_fmt", audio_encoder.GetSampleFormat());
-        resampler.Init();
-        scaler.SetOpt("srcw", video_decoder.GetWidth());
-        scaler.SetOpt("srch", video_decoder.GetHeight());
-        scaler.SetOpt("src_format", video_decoder.GetPixelFormat());
-        scaler.SetOpt("dstw", video_decoder.GetWidth());
-        scaler.SetOpt("dsth", video_decoder.GetHeight());
-        scaler.SetOpt("dst_format", AV_PIX_FMT_BGRA);
-        scaler.Init();
-        if (audio_encoder.GetFrameSize() > 0) {
-            audio_output_frame.AllocateAudio(audio_encoder.GetFrameSize(), audio_encoder.GetSampleFormat(), audio_encoder.GetChannelLayout());
-        } else {
-            audio_output_frame.AllocateAudio(audio_decoder.GetFrameSize(), audio_encoder.GetSampleFormat(), audio_encoder.GetChannelLayout());
-        }
-        video_output_frame.AllocateImage(video_decoder.GetWidth(), video_decoder.GetHeight(), AV_PIX_FMT_BGRA);
-        audio_clock = Clock(audio_encoder.GetTimeBase());
-        video_clock = Clock(video_stream.GetTimeBase());
+    , audio(input)
+    , video(input) {
     }
     ~Source() {
     }
@@ -81,149 +34,64 @@ public:
 
 public:
     void SeekAudio(const Clock &target, int frame_size) {
-        while (audio_clock.GetCounter() + audio_encoder.GetFrameSize() < target.GetCounter() + frame_size && !audio_decoder.IsEOF()) {
-            if (!ReceiveAudio()) {
+        while (!audio.IsEOF() && audio.GetSampleEnd() < target.GetCounter() + frame_size) {
+            if (!audio.Receive()) {
+                // TODO: this could potentially discard unexposed video frames
                 PullPacket();
             }
         }
-        while (!audio_buffer.empty() && audio_buffer.front().GetEndTime().GetCounter() < target.GetCounter()) {
-            audio_buffer.pop_front();
-        }
+        audio.DiscardExpired(target.GetCounter());
     }
 
     void SeekVideo(const Clock &target) {
-        while (video_clock.GetMS() < target.GetMS() && !video_decoder.IsEOF()) {
-            if (!ReceiveVideo()) {
+        while (!video.IsEOF() && video.GetClock().GetMS() < target.GetMS()) {
+            if (!video.Receive()) {
                 PullPacket();
             }
         }
     }
 
     bool HasSeenAudio() const {
-        return seen_audio;
+        return audio.Ready();
     }
 
     bool HasSeenVideo() const {
-        return seen_video;
+        return video.Ready();
     }
 
     bool IsEOF() const {
-        return audio_decoder.IsEOF() && video_decoder.IsEOF();
-    }
-
-    const AudioFrameSnapshot &CurrentAudioFrame() const {
-        return audio_buffer.front();
-    }
-
-    void DropAudioFrame() {
-        audio_buffer.pop_front();
+        return audio.IsEOF() && video.IsEOF();
     }
 
     cairo::Surface GetVideoSurface() {
-        return cairo::Surface(
-            video_output_frame.GetDataPlane(0), video_output_frame.GetPlaneLinesize(0), CAIRO_FORMAT_ARGB32,
-            video_decoder.GetWidth(), video_decoder.GetHeight()
-        );
+        return video.GetSurface();
     }
 
     void Mix(const Clock &clock, float *plane, int channels, int frame_size) const {
-        int64_t out_begin = clock.GetCounter();
-        int64_t out_end = out_begin + frame_size;
-        int written = 0;
-        for (const AudioFrameSnapshot &frame : audio_buffer) {
-            int64_t frame_begin = frame.GetStartTime().GetCounter();
-            int64_t frame_end = frame_begin + frame.GetSize();
-            if (frame_begin >= out_end) continue;
-            if (frame_end < out_begin) continue;
-            int64_t src_offset = std::max(int64_t(0), out_begin - frame_begin);
-            int64_t dst_offset = std::max(int64_t(0), frame_begin - out_begin);
-            int64_t start = std::max(out_begin, frame_begin);
-            int64_t end = std::min(out_end, frame_end);
-            int64_t size = end - start;
-            int chans = std::min(channels, frame.GetChannels());
-            for (int64_t sample = 0; sample < size; ++sample) {
-                for (int channel = 0; channel < chans; ++channel) {
-                    plane[(sample + dst_offset) * channels + channel] += frame.GetSample(sample + src_offset, channel);
-                }
-            }
-            written += size;
-        }
+        audio.Mix(clock, plane, channels, frame_size);
     }
 
 private:
     void PullPacket() {
         if (!input.ReadPacket(packet)) {
             // EOF
-            audio_decoder.Flush();
-            while (ReceiveAudio()) {
-            }
-            video_decoder.Flush();
-            while (ReceiveVideo()) {
-            }
+            audio.Flush();
+            video.Flush();
             return;
         }
-        if (packet.GetStreamIndex() == audio_stream.GetIndex()) {
-            audio_decoder.SendPacket(packet);
-            while (ReceiveAudio()) {
-            }
-        } else if (packet.GetStreamIndex() == video_stream.GetIndex()) {
-            video_decoder.SendPacket(packet);
-            while (ReceiveVideo()) {
-            }
+        if (packet.GetStreamIndex() == audio.GetStreamIndex()) {
+            audio.Send(packet);
+        } else if (packet.GetStreamIndex() == video.GetStreamIndex()) {
+            video.Send(packet);
         }
         packet.Unref();
     }
 
-    bool ReceiveAudio() {
-        bool res = audio_decoder.ReceiveFrame(audio_input_frame);
-        if (res) {
-            seen_audio = true;
-            Clock in_clock(audio_decoder.GetTimeBase());
-            in_clock.Set(audio_input_frame.GetBestEffortTimestamp());
-            int converted = resampler.Convert(audio_encoder, audio_input_frame, audio_output_frame);
-            // this may need time scaling?
-            BufferAudio(converted);
-            audio_clock.Advance(converted);
-        }
-        return res;
-    }
-
-    bool ReceiveVideo() {
-        bool res = video_decoder.ReceiveFrame(video_input_frame);
-        if (res) {
-            seen_video = true;
-            scaler.ScaleFrame(video_input_frame, video_output_frame);
-            video_clock.Set(video_input_frame.GetPacketTimestamp());
-        }
-        return res;
-    }
-
-    void BufferAudio(int size) {
-        const float *plane = reinterpret_cast(audio_output_frame.GetDataPlane(0));
-        int channels = audio_encoder.GetChannelLayout().nb_channels;
-        Clock time = audio_clock.Snapshot();
-        audio_buffer.emplace_back(plane, channels, size, time);
-    }
-
 private:
     ffmpeg::InputContext input;
-    ffmpeg::Stream audio_stream;
-    ffmpeg::Stream video_stream;
-    ffmpeg::Decoder audio_decoder;
-    ffmpeg::Decoder video_decoder;
-    ffmpeg::Frame audio_input_frame;
-    ffmpeg::Frame video_input_frame;
-    ffmpeg::Encoder audio_encoder;
+    AudioReceiver audio;
+    VideoReceiver video;
     ffmpeg::Packet packet;
-    ffmpeg::Scaler scaler;
-    ffmpeg::Resampler resampler;
-    ffmpeg::Frame audio_output_frame;
-    ffmpeg::Frame video_output_frame;
-    std::list audio_buffer;
-    Clock audio_clock;
-    Clock video_clock;
-    bool seen_audio;
-    bool seen_video;
 
 };
 
diff --git a/src/app/VideoReceiver.h b/src/app/VideoReceiver.h
new file mode 100644
index 0000000..21fc61d
--- /dev/null
+++ b/src/app/VideoReceiver.h
@@ -0,0 +1,98 @@
+#ifndef TEST_APP_VIDEORECEIVER_H_
+#define TEST_APP_VIDEORECEIVER_H_
+
+#include "Clock.h"
+#include "../cairo/Surface.h"
+#include "../ffmpeg/Decoder.h"
+#include "../ffmpeg/Frame.h"
+#include "../ffmpeg/InputContext.h"
+#include "../ffmpeg/Packet.h"
+#include "../ffmpeg/Scaler.h"
+
+namespace app {
+
+class VideoReceiver {
+
+public:
+    explicit VideoReceiver(ffmpeg::InputContext &input)
+    : stream(input.FindVideoStream())
+    , decoder(stream.GetCodecId())
+    , ready(false) {
+        decoder.ReadParameters(stream.GetParameters());
+        decoder.SetTimeBase(stream.GetTimeBase());
+        decoder.Open();
+        scaler.SetOpt("srcw", decoder.GetWidth());
+        scaler.SetOpt("srch", decoder.GetHeight());
+        scaler.SetOpt("src_format", decoder.GetPixelFormat());
+        scaler.SetOpt("dstw", decoder.GetWidth());
+        scaler.SetOpt("dsth", decoder.GetHeight());
+        scaler.SetOpt("dst_format", AV_PIX_FMT_BGRA);
+        scaler.Init();
+        output_frame.AllocateImage(decoder.GetWidth(), decoder.GetHeight(), AV_PIX_FMT_BGRA);
+        clock = Clock(stream.GetTimeBase());
+    }
+    ~VideoReceiver() {
+    }
+
+    VideoReceiver(const VideoReceiver &) = delete;
+    VideoReceiver &operator =(const VideoReceiver &) = delete;
+
+public:
+    const Clock &GetClock() const {
+        return clock;
+    }
+    int GetStreamIndex() const {
+        return stream.GetIndex();
+    }
+
+    bool IsEOF() const {
+        return decoder.IsEOF();
+    }
+
+    bool Ready() const {
+        return ready;
+    }
+
+    cairo::Surface GetSurface() {
+        return cairo::Surface(
+            output_frame.GetDataPlane(0), output_frame.GetPlaneLinesize(0), CAIRO_FORMAT_ARGB32,
+            decoder.GetWidth(), decoder.GetHeight()
+        );
+    }
+
+    void Send(const ffmpeg::Packet &packet) {
+        decoder.SendPacket(packet);
+        while (Receive()) {
+        }
+    }
+
+    void Flush() {
+        decoder.Flush();
+        while (Receive()) {
+        }
+    }
+
+    bool Receive() {
+        bool res = decoder.ReceiveFrame(input_frame);
+        if (res) {
+            ready = true;
+            scaler.ScaleFrame(input_frame, output_frame);
+            clock.Set(input_frame.GetPacketTimestamp());
+        }
+        return res;
+    }
+
+private:
+    ffmpeg::Stream stream;
+    ffmpeg::Decoder decoder;
+    ffmpeg::Frame input_frame;
+    ffmpeg::Scaler scaler;
+    ffmpeg::Frame output_frame;
+    Clock clock;
+    bool ready;
+
+};
+
+}
+
+#endif
-- 
2.39.2