--- /dev/null
+#ifndef TEST_APP_AUDIORECEIVER_H_
+#define TEST_APP_AUDIORECEIVER_H_
+
+#include <algorithm>
+#include <list>
+extern "C" {
+#include <libavcodec/avcodec.h>
+}
+
+#include "AudioFrameSnapshot.h"
+#include "Clock.h"
+#include "../ffmpeg/Decoder.h"
+#include "../ffmpeg/Encoder.h"
+#include "../ffmpeg/Frame.h"
+#include "../ffmpeg/InputContext.h"
+#include "../ffmpeg/Packet.h"
+#include "../ffmpeg/Resampler.h"
+
+namespace app {
+
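+// Wraps the audio side of an input: decodes the audio stream, resamples it to
+// interleaved 32-bit float stereo at 48 kHz, and keeps the resampled frames in
+// a timestamped buffer so Mix() can accumulate them into arbitrary output frames.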
+class AudioReceiver {
+
+public:
+ explicit AudioReceiver(ffmpeg::InputContext &input)
+ : stream(input.FindAudioStream())
+ , decoder(stream.GetCodecId())
+ , encoder(AV_CODEC_ID_PCM_F32LE)
+ , ready(false) {
+ decoder.ReadParameters(stream.GetParameters());
+ decoder.SetTimeBase(stream.GetTimeBase());
+ decoder.Open();
+ encoder.SetDefaultChannelLayout(2);
+ encoder.SetSampleRate(48000);
+ encoder.SetSampleFormat(AV_SAMPLE_FMT_FLT);
+ encoder.Open();
+ resampler.SetOpt("in_channel_count", decoder.GetChannelLayout().nb_channels);
+ resampler.SetOpt("in_sample_rate", decoder.GetSampleRate());
+ resampler.SetOpt("in_sample_fmt", decoder.GetSampleFormat());
+ resampler.SetOpt("out_channel_count", encoder.GetChannelLayout().nb_channels);
+ resampler.SetOpt("out_sample_rate", encoder.GetSampleRate());
+ resampler.SetOpt("out_sample_fmt", encoder.GetSampleFormat());
+ resampler.Init();
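+ // PCM encoders typically report a frame size of 0, so fall back to the
+ // decoder's frame size when allocating the resampled output frame.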
+ if (encoder.GetFrameSize() > 0) {
+ output_frame.AllocateAudio(encoder.GetFrameSize(), encoder.GetSampleFormat(), encoder.GetChannelLayout());
+ } else {
+ output_frame.AllocateAudio(decoder.GetFrameSize(), encoder.GetSampleFormat(), encoder.GetChannelLayout());
+ }
+ clock = Clock(encoder.GetTimeBase());
+ }
+ ~AudioReceiver() {
+ }
+
+ AudioReceiver(const AudioReceiver &) = delete;
+ AudioReceiver &operator =(const AudioReceiver &) = delete;
+
+public:
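+ // Drop buffered frames that end before sample_head; they can no longer
+ // overlap any later mix.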
+ void DiscardExpired(int64_t sample_head) {
+ while (!buffer.empty() && buffer.front().GetEndTime().GetCounter() < sample_head) {
+ buffer.pop_front();
+ }
+ }
+
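+ // Sample position up to which audio has been decoded, plus one encoder frame;
+ // Source::SeekAudio compares this against the seek target to decide whether
+ // more decoding is needed.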
+ int64_t GetSampleEnd() const {
+ return clock.GetCounter() + encoder.GetFrameSize();
+ }
+
+ int GetStreamIndex() const {
+ return stream.GetIndex();
+ }
+
+ bool IsEOF() const {
+ return decoder.IsEOF();
+ }
+
+ bool Ready() const {
+ return ready;
+ }
+
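+ // Feed one packet to the decoder and drain every frame it yields; Flush()
+ // does the same once the input reaches end of stream.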
+ void Send(const ffmpeg::Packet &packet) {
+ decoder.SendPacket(packet);
+ while (Receive()) {
+ }
+ }
+
+ void Flush() {
+ decoder.Flush();
+ while (Receive()) {
+ }
+ }
+
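+ // Pull one decoded frame if available, resample it into output_frame, append
+ // a timestamped snapshot to the buffer and advance the clock by the number of
+ // converted samples.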
+ bool Receive() {
+ bool res = decoder.ReceiveFrame(input_frame);
+ if (res) {
+ ready = true;
+ int converted = resampler.Convert(encoder, input_frame, output_frame);
+ Buffer(converted);
+ clock.Advance(converted);
+ }
+ return res;
+ }
+
+ void Buffer(int size) {
+ const float *plane = reinterpret_cast<const float *>(output_frame.GetDataPlane(0));
+ int channels = encoder.GetChannelLayout().nb_channels;
+ Clock time = clock.Snapshot();
+ buffer.emplace_back(plane, channels, size, time);
+ }
+
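+ // Accumulate the overlapping part of each buffered frame into the caller's
+ // interleaved output plane; offsets are computed in samples on the resampled
+ // 48 kHz timeline.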
+ void Mix(const Clock &target, float *plane, int channels, int frame_size) const {
+ int64_t out_begin = target.GetCounter();
+ int64_t out_end = out_begin + frame_size;
+ for (const AudioFrameSnapshot &frame : buffer) {
+ int64_t frame_begin = frame.GetStartTime().GetCounter();
+ int64_t frame_end = frame_begin + frame.GetSize();
+ if (frame_begin >= out_end) continue;
+ if (frame_end < out_begin) continue;
+ int64_t src_offset = std::max(int64_t(0), out_begin - frame_begin);
+ int64_t dst_offset = std::max(int64_t(0), frame_begin - out_begin);
+ int64_t start = std::max(out_begin, frame_begin);
+ int64_t end = std::min(out_end, frame_end);
+ int64_t size = end - start;
+ int chans = std::min(channels, frame.GetChannels());
+ for (int64_t sample = 0; sample < size; ++sample) {
+ for (int channel = 0; channel < chans; ++channel) {
+ plane[(sample + dst_offset) * channels + channel] += frame.GetSample(sample + src_offset, channel);
+ }
+ }
+ }
+ }
+
+private:
+ ffmpeg::Stream stream;
+ ffmpeg::Decoder decoder;
+ ffmpeg::Frame input_frame;
+ ffmpeg::Encoder encoder;
+ ffmpeg::Resampler resampler;
+ ffmpeg::Frame output_frame;
+ std::list<AudioFrameSnapshot> buffer;
+ Clock clock;
+ bool ready;
+
+};
+
+}
+
+#endif
#ifndef TEST_APP_SOURCE_H_
#define TEST_APP_SOURCE_H_
-#include <algorithm>
#include <cairo.h>
#include <cmath>
-#include <iostream>
-#include <list>
extern "C" {
-#include <libavcodec/codec_id.h>
+#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
-#include <libavutil/channel_layout.h>
-#include <libavutil/pixfmt.h>
-#include <libavutil/samplefmt.h>
+#include <libavutil/avutil.h>
}
-#include "AudioFrameSnapshot.h"
+#include "AudioReceiver.h"
#include "Clock.h"
+#include "VideoReceiver.h"
#include "../cairo/Surface.h"
-#include "../ffmpeg/Decoder.h"
-#include "../ffmpeg/Encoder.h"
-#include "../ffmpeg/Frame.h"
#include "../ffmpeg/InputContext.h"
#include "../ffmpeg/Packet.h"
-#include "../ffmpeg/Resampler.h"
-#include "../ffmpeg/Scaler.h"
-#include "../ffmpeg/Stream.h"
namespace app {
public:
explicit Source(const char *url)
: input(url)
- , audio_stream(input.FindAudioStream())
- , video_stream(input.FindVideoStream())
- , audio_decoder(audio_stream.GetCodecId())
- , video_decoder(video_stream.GetCodecId())
- , audio_encoder(AV_CODEC_ID_PCM_F32LE)
- , seen_audio(false)
- , seen_video(false) {
- audio_decoder.ReadParameters(audio_stream.GetParameters());
- audio_decoder.SetTimeBase(audio_stream.GetTimeBase());
- audio_decoder.Open();
- video_decoder.ReadParameters(video_stream.GetParameters());
- video_decoder.SetTimeBase(video_stream.GetTimeBase());
- video_decoder.Open();
- audio_encoder.SetDefaultChannelLayout(2);
- audio_encoder.SetSampleRate(48000);
- audio_encoder.SetSampleFormat(AV_SAMPLE_FMT_FLT);
- audio_encoder.Open();
- resampler.SetOpt("in_channel_count", audio_decoder.GetChannelLayout().nb_channels);
- resampler.SetOpt("in_sample_rate", audio_decoder.GetSampleRate());
- resampler.SetOpt("in_sample_fmt", audio_decoder.GetSampleFormat());
- resampler.SetOpt("out_channel_count", audio_encoder.GetChannelLayout().nb_channels);
- resampler.SetOpt("out_sample_rate", audio_encoder.GetSampleRate());
- resampler.SetOpt("out_sample_fmt", audio_encoder.GetSampleFormat());
- resampler.Init();
- scaler.SetOpt("srcw", video_decoder.GetWidth());
- scaler.SetOpt("srch", video_decoder.GetHeight());
- scaler.SetOpt("src_format", video_decoder.GetPixelFormat());
- scaler.SetOpt("dstw", video_decoder.GetWidth());
- scaler.SetOpt("dsth", video_decoder.GetHeight());
- scaler.SetOpt("dst_format", AV_PIX_FMT_BGRA);
- scaler.Init();
- if (audio_encoder.GetFrameSize() > 0) {
- audio_output_frame.AllocateAudio(audio_encoder.GetFrameSize(), audio_encoder.GetSampleFormat(), audio_encoder.GetChannelLayout());
- } else {
- audio_output_frame.AllocateAudio(audio_decoder.GetFrameSize(), audio_encoder.GetSampleFormat(), audio_encoder.GetChannelLayout());
- }
- video_output_frame.AllocateImage(video_decoder.GetWidth(), video_decoder.GetHeight(), AV_PIX_FMT_BGRA);
- audio_clock = Clock(audio_encoder.GetTimeBase());
- video_clock = Clock(video_stream.GetTimeBase());
+ , audio(input)
+ , video(input) {
}
~Source() {
}
public:
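+ // Decode audio until the buffer covers up to target + frame_size, then drop
+ // frames that end before the target.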
void SeekAudio(const Clock &target, int frame_size) {
- while (audio_clock.GetCounter() + audio_encoder.GetFrameSize() < target.GetCounter() + frame_size && !audio_decoder.IsEOF()) {
- if (!ReceiveAudio()) {
+ while (!audio.IsEOF() && audio.GetSampleEnd() < target.GetCounter() + frame_size) {
+ if (!audio.Receive()) {
+ // TODO: PullPacket may also decode video here, overwriting frames that have not been displayed yet
PullPacket();
}
}
- while (!audio_buffer.empty() && audio_buffer.front().GetEndTime().GetCounter() < target.GetCounter()) {
- audio_buffer.pop_front();
- }
+ audio.DiscardExpired(target.GetCounter());
}
void SeekVideo(const Clock &target) {
- while (video_clock.GetMS() < target.GetMS() && !video_decoder.IsEOF()) {
- if (!ReceiveVideo()) {
+ while (!video.IsEOF() && video.GetClock().GetMS() < target.GetMS()) {
+ if (!video.Receive()) {
PullPacket();
}
}
}
bool HasSeenAudio() const {
- return seen_audio;
+ return audio.Ready();
}
bool HasSeenVideo() const {
- return seen_video;
+ return video.Ready();
}
bool IsEOF() const {
- return audio_decoder.IsEOF() && video_decoder.IsEOF();
- }
-
- const AudioFrameSnapshot &CurrentAudioFrame() const {
- return audio_buffer.front();
- }
-
- void DropAudioFrame() {
- audio_buffer.pop_front();
+ return audio.IsEOF() && video.IsEOF();
}
cairo::Surface GetVideoSurface() {
- return cairo::Surface(
- video_output_frame.GetDataPlane(0), video_output_frame.GetPlaneLinesize(0), CAIRO_FORMAT_ARGB32,
- video_decoder.GetWidth(), video_decoder.GetHeight()
- );
+ return video.GetSurface();
}
void Mix(const Clock &clock, float *plane, int channels, int frame_size) const {
- int64_t out_begin = clock.GetCounter();
- int64_t out_end = out_begin + frame_size;
- int written = 0;
- for (const AudioFrameSnapshot &frame : audio_buffer) {
- int64_t frame_begin = frame.GetStartTime().GetCounter();
- int64_t frame_end = frame_begin + frame.GetSize();
- if (frame_begin >= out_end) continue;
- if (frame_end < out_begin) continue;
- int64_t src_offset = std::max(int64_t(0), out_begin - frame_begin);
- int64_t dst_offset = std::max(int64_t(0), frame_begin - out_begin);
- int64_t start = std::max(out_begin, frame_begin);
- int64_t end = std::min(out_end, frame_end);
- int64_t size = end - start;
- int chans = std::min(channels, frame.GetChannels());
- for (int64_t sample = 0; sample < size; ++sample) {
- for (int channel = 0; channel < chans; ++channel) {
- plane[(sample + dst_offset) * channels + channel] += frame.GetSample(sample + src_offset, channel);
- }
- }
- written += size;
- }
+ audio.Mix(clock, plane, channels, frame_size);
}
private:
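+ // Read the next packet and hand it to the receiver that owns its stream;
+ // on end of input, flush both decoders instead.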
void PullPacket() {
if (!input.ReadPacket(packet)) {
// EOF
- audio_decoder.Flush();
- while (ReceiveAudio()) {
- }
- video_decoder.Flush();
- while (ReceiveVideo()) {
- }
+ audio.Flush();
+ video.Flush();
return;
}
- if (packet.GetStreamIndex() == audio_stream.GetIndex()) {
- audio_decoder.SendPacket(packet);
- while (ReceiveAudio()) {
- }
- } else if (packet.GetStreamIndex() == video_stream.GetIndex()) {
- video_decoder.SendPacket(packet);
- while (ReceiveVideo()) {
- }
+ if (packet.GetStreamIndex() == audio.GetStreamIndex()) {
+ audio.Send(packet);
+ } else if (packet.GetStreamIndex() == video.GetStreamIndex()) {
+ video.Send(packet);
}
packet.Unref();
}
- bool ReceiveAudio() {
- bool res = audio_decoder.ReceiveFrame(audio_input_frame);
- if (res) {
- seen_audio = true;
- Clock in_clock(audio_decoder.GetTimeBase());
- in_clock.Set(audio_input_frame.GetBestEffortTimestamp());
- int converted = resampler.Convert(audio_encoder, audio_input_frame, audio_output_frame);
- // this may need time scaling?
- BufferAudio(converted);
- audio_clock.Advance(converted);
- }
- return res;
- }
-
- bool ReceiveVideo() {
- bool res = video_decoder.ReceiveFrame(video_input_frame);
- if (res) {
- seen_video = true;
- scaler.ScaleFrame(video_input_frame, video_output_frame);
- video_clock.Set(video_input_frame.GetPacketTimestamp());
- }
- return res;
- }
-
- void BufferAudio(int size) {
- const float *plane = reinterpret_cast<float *>(audio_output_frame.GetDataPlane(0));
- int channels = audio_encoder.GetChannelLayout().nb_channels;
- Clock time = audio_clock.Snapshot();
- audio_buffer.emplace_back(plane, channels, size, time);
- }
-
private:
ffmpeg::InputContext input;
- ffmpeg::Stream audio_stream;
- ffmpeg::Stream video_stream;
- ffmpeg::Decoder audio_decoder;
- ffmpeg::Decoder video_decoder;
- ffmpeg::Frame audio_input_frame;
- ffmpeg::Frame video_input_frame;
- ffmpeg::Encoder audio_encoder;
+ AudioReceiver audio;
+ VideoReceiver video;
ffmpeg::Packet packet;
- ffmpeg::Scaler scaler;
- ffmpeg::Resampler resampler;
- ffmpeg::Frame audio_output_frame;
- ffmpeg::Frame video_output_frame;
- std::list<AudioFrameSnapshot> audio_buffer;
- Clock audio_clock;
- Clock video_clock;
- bool seen_audio;
- bool seen_video;
};
--- /dev/null
+#ifndef TEST_APP_VIDEORECEIVER_H_
+#define TEST_APP_VIDEORECEIVER_H_
+
+#include "Clock.h"
+#include "../cairo/Surface.h"
+#include "../ffmpeg/Decoder.h"
+#include "../ffmpeg/Frame.h"
+#include "../ffmpeg/InputContext.h"
+#include "../ffmpeg/Packet.h"
+#include "../ffmpeg/Scaler.h"
+
+namespace app {
+
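+// Wraps the video side of an input: decodes the video stream, converts each
+// frame to BGRA at its native resolution so it can be wrapped in a cairo
+// surface, and tracks the timestamp of the most recently decoded frame.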
+class VideoReceiver {
+
+public:
+ explicit VideoReceiver(ffmpeg::InputContext &input)
+ : stream(input.FindVideoStream())
+ , decoder(stream.GetCodecId())
+ , ready(false) {
+ decoder.ReadParameters(stream.GetParameters());
+ decoder.SetTimeBase(stream.GetTimeBase());
+ decoder.Open();
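+ // Convert to BGRA at the source resolution; cairo's ARGB32 format has the
+ // same byte order as BGRA on little-endian hosts.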
+ scaler.SetOpt("srcw", decoder.GetWidth());
+ scaler.SetOpt("srch", decoder.GetHeight());
+ scaler.SetOpt("src_format", decoder.GetPixelFormat());
+ scaler.SetOpt("dstw", decoder.GetWidth());
+ scaler.SetOpt("dsth", decoder.GetHeight());
+ scaler.SetOpt("dst_format", AV_PIX_FMT_BGRA);
+ scaler.Init();
+ output_frame.AllocateImage(decoder.GetWidth(), decoder.GetHeight(), AV_PIX_FMT_BGRA);
+ clock = Clock(stream.GetTimeBase());
+ }
+ ~VideoReceiver() {
+ }
+
+ VideoReceiver(const VideoReceiver &) = delete;
+ VideoReceiver &operator =(const VideoReceiver &) = delete;
+
+public:
+ const Clock &GetClock() const {
+ return clock;
+ }
+
+ int GetStreamIndex() const {
+ return stream.GetIndex();
+ }
+
+ bool IsEOF() const {
+ return decoder.IsEOF();
+ }
+
+ bool Ready() const {
+ return ready;
+ }
+
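+ // Expose the converted frame's pixels as a cairo surface backed directly by
+ // output_frame's data plane.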
+ cairo::Surface GetSurface() {
+ return cairo::Surface(
+ output_frame.GetDataPlane(0), output_frame.GetPlaneLinesize(0), CAIRO_FORMAT_ARGB32,
+ decoder.GetWidth(), decoder.GetHeight()
+ );
+ }
+
+ void Send(const ffmpeg::Packet &packet) {
+ decoder.SendPacket(packet);
+ while (Receive()) {
+ }
+ }
+
+ void Flush() {
+ decoder.Flush();
+ while (Receive()) {
+ }
+ }
+
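+ // Pull one decoded frame if available, scale it into output_frame and update
+ // the clock from the frame's packet timestamp.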
+ bool Receive() {
+ bool res = decoder.ReceiveFrame(input_frame);
+ if (res) {
+ ready = true;
+ scaler.ScaleFrame(input_frame, output_frame);
+ clock.Set(input_frame.GetPacketTimestamp());
+ }
+ return res;
+ }
+
+private:
+ ffmpeg::Stream stream;
+ ffmpeg::Decoder decoder;
+ ffmpeg::Frame input_frame;
+ ffmpeg::Scaler scaler;
+ ffmpeg::Frame output_frame;
+ Clock clock;
+ bool ready;
+
+};
+
+}
+
+#endif