From bc0e3f3b64fa3047344fbd9f86cce01ca2517e4e Mon Sep 17 00:00:00 2001 From: Kieran Date: Fri, 6 Jun 2025 20:06:47 +0100 Subject: [PATCH] feat: generate audio in test-input --- crates/core/src/ingress/test.rs | 85 ++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 7 deletions(-) diff --git a/crates/core/src/ingress/test.rs b/crates/core/src/ingress/test.rs index da52eef..20542ed 100644 --- a/crates/core/src/ingress/test.rs +++ b/crates/core/src/ingress/test.rs @@ -4,8 +4,10 @@ use anyhow::Result; use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVColorSpace::AVCOL_SPC_RGB; use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVPictureType::AV_PICTURE_TYPE_NONE; use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVPixelFormat::{AV_PIX_FMT_RGBA, AV_PIX_FMT_YUV420P}; +use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVSampleFormat::AV_SAMPLE_FMT_FLTP; use ffmpeg_rs_raw::ffmpeg_sys_the_third::{ - av_frame_alloc, av_frame_free, av_frame_get_buffer, av_packet_free, AV_PROFILE_H264_MAIN, + av_frame_alloc, av_frame_free, av_frame_get_buffer, av_packet_free, AVRational, + AV_PROFILE_H264_MAIN, }; use ffmpeg_rs_raw::{Encoder, Muxer, Scaler}; use fontdue::layout::{CoordinateSystem, Layout, TextStyle}; @@ -40,12 +42,14 @@ pub async fn listen(out_dir: String, overseer: Arc) -> Result<()> } struct TestPatternSrc { - encoder: Encoder, + video_encoder: Encoder, + audio_encoder: Encoder, scaler: Scaler, muxer: Muxer, background: Pixmap, font: [Font; 1], frame_no: u64, + audio_sample_no: u64, start: Instant, reader: HeapCons, } @@ -55,7 +59,7 @@ unsafe impl Send for TestPatternSrc {} impl TestPatternSrc { pub fn new() -> Result { let scaler = Scaler::new(); - let encoder = unsafe { + let video_encoder = unsafe { Encoder::new_with_name("libx264")? .with_stream_index(0) .with_framerate(30.0)? @@ -68,6 +72,16 @@ impl TestPatternSrc { .open(None)? }; + let audio_encoder = unsafe { + Encoder::new_with_name("aac")? + .with_stream_index(1) + .with_default_channel_layout(1) + .with_bitrate(128_000) + .with_sample_format(AV_SAMPLE_FMT_FLTP) + .with_sample_rate(44100)? + .open(None)? + }; + let svg_data = include_bytes!("../../test.svg"); let tree = usvg::Tree::from_data(svg_data, &Default::default())?; let mut pixmap = Pixmap::new(1280, 720).unwrap(); @@ -86,19 +100,22 @@ impl TestPatternSrc { let muxer = unsafe { let mut m = Muxer::builder() .with_output_write(writer, Some("mpegts"))? - .with_stream_encoder(&encoder)? + .with_stream_encoder(&video_encoder)? + .with_stream_encoder(&audio_encoder)? .build()?; m.open(None)?; m }; Ok(Self { - encoder, + video_encoder, + audio_encoder, scaler, muxer, background: pixmap, font: [font], frame_no: 0, + audio_sample_no: 0, start: Instant::now(), reader, }) @@ -161,16 +178,70 @@ impl TestPatternSrc { } } - // scale/encode + // scale/encode video let mut frame = self .scaler .process_frame(src_frame, 1280, 720, AV_PIX_FMT_YUV420P)?; - for mut pkt in self.encoder.encode_frame(frame)? { + for mut pkt in self.video_encoder.encode_frame(frame)? { self.muxer.write_packet(pkt)?; av_packet_free(&mut pkt); } av_frame_free(&mut frame); av_frame_free(&mut src_frame); + + // Generate and encode audio (sine wave) + self.generate_audio_frame()?; + + Ok(()) + } + + /// Generate audio to stay synchronized with video frames + unsafe fn generate_audio_frame(&mut self) -> Result<()> { + const SAMPLE_RATE: f32 = 44100.0; + const FREQUENCY: f32 = 440.0; // A4 note + const VIDEO_FPS: f32 = 30.0; + const SAMPLES_PER_FRAME: usize = 1024; // Fixed AAC frame size + + // Calculate how many audio samples we should have by now + // At 30fps, each video frame = 1/30 sec = 1470 audio samples at 44.1kHz + let audio_samples_per_video_frame = (SAMPLE_RATE / VIDEO_FPS) as u64; // ~1470 samples + let target_audio_samples = self.frame_no * audio_samples_per_video_frame; + + // Generate audio frames to catch up to the target + while self.audio_sample_no < target_audio_samples { + let mut audio_frame = av_frame_alloc(); + (*audio_frame).format = AV_SAMPLE_FMT_FLTP as _; + (*audio_frame).nb_samples = SAMPLES_PER_FRAME as _; + (*audio_frame).ch_layout.nb_channels = 1; + (*audio_frame).sample_rate = SAMPLE_RATE as _; + (*audio_frame).pts = self.audio_sample_no as i64; + (*audio_frame).duration = 1; + (*audio_frame).time_base = AVRational { + num: 1, + den: SAMPLE_RATE as _, + }; + + av_frame_get_buffer(audio_frame, 0); + + // Generate sine wave samples + let data = (*audio_frame).data[0] as *mut f32; + for i in 0..SAMPLES_PER_FRAME { + let sample_time = (self.audio_sample_no + i as u64) as f32 / SAMPLE_RATE; + let sample_value = + (2.0 * std::f32::consts::PI * FREQUENCY * sample_time).sin() * 0.5; + *data.add(i) = sample_value; + } + + // Encode audio frame + for mut pkt in self.audio_encoder.encode_frame(audio_frame)? { + self.muxer.write_packet(pkt)?; + av_packet_free(&mut pkt); + } + + self.audio_sample_no += SAMPLES_PER_FRAME as u64; + av_frame_free(&mut audio_frame); + } + Ok(()) } }