feat: generate audio in test-input

This commit is contained in:
2025-06-06 20:06:47 +01:00
parent 4a2fd4bdcd
commit bc0e3f3b64

View File

@ -4,8 +4,10 @@ use anyhow::Result;
use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVColorSpace::AVCOL_SPC_RGB; use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVColorSpace::AVCOL_SPC_RGB;
use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVPictureType::AV_PICTURE_TYPE_NONE; use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVPictureType::AV_PICTURE_TYPE_NONE;
use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVPixelFormat::{AV_PIX_FMT_RGBA, AV_PIX_FMT_YUV420P}; use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVPixelFormat::{AV_PIX_FMT_RGBA, AV_PIX_FMT_YUV420P};
use ffmpeg_rs_raw::ffmpeg_sys_the_third::AVSampleFormat::AV_SAMPLE_FMT_FLTP;
use ffmpeg_rs_raw::ffmpeg_sys_the_third::{ use ffmpeg_rs_raw::ffmpeg_sys_the_third::{
av_frame_alloc, av_frame_free, av_frame_get_buffer, av_packet_free, AV_PROFILE_H264_MAIN, av_frame_alloc, av_frame_free, av_frame_get_buffer, av_packet_free, AVRational,
AV_PROFILE_H264_MAIN,
}; };
use ffmpeg_rs_raw::{Encoder, Muxer, Scaler}; use ffmpeg_rs_raw::{Encoder, Muxer, Scaler};
use fontdue::layout::{CoordinateSystem, Layout, TextStyle}; use fontdue::layout::{CoordinateSystem, Layout, TextStyle};
@ -40,12 +42,14 @@ pub async fn listen(out_dir: String, overseer: Arc<dyn Overseer>) -> Result<()>
} }
struct TestPatternSrc { struct TestPatternSrc {
encoder: Encoder, video_encoder: Encoder,
audio_encoder: Encoder,
scaler: Scaler, scaler: Scaler,
muxer: Muxer, muxer: Muxer,
background: Pixmap, background: Pixmap,
font: [Font; 1], font: [Font; 1],
frame_no: u64, frame_no: u64,
audio_sample_no: u64,
start: Instant, start: Instant,
reader: HeapCons<u8>, reader: HeapCons<u8>,
} }
@ -55,7 +59,7 @@ unsafe impl Send for TestPatternSrc {}
impl TestPatternSrc { impl TestPatternSrc {
pub fn new() -> Result<Self> { pub fn new() -> Result<Self> {
let scaler = Scaler::new(); let scaler = Scaler::new();
let encoder = unsafe { let video_encoder = unsafe {
Encoder::new_with_name("libx264")? Encoder::new_with_name("libx264")?
.with_stream_index(0) .with_stream_index(0)
.with_framerate(30.0)? .with_framerate(30.0)?
@ -68,6 +72,16 @@ impl TestPatternSrc {
.open(None)? .open(None)?
}; };
let audio_encoder = unsafe {
Encoder::new_with_name("aac")?
.with_stream_index(1)
.with_default_channel_layout(1)
.with_bitrate(128_000)
.with_sample_format(AV_SAMPLE_FMT_FLTP)
.with_sample_rate(44100)?
.open(None)?
};
let svg_data = include_bytes!("../../test.svg"); let svg_data = include_bytes!("../../test.svg");
let tree = usvg::Tree::from_data(svg_data, &Default::default())?; let tree = usvg::Tree::from_data(svg_data, &Default::default())?;
let mut pixmap = Pixmap::new(1280, 720).unwrap(); let mut pixmap = Pixmap::new(1280, 720).unwrap();
@ -86,19 +100,22 @@ impl TestPatternSrc {
let muxer = unsafe { let muxer = unsafe {
let mut m = Muxer::builder() let mut m = Muxer::builder()
.with_output_write(writer, Some("mpegts"))? .with_output_write(writer, Some("mpegts"))?
.with_stream_encoder(&encoder)? .with_stream_encoder(&video_encoder)?
.with_stream_encoder(&audio_encoder)?
.build()?; .build()?;
m.open(None)?; m.open(None)?;
m m
}; };
Ok(Self { Ok(Self {
encoder, video_encoder,
audio_encoder,
scaler, scaler,
muxer, muxer,
background: pixmap, background: pixmap,
font: [font], font: [font],
frame_no: 0, frame_no: 0,
audio_sample_no: 0,
start: Instant::now(), start: Instant::now(),
reader, reader,
}) })
@ -161,16 +178,70 @@ impl TestPatternSrc {
} }
} }
// scale/encode // scale/encode video
let mut frame = self let mut frame = self
.scaler .scaler
.process_frame(src_frame, 1280, 720, AV_PIX_FMT_YUV420P)?; .process_frame(src_frame, 1280, 720, AV_PIX_FMT_YUV420P)?;
for mut pkt in self.encoder.encode_frame(frame)? { for mut pkt in self.video_encoder.encode_frame(frame)? {
self.muxer.write_packet(pkt)?; self.muxer.write_packet(pkt)?;
av_packet_free(&mut pkt); av_packet_free(&mut pkt);
} }
av_frame_free(&mut frame); av_frame_free(&mut frame);
av_frame_free(&mut src_frame); av_frame_free(&mut src_frame);
// Generate and encode audio (sine wave)
self.generate_audio_frame()?;
Ok(())
}
/// Generate audio to stay synchronized with video frames
unsafe fn generate_audio_frame(&mut self) -> Result<()> {
const SAMPLE_RATE: f32 = 44100.0;
const FREQUENCY: f32 = 440.0; // A4 note
const VIDEO_FPS: f32 = 30.0;
const SAMPLES_PER_FRAME: usize = 1024; // Fixed AAC frame size
// Calculate how many audio samples we should have by now
// At 30fps, each video frame = 1/30 sec = 1470 audio samples at 44.1kHz
let audio_samples_per_video_frame = (SAMPLE_RATE / VIDEO_FPS) as u64; // ~1470 samples
let target_audio_samples = self.frame_no * audio_samples_per_video_frame;
// Generate audio frames to catch up to the target
while self.audio_sample_no < target_audio_samples {
let mut audio_frame = av_frame_alloc();
(*audio_frame).format = AV_SAMPLE_FMT_FLTP as _;
(*audio_frame).nb_samples = SAMPLES_PER_FRAME as _;
(*audio_frame).ch_layout.nb_channels = 1;
(*audio_frame).sample_rate = SAMPLE_RATE as _;
(*audio_frame).pts = self.audio_sample_no as i64;
(*audio_frame).duration = 1;
(*audio_frame).time_base = AVRational {
num: 1,
den: SAMPLE_RATE as _,
};
av_frame_get_buffer(audio_frame, 0);
// Generate sine wave samples
let data = (*audio_frame).data[0] as *mut f32;
for i in 0..SAMPLES_PER_FRAME {
let sample_time = (self.audio_sample_no + i as u64) as f32 / SAMPLE_RATE;
let sample_value =
(2.0 * std::f32::consts::PI * FREQUENCY * sample_time).sin() * 0.5;
*data.add(i) = sample_value;
}
// Encode audio frame
for mut pkt in self.audio_encoder.encode_frame(audio_frame)? {
self.muxer.write_packet(pkt)?;
av_packet_free(&mut pkt);
}
self.audio_sample_no += SAMPLES_PER_FRAME as u64;
av_frame_free(&mut audio_frame);
}
Ok(()) Ok(())
} }
} }