From 58605252e8c9355d6f2452f54918e9eb4b938b1f Mon Sep 17 00:00:00 2001 From: Laurent Mazare Date: Mon, 18 Mar 2024 11:19:46 +0100 Subject: [PATCH] Microphone support for the encodec example. (#1866) --- candle-examples/examples/encodec/README.md | 8 ++++-- candle-examples/examples/encodec/main.rs | 30 ++++++++++++++++++---- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/candle-examples/examples/encodec/README.md b/candle-examples/examples/encodec/README.md index 4a8eb0b6..9de0d4ad 100644 --- a/candle-examples/examples/encodec/README.md +++ b/candle-examples/examples/encodec/README.md @@ -13,9 +13,13 @@ cargo run --example encodec --features symphonia --release -- code-to-audio \ ``` This decodes the EnCodec tokens stored in `jfk-codes.safetensors` and generates -an output wav file containing the audio data. If the output file name is set to -`-`, the audio content directly gets played on the computer speakers if any. +an output wav file containing the audio data. + Instead of `code-to-audio` one can use: - `audio-to-audio in.mp3 out.wav`: encodes the input audio file then decodes it to a wav file. - `audio-to-code in.mp3 out.safetensors`: generates a safetensors file containing EnCodec tokens for the input audio file. + +If the audio output file name is set to `-`, the audio content directly gets +played on the default audio output device. If the audio input file is set to `-`, the audio +gets recorded from the default audio input. 
diff --git a/candle-examples/examples/encodec/main.rs b/candle-examples/examples/encodec/main.rs index b8728bdb..e77f98e7 100644 --- a/candle-examples/examples/encodec/main.rs +++ b/candle-examples/examples/encodec/main.rs @@ -60,12 +60,32 @@ fn main() -> Result<()> { codes.get("codes").expect("no codes in input file").clone() } Action::AudioToCode | Action::AudioToAudio => { - let (pcm, sample_rate) = audio_io::pcm_decode(args.in_file)?; - let pcm = if sample_rate != 24_000 { - println!("WARNING: encodec uses a 24khz sample rate, input uses {sample_rate}, resampling..."); - audio_io::resample(&pcm, sample_rate as usize, 24_000)? + let pcm = if args.in_file == "-" { + println!(">>>> RECORDING AUDIO, PRESS ENTER ONCE DONE <<<<"); + let (stream, input_audio) = audio_io::setup_input_stream()?; + let mut pcms = vec![]; + let stdin = std::thread::spawn(|| { + let mut s = String::new(); + std::io::stdin().read_line(&mut s) + }); + while !stdin.is_finished() { + let input = input_audio.lock().unwrap().take_all(); + if input.is_empty() { + std::thread::sleep(std::time::Duration::from_millis(100)); + continue; + } + pcms.push(input) + } + drop(stream); + pcms.concat() } else { - pcm + let (pcm, sample_rate) = audio_io::pcm_decode(args.in_file)?; + if sample_rate != 24_000 { + println!("WARNING: encodec uses a 24khz sample rate, input uses {sample_rate}, resampling..."); + audio_io::resample(&pcm, sample_rate as usize, 24_000)? + } else { + pcm + } }; let pcm_len = pcm.len(); let pcm = Tensor::from_vec(pcm, (1, 1, pcm_len), &device)?;