mirror of
https://github.com/huggingface/candle.git
synced 2025-06-17 02:58:50 +00:00
Fix for whisper-microphone example failure if audio isn't chunk aligned (#2645)
At least on my macOS Sequoia system (MBP 14" 2021, M1 Pro), when I run the `whisper-microphone` example, it fails after it has gathered 10 seconds of audio, just before the transcription:

```
Error: Insufficient buffer size 384 for input channel 0, expected 1024
```

With the audio device I'm using (Airpods Pro Max), there is no guarantee that the length of each audio buffer is a multiple of 1024 samples. Thus, at the end of the 10 seconds, `buffered_pcm` can have some samples at the end that do not form a complete 1024-sample chunk. This commit fixes that by tracking when there is a partial chunk at the end of the buffer and leaving it in `buffered_pcm` to be processed on the next loop iteration. Note that, in the interest of keeping this PR as small as possible, I didn't make any other changes to this example.
This commit is contained in:
@ -624,13 +624,27 @@ pub fn main() -> Result<()> {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let mut resampled_pcm = vec![];
|
let mut resampled_pcm = vec![];
|
||||||
for buffered_pcm in buffered_pcm.chunks(1024) {
|
// resample the audio, one chunk of 1024 samples at a time.
|
||||||
|
// in case the audio input failed to produce an exact multiple of 1024 samples,
|
||||||
|
// process the remainder on the next iteration of the loop.
|
||||||
|
let full_chunks = buffered_pcm.len() / 1024;
|
||||||
|
let remainder = buffered_pcm.len() % 1024;
|
||||||
|
for chunk in 0..full_chunks {
|
||||||
|
let buffered_pcm = &buffered_pcm[chunk * 1024..(chunk + 1) * 1024];
|
||||||
let pcm = resampler.process(&[&buffered_pcm], None)?;
|
let pcm = resampler.process(&[&buffered_pcm], None)?;
|
||||||
resampled_pcm.extend_from_slice(&pcm[0])
|
resampled_pcm.extend_from_slice(&pcm[0]);
|
||||||
}
|
}
|
||||||
let pcm = resampled_pcm;
|
let pcm = resampled_pcm;
|
||||||
println!("{} {}", buffered_pcm.len(), pcm.len());
|
println!("{} {}", buffered_pcm.len(), pcm.len());
|
||||||
buffered_pcm.clear();
|
if remainder == 0 {
|
||||||
|
buffered_pcm.clear();
|
||||||
|
} else {
|
||||||
|
// efficiently copy the remainder to the beginning of the `buffered_pcm` buffer and
|
||||||
|
// truncate it. That's more efficient than allocating a new vector and copying into it
|
||||||
|
println!("audio device produced partial chunk with {remainder} samples; processing the remainder on the next iteration of the loop");
|
||||||
|
buffered_pcm.copy_within(full_chunks * 1024.., 0);
|
||||||
|
buffered_pcm.truncate(remainder);
|
||||||
|
}
|
||||||
let mel = audio::pcm_to_mel(&config, &pcm, &mel_filters);
|
let mel = audio::pcm_to_mel(&config, &pcm, &mel_filters);
|
||||||
let mel_len = mel.len();
|
let mel_len = mel.len();
|
||||||
let mel = Tensor::from_vec(
|
let mel = Tensor::from_vec(
|
||||||
|
Reference in New Issue
Block a user