Supports more audio formats (#1628)

* Supports more audio formats

* Simplify the handling of the different buffer types.

* Check the sample rate.

---------

Co-authored-by: laurent <laurent.mazare@gmail.com>
This commit is contained in:
Hubert Shelley
2024-02-03 21:26:04 +08:00
committed by GitHub
parent 96bc704d17
commit dfab45e1c8
4 changed files with 81 additions and 13 deletions

View File

@ -18,6 +18,8 @@ use rand::{distributions::Distribution, SeedableRng};
use tokenizers::Tokenizer;
mod multilingual;
mod pcm_decode;
use candle_transformers::models::whisper::{self as m, audio, Config};
pub enum Model {
@ -535,17 +537,10 @@ fn main() -> Result<()> {
let mut mel_filters = vec![0f32; mel_bytes.len() / 4];
<byteorder::LittleEndian as byteorder::ByteOrder>::read_f32_into(mel_bytes, &mut mel_filters);
let mut input = std::fs::File::open(input)?;
let (header, data) = wav::read(&mut input)?;
println!("loaded wav data: {header:?}");
if header.sampling_rate != m::SAMPLE_RATE as u32 {
anyhow::bail!("wav file must have a {} sampling rate", m::SAMPLE_RATE)
let (pcm_data, sample_rate) = pcm_decode::pcm_decode(input)?;
if sample_rate != m::SAMPLE_RATE as u32 {
anyhow::bail!("input file must have a {} sampling rate", m::SAMPLE_RATE)
}
let data = data.as_sixteen().expect("expected 16 bit wav file");
let pcm_data: Vec<_> = data[..data.len() / header.channel_count as usize]
.iter()
.map(|v| *v as f32 / 32768.)
.collect();
println!("pcm data loaded {}", pcm_data.len());
let mel = audio::pcm_to_mel(&config, &pcm_data, &mel_filters);
let mel_len = mel.len();