mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 02:38:10 +00:00
Supports more audio formats (#1628)
* Supports more audio formats * Simplify the handling of the different buffer types. * Check the sample rate. --------- Co-authored-by: laurent <laurent.mazare@gmail.com>
This commit is contained in:
@ -18,6 +18,8 @@ use rand::{distributions::Distribution, SeedableRng};
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
mod multilingual;
|
||||
mod pcm_decode;
|
||||
|
||||
use candle_transformers::models::whisper::{self as m, audio, Config};
|
||||
|
||||
pub enum Model {
|
||||
@ -535,17 +537,10 @@ fn main() -> Result<()> {
|
||||
let mut mel_filters = vec![0f32; mel_bytes.len() / 4];
|
||||
<byteorder::LittleEndian as byteorder::ByteOrder>::read_f32_into(mel_bytes, &mut mel_filters);
|
||||
|
||||
let mut input = std::fs::File::open(input)?;
|
||||
let (header, data) = wav::read(&mut input)?;
|
||||
println!("loaded wav data: {header:?}");
|
||||
if header.sampling_rate != m::SAMPLE_RATE as u32 {
|
||||
anyhow::bail!("wav file must have a {} sampling rate", m::SAMPLE_RATE)
|
||||
let (pcm_data, sample_rate) = pcm_decode::pcm_decode(input)?;
|
||||
if sample_rate != m::SAMPLE_RATE as u32 {
|
||||
anyhow::bail!("input file must have a {} sampling rate", m::SAMPLE_RATE)
|
||||
}
|
||||
let data = data.as_sixteen().expect("expected 16 bit wav file");
|
||||
let pcm_data: Vec<_> = data[..data.len() / header.channel_count as usize]
|
||||
.iter()
|
||||
.map(|v| *v as f32 / 32768.)
|
||||
.collect();
|
||||
println!("pcm data loaded {}", pcm_data.len());
|
||||
let mel = audio::pcm_to_mel(&config, &pcm_data, &mel_filters);
|
||||
let mel_len = mel.len();
|
||||
|
Reference in New Issue
Block a user