mirror of
https://github.com/huggingface/candle.git
synced 2025-06-20 04:00:28 +00:00
Preliminary support for whisper v3. (#1294)
* Preliminary support for whisper v3.
* Add the missing files.
This commit is contained in:
@@ -198,13 +198,17 @@ fn log_mel_spectrogram_<T: Float + std::fmt::Display>(
|
||||
mel
|
||||
}
|
||||
|
||||
- pub fn pcm_to_mel<T: Float + std::fmt::Display>(samples: &[T], filters: &[T]) -> Vec<T> {
|
||||
+ pub fn pcm_to_mel<T: Float + std::fmt::Display>(
|
||||
+ cfg: &super::Config,
|
||||
+ samples: &[T],
|
||||
+ filters: &[T],
|
||||
+ ) -> Vec<T> {
|
||||
log_mel_spectrogram_(
|
||||
samples,
|
||||
filters,
|
||||
super::N_FFT,
|
||||
super::HOP_LENGTH,
|
||||
- super::N_MELS,
|
||||
+ cfg.num_mel_bins,
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
@@ -18,6 +18,7 @@ pub struct Config {
|
||||
// pub n_text_state: usize,
|
||||
pub decoder_attention_heads: usize, // n_text_head
|
||||
pub decoder_layers: usize, // n_text_layer
|
||||
+ #[serde(default)]
|
||||
pub suppress_tokens: Vec<u32>,
|
||||
}
|
||||
|
||||
@@ -26,7 +27,6 @@ pub const DTYPE: candle::DType = candle::DType::F32;
|
||||
// Audio parameters.
|
||||
pub const SAMPLE_RATE: usize = 16000;
|
||||
pub const N_FFT: usize = 400;
|
||||
- pub const N_MELS: usize = 80;
|
||||
pub const HOP_LENGTH: usize = 160;
|
||||
pub const CHUNK_LENGTH: usize = 30;
|
||||
pub const N_SAMPLES: usize = CHUNK_LENGTH * SAMPLE_RATE; // 480000 samples in a 30-second chunk
|
||||
|
Reference in New Issue
Block a user