mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
PCM conversion.
This commit is contained in:
@ -1,6 +1,10 @@
|
||||
// Audio processing code, adapted from whisper.cpp
|
||||
// https://github.com/ggerganov/whisper.cpp
|
||||
|
||||
// Audio sample rate constant (presumably samples per second — TODO confirm).
const WHISPER_SAMPLE_RATE: usize = 16000;
|
||||
// FFT size; `pcm_to_mel` passes this as the `fft_size` argument of `log_mel_spectrogram_`.
const WHISPER_N_FFT: usize = 400;
|
||||
// Number of mel bins (per the name); forwarded to `log_mel_spectrogram_` by `pcm_to_mel`.
const WHISPER_N_MEL: usize = 80;
|
||||
// Hop length (presumably in samples — TODO confirm); forwarded to `log_mel_spectrogram_` by `pcm_to_mel`.
const WHISPER_HOP_LENGTH: usize = 160;
|
||||
// Chunk size (presumably in seconds — TODO confirm); not referenced by the code visible here.
const WHISPER_CHUNK_SIZE: usize = 30;
|
||||
|
||||
// Shorthand bound combining `num_traits::Float`, `num_traits::FloatConst` and
// `num_traits::NumAssign`; used as the element-type bound of the generic audio functions below.
trait Float: num_traits::Float + num_traits::FloatConst + num_traits::NumAssign {}
|
||||
@ -147,7 +151,7 @@ fn log_mel_spectrogram_w<T: Float>(
|
||||
mel
|
||||
}
|
||||
|
||||
fn log_mel_spectrogram<T: Float>(
|
||||
fn log_mel_spectrogram_<T: Float>(
|
||||
samples: &[T],
|
||||
filters: &[T],
|
||||
fft_size: usize,
|
||||
@ -198,3 +202,23 @@ fn log_mel_spectrogram<T: Float>(
|
||||
}
|
||||
mel
|
||||
}
|
||||
|
||||
fn pcm_to_mel<T: Float>(samples: &[T], filters: &[T]) -> anyhow::Result<Vec<T>> {
|
||||
if filters.len() != WHISPER_N_MEL * WHISPER_N_FFT {
|
||||
anyhow::bail!(
|
||||
"unexpected filter length {} (n_mel: {}, n_fft: {})",
|
||||
filters.len(),
|
||||
WHISPER_N_MEL,
|
||||
WHISPER_N_FFT
|
||||
)
|
||||
}
|
||||
let mel = log_mel_spectrogram_(
|
||||
samples,
|
||||
filters,
|
||||
WHISPER_N_FFT,
|
||||
WHISPER_HOP_LENGTH,
|
||||
WHISPER_N_MEL,
|
||||
false,
|
||||
);
|
||||
Ok(mel)
|
||||
}
|
||||
|
Reference in New Issue
Block a user