mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 18:48:51 +00:00
PCM conversion.
This commit is contained in:
@ -1,6 +1,10 @@
|
|||||||
// Audio processing code, adapted from whisper.cpp
|
// Audio processing code, adapted from whisper.cpp
|
||||||
// https://github.com/ggerganov/whisper.cpp
|
// https://github.com/ggerganov/whisper.cpp
|
||||||
|
|
||||||
|
// Audio sample rate expected by this module (presumably in Hz; confirm against callers).
const WHISPER_SAMPLE_RATE: usize = 16000;
|
||||||
|
// FFT size; `pcm_to_mel` passes this as the `fft_size` argument of `log_mel_spectrogram_`.
const WHISPER_N_FFT: usize = 400;
|
||||||
|
// Reported as `n_mel` in `pcm_to_mel`'s error message; the filter slice must hold
// exactly `WHISPER_N_MEL * WHISPER_N_FFT` values.
const WHISPER_N_MEL: usize = 80;
|
||||||
|
// Passed by `pcm_to_mel` as the argument following the FFT size (presumably the
// step between successive analysis frames, in samples; confirm).
const WHISPER_HOP_LENGTH: usize = 160;
|
||||||
const WHISPER_CHUNK_SIZE: usize = 30;
|
const WHISPER_CHUNK_SIZE: usize = 30;
|
||||||
|
|
||||||
trait Float: num_traits::Float + num_traits::FloatConst + num_traits::NumAssign {}
|
trait Float: num_traits::Float + num_traits::FloatConst + num_traits::NumAssign {}
|
||||||
@ -147,7 +151,7 @@ fn log_mel_spectrogram_w<T: Float>(
|
|||||||
mel
|
mel
|
||||||
}
|
}
|
||||||
|
|
||||||
fn log_mel_spectrogram<T: Float>(
|
fn log_mel_spectrogram_<T: Float>(
|
||||||
samples: &[T],
|
samples: &[T],
|
||||||
filters: &[T],
|
filters: &[T],
|
||||||
fft_size: usize,
|
fft_size: usize,
|
||||||
@ -198,3 +202,23 @@ fn log_mel_spectrogram<T: Float>(
|
|||||||
}
|
}
|
||||||
mel
|
mel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts PCM `samples` into a mel spectrogram using the fixed Whisper
/// parameters declared at the top of this file.
///
/// `filters` must contain exactly `WHISPER_N_MEL * WHISPER_N_FFT` values.
/// The actual computation is delegated to `log_mel_spectrogram_`.
///
/// # Errors
///
/// Returns an error when `filters.len()` differs from
/// `WHISPER_N_MEL * WHISPER_N_FFT`.
fn pcm_to_mel<T: Float>(samples: &[T], filters: &[T]) -> anyhow::Result<Vec<T>> {
|
||||||
|
// Validate the filter bank size up front so a mismatch becomes an error
// instead of being passed on to the spectrogram routine.
if filters.len() != WHISPER_N_MEL * WHISPER_N_FFT {
|
||||||
|
anyhow::bail!(
|
||||||
|
"unexpected filter length {} (n_mel: {}, n_fft: {})",
|
||||||
|
filters.len(),
|
||||||
|
WHISPER_N_MEL,
|
||||||
|
WHISPER_N_FFT
|
||||||
|
)
|
||||||
|
}
|
||||||
|
// Run the generic routine with the Whisper constants.
let mel = log_mel_spectrogram_(
|
||||||
|
samples,
|
||||||
|
filters,
|
||||||
|
WHISPER_N_FFT,
|
||||||
|
WHISPER_HOP_LENGTH,
|
||||||
|
WHISPER_N_MEL,
|
||||||
|
// NOTE(review): the meaning of this flag is defined by `log_mel_spectrogram_`,
// whose full signature is not visible here; confirm before changing.
false,
|
||||||
|
);
|
||||||
|
Ok(mel)
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user