PCM conversion.

This commit is contained in:
laurent
2023-07-05 11:02:49 +01:00
parent dd1d55f5c7
commit 648d1511d5

View File

@ -1,6 +1,10 @@
// Audio processing code, adapted from whisper.cpp
// https://github.com/ggerganov/whisper.cpp
/// Sample rate associated with Whisper audio input (presumably in Hz — TODO confirm units).
const WHISPER_SAMPLE_RATE: usize = 16000;
/// FFT size; `pcm_to_mel` passes this as the `fft_size` argument of `log_mel_spectrogram_`
/// and also uses it when validating the filter length.
const WHISPER_N_FFT: usize = 400;
/// Mel count (reported as `n_mel` in error messages; presumably the number of mel bands —
/// confirm). Used by `pcm_to_mel` for the filter-length check and forwarded to
/// `log_mel_spectrogram_`.
const WHISPER_N_MEL: usize = 80;
/// Hop length forwarded by `pcm_to_mel` to `log_mel_spectrogram_` (presumably the step, in
/// samples, between consecutive frames — confirm against the callee).
const WHISPER_HOP_LENGTH: usize = 160;
/// Chunk size; not referenced by the code visible here (presumably a duration in seconds —
/// TODO confirm).
const WHISPER_CHUNK_SIZE: usize = 30;
/// Shorthand bound for the numeric element type used by the audio routines in this file:
/// anything that is a `num_traits::Float` with `FloatConst` constants and `NumAssign`
/// compound-assignment operators. The trait itself declares no methods.
trait Float: num_traits::Float + num_traits::FloatConst + num_traits::NumAssign {}
@ -147,7 +151,7 @@ fn log_mel_spectrogram_w<T: Float>(
mel
}
fn log_mel_spectrogram<T: Float>(
fn log_mel_spectrogram_<T: Float>(
samples: &[T],
filters: &[T],
fft_size: usize,
@ -198,3 +202,23 @@ fn log_mel_spectrogram<T: Float>(
}
mel
}
/// Converts PCM `samples` into a log-mel spectrogram using the Whisper constants.
///
/// `filters` is forwarded unchanged to `log_mel_spectrogram_` together with
/// `WHISPER_N_FFT`, `WHISPER_HOP_LENGTH` and `WHISPER_N_MEL`.
///
/// # Errors
///
/// Fails when `filters.len()` differs from `WHISPER_N_MEL * WHISPER_N_FFT`.
fn pcm_to_mel<T: Float>(samples: &[T], filters: &[T]) -> anyhow::Result<Vec<T>> {
    // NOTE(review): the expected length is n_mel * n_fft here; confirm this matches the
    // filter layout that `log_mel_spectrogram_` actually indexes into.
    let expected_len = WHISPER_N_MEL * WHISPER_N_FFT;
    let actual_len = filters.len();
    if actual_len != expected_len {
        return Err(anyhow::anyhow!(
            "unexpected filter length {} (n_mel: {}, n_fft: {})",
            actual_len,
            WHISPER_N_MEL,
            WHISPER_N_FFT
        ));
    }

    // The trailing `false` is an option flag of `log_mel_spectrogram_`; its meaning is not
    // visible from this function.
    Ok(log_mel_spectrogram_(
        samples,
        filters,
        WHISPER_N_FFT,
        WHISPER_HOP_LENGTH,
        WHISPER_N_MEL,
        false,
    ))
}