mirror of
https://github.com/huggingface/candle.git
synced 2025-06-21 20:22:49 +00:00
Re-organize the wasm examples (#231)
* Move the whisper example. * More renaming. * Add llama2 as a new wasm example. * Live generation. * More of the llama wasm example. * Formatting.
This commit is contained in:
217
candle-wasm-examples/whisper/src/audio.rs
Normal file
217
candle-wasm-examples/whisper/src/audio.rs
Normal file
@ -0,0 +1,217 @@
|
||||
// Audio processing code, adapted from whisper.cpp
|
||||
// https://github.com/ggerganov/whisper.cpp
|
||||
use super::worker;
|
||||
|
||||
pub trait Float: num_traits::Float + num_traits::FloatConst + num_traits::NumAssign {}
|
||||
|
||||
impl Float for f32 {}
|
||||
impl Float for f64 {}
|
||||
|
||||
// https://github.com/ggerganov/whisper.cpp/blob/4774d2feb01a772a15de81ffc34b34a1f294f020/whisper.cpp#L2357
|
||||
fn fft<T: Float>(inp: &[T]) -> Vec<T> {
|
||||
let n = inp.len();
|
||||
let zero = T::zero();
|
||||
if n == 1 {
|
||||
return vec![inp[0], zero];
|
||||
}
|
||||
if n % 2 == 1 {
|
||||
return dft(inp);
|
||||
}
|
||||
let mut out = vec![zero; n * 2];
|
||||
|
||||
let mut even = vec![];
|
||||
even.reserve(n / 2);
|
||||
let mut odd = vec![];
|
||||
odd.reserve(n / 2);
|
||||
|
||||
for (i, &inp) in inp.iter().enumerate() {
|
||||
if i % 2 == 0 {
|
||||
even.push(inp)
|
||||
} else {
|
||||
odd.push(inp);
|
||||
}
|
||||
}
|
||||
|
||||
let even_fft = fft(&even);
|
||||
let odd_fft = fft(&odd);
|
||||
|
||||
let two_pi = T::PI() + T::PI();
|
||||
let n_t = T::from(n).unwrap();
|
||||
for k in 0..n / 2 {
|
||||
let k_t = T::from(k).unwrap();
|
||||
let theta = two_pi * k_t / n_t;
|
||||
let re = theta.cos();
|
||||
let im = -theta.sin();
|
||||
|
||||
let re_odd = odd_fft[2 * k];
|
||||
let im_odd = odd_fft[2 * k + 1];
|
||||
|
||||
out[2 * k] = even_fft[2 * k] + re * re_odd - im * im_odd;
|
||||
out[2 * k + 1] = even_fft[2 * k + 1] + re * im_odd + im * re_odd;
|
||||
|
||||
out[2 * (k + n / 2)] = even_fft[2 * k] - re * re_odd + im * im_odd;
|
||||
out[2 * (k + n / 2) + 1] = even_fft[2 * k + 1] - re * im_odd - im * re_odd;
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
// https://github.com/ggerganov/whisper.cpp/blob/4774d2feb01a772a15de81ffc34b34a1f294f020/whisper.cpp#L2337
|
||||
fn dft<T: Float>(inp: &[T]) -> Vec<T> {
|
||||
let zero = T::zero();
|
||||
let n = inp.len();
|
||||
let two_pi = T::PI() + T::PI();
|
||||
|
||||
let mut out = Vec::new();
|
||||
out.reserve(2 * n);
|
||||
let n_t = T::from(n).unwrap();
|
||||
for k in 0..n {
|
||||
let k_t = T::from(k).unwrap();
|
||||
let mut re = zero;
|
||||
let mut im = zero;
|
||||
|
||||
for (j, &inp) in inp.iter().enumerate() {
|
||||
let j_t = T::from(j).unwrap();
|
||||
let angle = two_pi * k_t * j_t / n_t;
|
||||
re += inp * angle.cos();
|
||||
im -= inp * angle.sin();
|
||||
}
|
||||
|
||||
out.push(re);
|
||||
out.push(im);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
// https://github.com/ggerganov/whisper.cpp/blob/4774d2feb01a772a15de81ffc34b34a1f294f020/whisper.cpp#L2414
|
||||
fn log_mel_spectrogram_w<T: Float>(
|
||||
ith: usize,
|
||||
hann: &[T],
|
||||
samples: &[T],
|
||||
filters: &[T],
|
||||
fft_size: usize,
|
||||
fft_step: usize,
|
||||
speed_up: bool,
|
||||
n_len: usize,
|
||||
n_mel: usize,
|
||||
n_threads: usize,
|
||||
) -> Vec<T> {
|
||||
let n_fft = if speed_up {
|
||||
1 + fft_size / 4
|
||||
} else {
|
||||
1 + fft_size / 2
|
||||
};
|
||||
|
||||
let zero = T::zero();
|
||||
let half = T::from(0.5).unwrap();
|
||||
let mut fft_in = vec![zero; fft_size];
|
||||
let mut mel = vec![zero; n_len * n_mel];
|
||||
|
||||
for i in (ith..n_len).step_by(n_threads) {
|
||||
let offset = i * fft_step;
|
||||
|
||||
// apply Hanning window
|
||||
for j in 0..fft_size {
|
||||
fft_in[j] = if offset + j < samples.len() {
|
||||
hann[j] * samples[offset + j]
|
||||
} else {
|
||||
zero
|
||||
}
|
||||
}
|
||||
|
||||
// FFT -> mag^2
|
||||
let mut fft_out: Vec<T> = fft(&fft_in);
|
||||
|
||||
for j in 0..fft_size {
|
||||
fft_out[j] = fft_out[2 * j] * fft_out[2 * j] + fft_out[2 * j + 1] * fft_out[2 * j + 1];
|
||||
}
|
||||
for j in 1..fft_size / 2 {
|
||||
let v = fft_out[fft_size - j];
|
||||
fft_out[j] += v;
|
||||
}
|
||||
|
||||
if speed_up {
|
||||
// scale down in the frequency domain results in a speed up in the time domain
|
||||
for j in 0..n_fft {
|
||||
fft_out[j] = half * (fft_out[2 * j] + fft_out[2 * j + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
// mel spectrogram
|
||||
for j in 0..n_mel {
|
||||
let mut sum = zero;
|
||||
for k in 0..n_fft {
|
||||
sum += fft_out[k] * filters[j * n_fft + k];
|
||||
}
|
||||
mel[j * n_len + i] = T::max(sum, T::from(1e-10).unwrap()).log10();
|
||||
}
|
||||
}
|
||||
mel
|
||||
}
|
||||
|
||||
fn log_mel_spectrogram_<T: Float + std::fmt::Display>(
|
||||
samples: &[T],
|
||||
filters: &[T],
|
||||
fft_size: usize,
|
||||
fft_step: usize,
|
||||
n_mel: usize,
|
||||
speed_up: bool,
|
||||
) -> Vec<T> {
|
||||
let zero = T::zero();
|
||||
let two_pi = T::PI() + T::PI();
|
||||
let half = T::from(0.5).unwrap();
|
||||
let one = T::from(1.0).unwrap();
|
||||
let four = T::from(4.0).unwrap();
|
||||
let fft_size_t = T::from(fft_size).unwrap();
|
||||
|
||||
let hann: Vec<T> = (0..fft_size)
|
||||
.map(|i| half * (one - ((two_pi * T::from(i).unwrap()) / fft_size_t).cos()))
|
||||
.collect();
|
||||
let n_len = samples.len() / fft_step;
|
||||
|
||||
// pad audio with at least one extra chunk of zeros
|
||||
let pad = 100 * worker::CHUNK_LENGTH / 2;
|
||||
let n_len = if n_len % pad != 0 {
|
||||
(n_len / pad + 1) * pad
|
||||
} else {
|
||||
n_len
|
||||
};
|
||||
let n_len = n_len + pad;
|
||||
let samples = {
|
||||
let mut samples_padded = samples.to_vec();
|
||||
let to_add = n_len * fft_step - samples.len();
|
||||
samples_padded.extend(std::iter::repeat(zero).take(to_add));
|
||||
samples_padded
|
||||
};
|
||||
|
||||
// Use a single thread for now.
|
||||
let mut mel = log_mel_spectrogram_w(
|
||||
0, &hann, &samples, filters, fft_size, fft_step, speed_up, n_len, n_mel, 1,
|
||||
);
|
||||
let mmax = mel
|
||||
.iter()
|
||||
.max_by(|&u, &v| u.partial_cmp(v).unwrap_or(std::cmp::Ordering::Greater))
|
||||
.copied()
|
||||
.unwrap_or(zero)
|
||||
- T::from(8).unwrap();
|
||||
for m in mel.iter_mut() {
|
||||
let v = T::max(*m, mmax);
|
||||
*m = v / four + one
|
||||
}
|
||||
mel
|
||||
}
|
||||
|
||||
pub fn pcm_to_mel<T: Float + std::fmt::Display>(
|
||||
samples: &[T],
|
||||
filters: &[T],
|
||||
) -> anyhow::Result<Vec<T>> {
|
||||
let mel = log_mel_spectrogram_(
|
||||
samples,
|
||||
filters,
|
||||
worker::N_FFT,
|
||||
worker::HOP_LENGTH,
|
||||
worker::N_MELS,
|
||||
false,
|
||||
);
|
||||
Ok(mel)
|
||||
}
|
Reference in New Issue
Block a user