diff --git a/Cargo.toml b/Cargo.toml index 6f2ec0f7..0835efda 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ fancy-regex = "0.13.0" gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] } hf-hub = "0.3.0" half = { version = "2.3.1", features = ["num-traits", "use-intrinsics", "rand_distr"] } +hound = "3.5.1" image = { version = "0.25.0", default-features = false, features = ["jpeg", "png"] } imageproc = { version = "0.24.0", default-features = false } intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] } @@ -69,7 +70,6 @@ tokenizers = { version = "0.19.1", default-features = false } tracing = "0.1.37" tracing-chrome = "0.7.1" tracing-subscriber = "0.3.7" -wav = "1.0.0" yoke = { version = "0.7.2", features = ["derive"] } zip = { version = "1.1.1", default-features = false } metal = { version = "0.27.0", features = ["mps"]} diff --git a/candle-book/Cargo.toml b/candle-book/Cargo.toml index 5ccda31e..dee55f20 100644 --- a/candle-book/Cargo.toml +++ b/candle-book/Cargo.toml @@ -37,7 +37,6 @@ tokenizers = { workspace = true, features = ["onig"] } tracing = { workspace = true } tracing-chrome = { workspace = true } tracing-subscriber = { workspace = true } -wav = { workspace = true } # Necessary to disambiguate with tokio in wasm examples which are 1.28.1 parquet = { workspace = true } image = { workspace = true } diff --git a/candle-wasm-examples/whisper/Cargo.toml b/candle-wasm-examples/whisper/Cargo.toml index 92e206b2..745b7ae7 100644 --- a/candle-wasm-examples/whisper/Cargo.toml +++ b/candle-wasm-examples/whisper/Cargo.toml @@ -21,7 +21,7 @@ log = { workspace = true } rand = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } -wav = { workspace = true } +hound = { workspace = true } safetensors = { workspace = true } # Wasm specific crates. diff --git a/candle-wasm-examples/whisper/src/worker.rs b/candle-wasm-examples/whisper/src/worker.rs index 898996a7..f5c09bae 100644 --- a/candle-wasm-examples/whisper/src/worker.rs +++ b/candle-wasm-examples/whisper/src/worker.rs @@ -345,16 +345,19 @@ impl Decoder { pub fn convert_and_run(&mut self, wav_input: &[u8]) -> anyhow::Result> { let device = Device::Cpu; let mut wav_input = std::io::Cursor::new(wav_input); - let (header, data) = wav::read(&mut wav_input)?; - console_log!("loaded wav data: {header:?}"); - if header.sampling_rate != m::SAMPLE_RATE as u32 { + let wav_reader = hound::WavReader::new(&mut wav_input)?; + let spec = wav_reader.spec(); + console_log!("loaded wav data: {spec:?}"); + if spec.sample_rate != m::SAMPLE_RATE as u32 { anyhow::bail!("wav file must have a {} sampling rate", m::SAMPLE_RATE); } - let data = data.as_sixteen().expect("expected 16 bit wav file"); - let pcm_data: Vec<_> = data[..data.len() / header.channel_count as usize] - .iter() - .map(|v| *v as f32 / 32768.) - .collect(); + let mut data = wav_reader.into_samples::().collect::>(); + data.truncate(data.len() / spec.channels as usize); + let mut pcm_data = Vec::with_capacity(data.len()); + for d in data.into_iter() { + let d = d?; + pcm_data.push(d as f32 / 32768.) + } console_log!("pcm data loaded {}", pcm_data.len()); let mel = crate::audio::pcm_to_mel(self.model.config(), &pcm_data, &self.mel_filters)?; let mel_len = mel.len();