From 1d0bb48fae08f9bb5b6547ccff086c24b87a6775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radam=C3=A9s=20Ajna?= Date: Wed, 30 Aug 2023 11:35:41 -0700 Subject: [PATCH] Improve Whisper WASM UI example (#669) * wip add module and js worker example * params * clean up, send error * final UI with whisper webworker * add simple instructions --- candle-wasm-examples/whisper/README.md | 56 ++++ candle-wasm-examples/whisper/build-lib.sh | 2 + candle-wasm-examples/whisper/lib-example.html | 313 ++++++++++++++++++ candle-wasm-examples/whisper/src/bin/m.rs | 41 +++ candle-wasm-examples/whisper/src/lib.rs | 2 +- candle-wasm-examples/whisper/src/worker.rs | 4 +- candle-wasm-examples/whisper/whisperWorker.js | 72 ++++ 7 files changed, 487 insertions(+), 3 deletions(-) create mode 100644 candle-wasm-examples/whisper/README.md create mode 100644 candle-wasm-examples/whisper/build-lib.sh create mode 100644 candle-wasm-examples/whisper/lib-example.html create mode 100644 candle-wasm-examples/whisper/src/bin/m.rs create mode 100644 candle-wasm-examples/whisper/whisperWorker.js diff --git a/candle-wasm-examples/whisper/README.md b/candle-wasm-examples/whisper/README.md new file mode 100644 index 00000000..b847a965 --- /dev/null +++ b/candle-wasm-examples/whisper/README.md @@ -0,0 +1,56 @@ +## Running Whisper Examples + +Here, we provide two examples of how to run Whisper using a Candle-compiled WASM binary and runtimes. 
+ +### Pure Rust UI + +To build and test the UI made in Rust you will need [Trunk](https://trunkrs.dev/#install). +From the `candle-wasm-examples/whisper` directory, run: + +Download assets: + +```bash +# Model and tokenizer +wget -c https://huggingface.co/spaces/lmz/candle-whisper/resolve/main/mel_filters.safetensors +wget -c https://huggingface.co/spaces/lmz/candle-whisper/resolve/main/tiny.en.safetensors +wget -c https://huggingface.co/spaces/lmz/candle-whisper/resolve/main/tokenizer.en.json + + +# Audio samples +wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_gb0.wav -O gb0.wav +wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_a13.wav -O a13.wav +wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_gb1.wav -O gb1.wav +wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_hp0.wav -O hp0.wav +wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_jfk.wav -O jfk.wav +wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_mm0.wav -O mm0.wav + +``` + +Run the hot-reload server: + +```bash +trunk serve --release --public-url / --port 8080 +``` + +### Vanilla JS and WebWorkers + +To build and test the UI made in Vanilla JS and WebWorkers, first we need to build the WASM library: + +```bash +sh build-lib.sh +``` + +This will bundle the library under `./build` and we can import it inside our WebWorker like a normal JS module: + +```js +import init, { Decoder } from "./build/m.js"; +``` + +The full example can be found under `./lib-example.html`. All needed assets are fetched from the web, so there is no need to download anything. +Finally, you can preview the example by running a local HTTP server. For example: + +```bash +python -m http.server +``` + +Then open `http://localhost:8000/lib-example.html` in your browser. 
diff --git a/candle-wasm-examples/whisper/build-lib.sh b/candle-wasm-examples/whisper/build-lib.sh
new file mode 100644
index 00000000..b0ebb182
--- /dev/null
+++ b/candle-wasm-examples/whisper/build-lib.sh
@@ -0,0 +1,7 @@
+# Builds the WASM library and generates its JS bindings under ./build.
+# Stop at the first failing command so that wasm-bindgen never runs on a
+# missing or stale m.wasm after a failed cargo build.
+set -e
+
+cargo build --target wasm32-unknown-unknown --release
+wasm-bindgen ../../target/wasm32-unknown-unknown/release/m.wasm --out-dir build --target web
diff --git a/candle-wasm-examples/whisper/lib-example.html b/candle-wasm-examples/whisper/lib-example.html
new file mode 100644
index 00000000..a8c49785
--- /dev/null
+++ b/candle-wasm-examples/whisper/lib-example.html
@@ -0,0 +1,313 @@
+ + + + Candle Whisper Rust/WASM + + + + + + + + + + + + + + +
+ 🕯️ +
+

Candle Whisper

+

Rust/WASM Demo

+

+ Transcribe audio in the browser using rust/wasm with an audio file. + This demo uses the + + OpenAI Whisper models + + and WASM runtime built with + Candle + +

+
+ +
+ + +
+ +
+
+
+ + + +
+ +
+ +
+ +
+
+
+
+

Examples:

+ + + + + + +
+
+ +
+ +
+
+

Transcription:

+ +
+
+
+
+
diff --git a/candle-wasm-examples/whisper/src/bin/m.rs b/candle-wasm-examples/whisper/src/bin/m.rs
new file mode 100644
index 00000000..88b25267
--- /dev/null
+++ b/candle-wasm-examples/whisper/src/bin/m.rs
@@ -0,0 +1,53 @@
+use candle_wasm_example_whisper::worker::{Decoder as D, ModelData};
+use wasm_bindgen::prelude::*;
+
+/// Whisper decoder exposed to JavaScript through `wasm-bindgen`.
+#[wasm_bindgen]
+pub struct Decoder {
+    decoder: D,
+}
+
+#[wasm_bindgen]
+impl Decoder {
+    /// Loads a decoder from the raw bytes of the weights, tokenizer and mel filters.
+    ///
+    /// # Errors
+    ///
+    /// Returns a `JsError` carrying the message of the error reported by `D::load`.
+    #[wasm_bindgen(constructor)]
+    pub fn new(
+        weights: Vec<u8>,
+        tokenizer: Vec<u8>,
+        mel_filters: Vec<u8>,
+    ) -> Result<Decoder, JsError> {
+        let decoder = D::load(ModelData {
+            tokenizer,
+            mel_filters,
+            weights,
+        });
+
+        match decoder {
+            Ok(decoder) => Ok(Self { decoder }),
+            Err(e) => Err(JsError::new(&e.to_string())),
+        }
+    }
+
+    /// Decodes the bytes of a WAV file and returns the segments as a JSON string.
+    ///
+    /// # Errors
+    ///
+    /// Returns a `JsError` if decoding fails or the segments cannot be serialized.
+    #[wasm_bindgen]
+    pub fn decode(&self, wav_input: Vec<u8>) -> Result<String, JsError> {
+        let segments = self
+            .decoder
+            .convert_and_run(&wav_input)
+            .map_err(|e| JsError::new(&e.to_string()))?;
+
+        let json = serde_json::to_string(&segments)?;
+        Ok(json)
+    }
+}
+
+// No-op entry point; the exported bindings above are what `build-lib.sh` packages.
+fn main() {}
diff --git a/candle-wasm-examples/whisper/src/lib.rs b/candle-wasm-examples/whisper/src/lib.rs
index d738ca6a..141714f5 100644
--- a/candle-wasm-examples/whisper/src/lib.rs
+++ b/candle-wasm-examples/whisper/src/lib.rs
@@ -24,6 +24,6 @@ impl Drop for Timer {
 mod app;
 mod audio;
 mod model;
-mod worker;
+pub mod worker;
 pub use app::App;
 pub use worker::Worker;
diff --git a/candle-wasm-examples/whisper/src/worker.rs b/candle-wasm-examples/whisper/src/worker.rs
index bbcae36c..49b2cd09 100644
--- a/candle-wasm-examples/whisper/src/worker.rs
+++ b/candle-wasm-examples/whisper/src/worker.rs
@@ -222,7 +222,7 @@ impl Decoder {
         Ok(segments)
     }
 
-    fn load(md: ModelData) -> anyhow::Result {
+    pub fn load(md: ModelData) -> anyhow::Result {
         let device = Device::Cpu;
         let tokenizer = Tokenizer::from_bytes(&md.tokenizer).map_err(anyhow::Error::msg)?;
 
@@ -239,7 +239,7 @@ impl Decoder {
         Ok(decoder)
     }
 
-    fn convert_and_run(&self, wav_input: &[u8]) -> anyhow::Result> {
+    pub fn convert_and_run(&self, 
wav_input: &[u8]) -> anyhow::Result> { let device = Device::Cpu; let mut wav_input = std::io::Cursor::new(wav_input); let (header, data) = wav::read(&mut wav_input)?; diff --git a/candle-wasm-examples/whisper/whisperWorker.js b/candle-wasm-examples/whisper/whisperWorker.js new file mode 100644 index 00000000..2598adde --- /dev/null +++ b/candle-wasm-examples/whisper/whisperWorker.js @@ -0,0 +1,72 @@ +//load the candle Whisper decoder wasm module +import init, { Decoder } from "./build/m.js"; + +async function fetchArrayBuffer(url) { + const res = await fetch(url, { + cache: "force-cache", + headers: { + "Cache-Control": "public, max-age=31536000", + }, + }); + const data = await res.arrayBuffer(); + return new Uint8Array(data); +} + +class Whisper { + static instance = {}; + // Retrieve the Whisper model. When called for the first time, + // this will load the model and save it for future use. + static async getInstance(weightsURL, modelID, tokenizerURL, mel_filtersURL) { + // load individual modelID only once + if (!this.instance[modelID]) { + await init(); + + self.postMessage({ status: "loading", message: "Loading Model" }); + const [weightsArrayU8, tokenizerArrayU8, mel_filtersArrayU8] = + await Promise.all([ + fetchArrayBuffer(weightsURL), + fetchArrayBuffer(tokenizerURL), + fetchArrayBuffer(mel_filtersURL), + ]); + + this.instance[modelID] = new Decoder( + weightsArrayU8, + tokenizerArrayU8, + mel_filtersArrayU8 + ); + } else { + self.postMessage({ status: "loading", message: "Model Already Loaded" }); + } + return this.instance[modelID]; + } +} + +self.addEventListener("message", async (event) => { + const { weightsURL, modelID, tokenizerURL, mel_filtersURL, audioURL } = + event.data; + try { + self.postMessage({ status: "decoding", message: "Starting Decoder" }); + + const decoder = await Whisper.getInstance( + weightsURL, + modelID, + tokenizerURL, + mel_filtersURL + ); + + self.postMessage({ status: "decoding", message: "Loading Audio" }); + const 
audioArrayU8 = await fetchArrayBuffer(audioURL); + + self.postMessage({ status: "decoding", message: "Running Decoder..." }); + const segments = decoder.decode(audioArrayU8); + + // Send the segment back to the main thread as JSON + self.postMessage({ + status: "complete", + message: "complete", + output: JSON.parse(segments), + }); + } catch (e) { + self.postMessage({ error: e }); + } +});