T5 Wasm (#918)
* init t5 wasm model
* split workers for each model
* clean up
* add some ui
* readme
* index
* typo
* remove cache param, clear_kv_cache
* add max_length as param
* add model tasks option to ui
* add method to load quantized gguf from buffer
* Add quantized wasm module
* add quantized models to UI, dynamic import wasms
* link to quantized
* fix copy
* fix ModelEncoder
* fix README.md
This commit is contained in:

candle-wasm-examples/t5/T5ModelEncoderWorker.js (new file, 83 lines)

@@ -0,0 +1,83 @@
// Load the Candle T5 wasm module (the quantized or full-precision build
// is chosen at runtime in Encoder.getInstance below).
let init, ModelEncoder;

// Fetch a URL as a Uint8Array, backed by the Cache API so model weights,
// tokenizer, and config are only downloaded once.
async function fetchArrayBuffer(url) {
  const cacheName = "t5-candle-cache";
  const cache = await caches.open(cacheName);
  const cachedResponse = await cache.match(url);
  if (cachedResponse) {
    const data = await cachedResponse.arrayBuffer();
    return new Uint8Array(data);
  }
  const res = await fetch(url, { cache: "force-cache" });
  await cache.put(url, res.clone());
  return new Uint8Array(await res.arrayBuffer());
}

class Encoder {
  // One ModelEncoder instance per modelID, created lazily.
  static instance = {};

  static async getInstance(weightsURL, tokenizerURL, configURL, modelID) {
    // Dynamically import the matching wasm build for this model.
    if (modelID.includes("quantized")) {
      ({ default: init, ModelEncoder } = await import(
        "./build/m-quantized.js"
      ));
    } else {
      ({ default: init, ModelEncoder } = await import("./build/m.js"));
    }
    if (!this.instance[modelID]) {
      await init();

      self.postMessage({ status: "loading", message: "Loading Model" });
      const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
        await Promise.all([
          fetchArrayBuffer(weightsURL),
          fetchArrayBuffer(tokenizerURL),
          fetchArrayBuffer(configURL),
        ]);

      this.instance[modelID] = new ModelEncoder(
        weightsArrayU8,
        tokenizerArrayU8,
        configArrayU8
      );
    } else {
      self.postMessage({ status: "ready", message: "Model Already Loaded" });
    }
    return this.instance[modelID];
  }
}

self.addEventListener("message", async (event) => {
  const {
    weightsURL,
    tokenizerURL,
    configURL,
    modelID,
    sentences,
    normalize_embeddings,
  } = event.data;
  try {
    self.postMessage({ status: "ready", message: "Starting T5 Encoder" });
    const model = await Encoder.getInstance(
      weightsURL,
      tokenizerURL,
      configURL,
      modelID
    );
    self.postMessage({
      status: "encoding",
      message: "Encoding Sentences",
    });
    const output = model.decode({
      sentences: sentences,
      // Default to normalized embeddings; `??` (not `||`) lets an
      // explicit `false` from the caller take effect.
      normalize_embeddings: normalize_embeddings ?? true,
    });
    self.postMessage({
      status: "complete",
      message: "complete",
      output: output,
    });
  } catch (e) {
    self.postMessage({ error: e });
  }
});
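For reference, a minimal sketch (not part of this commit) of how a page might drive this worker. The message fields mirror the handler above; the worker path, URLs, and modelID are placeholder assumptions.

// Spawn the encoder worker as a module worker so its dynamic import() works.
const worker = new Worker("./T5ModelEncoderWorker.js", { type: "module" });

worker.addEventListener("message", (event) => {
  if (event.data.error) {
    console.error(event.data.error);
  } else if (event.data.status === "complete") {
    // `output` holds the embeddings returned by ModelEncoder.decode.
    console.log(event.data.output);
  } else {
    // Progress updates: "loading", "ready", "encoding", ...
    console.log(event.data.status, event.data.message);
  }
});

worker.postMessage({
  weightsURL: "WEIGHTS_URL", // placeholder: URL of the model weights
  tokenizerURL: "TOKENIZER_URL", // placeholder: URL of tokenizer.json
  configURL: "CONFIG_URL", // placeholder: URL of config.json
  modelID: "t5_small_quantized", // "quantized" in the ID selects the gguf build
  sentences: ["A sentence to embed."],
  normalize_embeddings: true,
});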