diff --git a/candle-wasm-examples/llama2-c/lib-example.html b/candle-wasm-examples/llama2-c/lib-example.html
index bc519e4b..5995f003 100644
--- a/candle-wasm-examples/llama2-c/lib-example.html
+++ b/candle-wasm-examples/llama2-c/lib-example.html
@@ -60,23 +60,30 @@
       const seed = getValue("seed");
       const maxSeqLen = getValue("max-seq");
 
-      function updateStatus({ status, message, prompt, sentence }) {
+      function updateStatus(data) {
         const outStatus = document.querySelector("#output-status");
         const outGen = document.querySelector("#output-generation");
+        const outCounter = document.querySelector("#output-counter");
 
-        switch (status) {
+        switch (data.status) {
           case "loading":
             outStatus.hidden = false;
-            outStatus.textContent = message;
+            outStatus.textContent = data.message;
             outGen.hidden = true;
+            outCounter.hidden = true;
             break;
           case "generating":
+            const { message, prompt, sentence, tokensSec, totalTime } = data;
             outStatus.hidden = true;
+            outCounter.hidden = false;
             outGen.hidden = false;
             outGen.innerHTML = `${prompt}${sentence.replace(
               /\<s\>|\<\/s\>/g,
               ""
             )}`;
+            outCounter.innerHTML = `${(totalTime / 1000).toFixed(
+              2
+            )}s (${tokensSec.toFixed(2)} tok/s)`;
             break;
           case "complete":
             outStatus.hidden = true;
@@ -206,8 +213,9 @@
             id="prompt"
             class="font-light w-full px-3 py-2 mx-1 resize-none outline-none"
             placeholder="Add your prompt here..."
+            value="Once upon a time"
           />
-
+
             Run
@@ -291,17 +298,26 @@
             value="299792458"
             class="font-light border border-gray-700 text-right rounded-md p-2"
           />
+
+            Rand
+
           Generation:
-
-
-
+
+            No output yet
diff --git a/candle-wasm-examples/llama2-c/llama2cWorker.js b/candle-wasm-examples/llama2-c/llama2cWorker.js
index ba303aaa..e4229055 100644
--- a/candle-wasm-examples/llama2-c/llama2cWorker.js
+++ b/candle-wasm-examples/llama2-c/llama2cWorker.js
@@ -60,9 +60,10 @@ async function generate(data) {
   const seq_len = model.get_seq_len();
 
   let sentence = "";
-  let max_tokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1;
-
-  while (max_tokens--) {
+  let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1;
+  let startTime = performance.now();
+  let tokensCount = 0;
+  while (tokensCount < maxTokens) {
     await new Promise(async (resolve) => {
       if (controller && controller.signal.aborted) {
         self.postMessage({
@@ -73,6 +74,8 @@ async function generate(data) {
         return;
       }
       const token = await model.next_token();
+      const tokensSec =
+        ((tokensCount + 1) / (performance.now() - startTime)) * 1000;
       sentence += token;
 
       self.postMessage({
@@ -80,10 +83,13 @@ async function generate(data) {
         message: "Generating token",
         token: token,
         sentence: sentence,
+        totalTime: performance.now() - startTime,
+        tokensSec,
         prompt: prompt,
       });
       setTimeout(resolve, 0);
     });
+    tokensCount++;
   }
   self.postMessage({
     status: "complete",
diff --git a/candle-wasm-examples/whisper/lib-example.html b/candle-wasm-examples/whisper/lib-example.html
index a8c49785..ad48072b 100644
--- a/candle-wasm-examples/whisper/lib-example.html
+++ b/candle-wasm-examples/whisper/lib-example.html
@@ -141,7 +141,9 @@
           const { output } = result;
           const text = output.map((segment) => segment.dr.text).join(" ");
           console.log(text);
-          document.getElementById("output").textContent = text;
+          document.querySelector("#output-status").hidden = true;
+          document.querySelector("#output-generation").hidden = false;
+          document.querySelector("#output-generation").textContent = text;
         })
         .catch((error) => {
           console.error(error);
@@ -295,18 +297,21 @@
             Transcribe Audio
           Transcription:
-
+
+            class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2"
+          >
+
+            No transcription results yet
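
---
Note on the throughput bookkeeping (not part of the patch): the tokens/sec
numbers reported by llama2cWorker.js reduce to the sketch below.
`generateWithStats`, `nextToken`, and `onUpdate` are hypothetical names used
for illustration; `nextToken()` stands in for the wasm model's `next_token()`
binding seen in the diff.

  // Stream tokens and report running throughput to a callback.
  async function generateWithStats(nextToken, maxTokens, onUpdate) {
    const startTime = performance.now();
    let sentence = "";
    let tokensCount = 0;
    while (tokensCount < maxTokens) {
      const token = await nextToken(); // one decode step
      sentence += token;
      tokensCount++;
      const totalTime = performance.now() - startTime; // elapsed ms
      // tokens per second = tokens generated / elapsed ms, scaled to seconds
      const tokensSec = (tokensCount / totalTime) * 1000;
      onUpdate({ sentence, tokensSec, totalTime });
    }
    return sentence;
  }

The UI then formats both numbers, as in updateStatus above:
`${(totalTime / 1000).toFixed(2)}s (${tokensSec.toFixed(2)} tok/s)`. The patch
computes tokensSec from `tokensCount + 1` because it posts the message before
incrementing the counter; the sketch increments first, which is equivalent.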