mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 18:48:51 +00:00
BERT Wasm (#902)
* implement wasm module * add example to workspace * add UI to explore semantic similarity * change status messages * formatting * minor changes
This commit is contained in:
99
candle-wasm-examples/bert/utils.js
Normal file
99
candle-wasm-examples/bert/utils.js
Normal file
@ -0,0 +1,99 @@
|
||||
/**
 * Posts an embedding request to a worker and resolves with its final message.
 *
 * The request posted to the worker is the object
 * `{ weightsURL, tokenizerURL, configURL, modelID, sentences }`; the values
 * are forwarded as-is. While the request is pending, the payload of every
 * message received from the worker is passed to `updateStatus` (when given),
 * including the final "complete" or error payload.
 *
 * @param {object} worker - Worker-like object exposing `postMessage`,
 *   `addEventListener` and `removeEventListener`.
 * @param {*} weightsURL - Forwarded to the worker as `weightsURL`.
 * @param {*} tokenizerURL - Forwarded to the worker as `tokenizerURL`.
 * @param {*} configURL - Forwarded to the worker as `configURL`.
 * @param {*} modelID - Forwarded to the worker as `modelID`.
 * @param {*} sentences - Forwarded to the worker as `sentences`.
 * @param {?function(*): void} [updateStatus=null] - Optional callback invoked
 *   with the payload of each worker message.
 * @returns {Promise<object>} Resolves with the payload whose `status` is
 *   `"complete"`. Rejects with an `Error` when a payload carries an `error`
 *   property, when the worker emits an `error` event, or when `postMessage`
 *   itself throws.
 */
export async function getEmbeddings(
  worker,
  weightsURL,
  tokenizerURL,
  configURL,
  modelID,
  sentences,
  updateStatus = null
) {
  return new Promise((resolve, reject) => {
    // Detach both listeners once the request has settled so repeated calls
    // on the same worker do not accumulate handlers.
    function cleanup() {
      worker.removeEventListener("message", messageHandler);
      worker.removeEventListener("error", errorHandler);
    }

    function messageHandler(event) {
      const data = event.data;
      // `in` throws on primitives and null, so only inspect object payloads.
      const isObject = data !== null && typeof data === "object";
      if (isObject && "error" in data) {
        cleanup();
        reject(new Error(data.error));
      }
      if (isObject && data.status === "complete") {
        cleanup();
        resolve(data);
      }
      if (updateStatus) updateStatus(data);
    }

    // Without this handler an uncaught exception inside the worker would
    // leave the returned promise pending forever.
    function errorHandler(event) {
      cleanup();
      const message =
        event && event.message ? event.message : "Worker failed";
      reject(new Error(message));
    }

    // Listen before posting so no reply can be missed.
    worker.addEventListener("message", messageHandler);
    worker.addEventListener("error", errorHandler);

    try {
      worker.postMessage({
        weightsURL,
        tokenizerURL,
        configURL,
        modelID,
        sentences,
      });
    } catch (error) {
      // e.g. the request could not be cloned; do not leave listeners behind.
      cleanup();
      reject(error);
    }
  });
}
|
||||
|
||||
// Registry of the selectable models, keyed by model id.
// Each entry holds:
//   base_url        - URL prefix to which the individual file names
//                     (weights, config, tokenizer) are appended.
//   search_prefix   - text prefix associated with search/query input.
//   document_prefix - text prefix associated with document/passage input.
// NOTE(review): the e5 entries use the "query: " / "passage: " prefixes
// (with a trailing space); confirm against the upstream model cards.
const MODELS = {
  intfloat_e5_small_v2: {
    base_url: "https://huggingface.co/intfloat/e5-small-v2/resolve/main/",
    search_prefix: "query: ",
    document_prefix: "passage: ",
  },
  intfloat_e5_base_v2: {
    base_url: "https://huggingface.co/intfloat/e5-base-v2/resolve/main/",
    search_prefix: "query: ",
    // Trailing space is required so the prefix matches the other e5 entries
    // and does not run into the document text.
    document_prefix: "passage: ",
  },
  intfloat_multilingual_e5_small: {
    base_url:
      "https://huggingface.co/intfloat/multilingual-e5-small/resolve/main/",
    search_prefix: "query: ",
    document_prefix: "passage: ",
  },
  // The two MiniLM entries resolve files from pull-request refs (refs/pr/N)
  // rather than from main, and use no prefixes.
  sentence_transformers_all_MiniLM_L6_v2: {
    base_url:
      "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/refs%2Fpr%2F21/",
    search_prefix: "",
    document_prefix: "",
  },
  sentence_transformers_all_MiniLM_L12_v2: {
    base_url:
      "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2/resolve/refs%2Fpr%2F4/",
    search_prefix: "",
    document_prefix: "",
  },
};
|
||||
/**
 * Builds the download URLs and text prefixes for a registered model.
 *
 * @param {string} id - Key of an entry in `MODELS`.
 * @returns {{modelURL: string, configURL: string, tokenizerURL: string,
 *   search_prefix: string, document_prefix: string}} The entry's `base_url`
 *   joined with "model.safetensors", "config.json" and "tokenizer.json",
 *   plus the entry's two prefixes.
 * @throws {Error} When `id` is not a key of `MODELS`.
 */
export function getModelInfo(id) {
  // Own-property check so inherited names such as "constructor" are rejected
  // instead of producing "undefined..." URLs.
  if (!Object.prototype.hasOwnProperty.call(MODELS, id)) {
    throw new Error(`Unknown model id: ${String(id)}`);
  }
  const { base_url, search_prefix, document_prefix } = MODELS[id];
  return {
    modelURL: base_url + "model.safetensors",
    configURL: base_url + "config.json",
    tokenizerURL: base_url + "tokenizer.json",
    search_prefix,
    document_prefix,
  };
}
|
||||
|
||||
/**
 * Computes the cosine similarity of two numeric vectors: their dot product
 * divided by the product of their Euclidean norms.
 *
 * @param {ArrayLike<number>} vec1 - First vector.
 * @param {ArrayLike<number>} vec2 - Second vector, same length as `vec1`.
 * @returns {number} The similarity, or 0 when either vector has zero
 *   magnitude (the ratio would otherwise be 0/0 = NaN).
 * @throws {RangeError} When the vectors differ in length, since the result
 *   would be meaningless.
 */
export function cosineSimilarity(vec1, vec2) {
  if (vec1.length !== vec2.length) {
    throw new RangeError(
      `Vector length mismatch: ${vec1.length} vs ${vec2.length}`
    );
  }

  // Single pass accumulating the dot product and both squared norms.
  let dot = 0;
  let sumSquares1 = 0;
  let sumSquares2 = 0;
  for (let i = 0; i < vec1.length; i += 1) {
    const x = vec1[i];
    const y = vec2[i];
    dot += x * y;
    sumSquares1 += x * x;
    sumSquares2 += y * y;
  }

  const denominator = Math.sqrt(sumSquares1) * Math.sqrt(sumSquares2);
  if (denominator === 0) {
    return 0;
  }
  return dot / denominator;
}
|
||||
/**
 * Fetches the plain-text extract of an English Wikipedia article.
 *
 * Failures (network error, non-OK HTTP status, unexpected response, or no
 * extract for the title) are logged with `console.error` and the function
 * then resolves with `undefined` rather than rejecting.
 *
 * @param {string} article - Raw (not URL-encoded) article title; it is
 *   encoded here before being placed in the query string.
 * @returns {Promise<string|undefined>} The extract text, or `undefined` when
 *   it could not be retrieved.
 */
export async function getWikiText(article) {
  // thanks to wikipedia for the API
  // Encode the title so characters such as "&", "#" or "+" cannot alter or
  // truncate the query string.
  const title = encodeURIComponent(article);
  const requestURL = `https://en.wikipedia.org/w/api.php?action=query&prop=extracts&exlimit=1&titles=${title}&explaintext=1&exsectionformat=plain&format=json&origin=*`;

  try {
    const response = await fetch(requestURL, {
      method: "GET",
      headers: {
        Accept: "application/json",
      },
    });
    if (!response.ok) {
      throw new Error(
        `Wikipedia request failed with status ${response.status}`
      );
    }

    const data = await response.json();
    const pages = data && data.query ? data.query.pages : undefined;
    // Only the first page of the result is used.
    const pageId = pages ? Object.keys(pages)[0] : undefined;
    const extract = pageId !== undefined ? pages[pageId].extract : undefined;
    if (extract === undefined || extract === "") {
      throw new Error("No article found");
    }
    return extract;
  } catch (error) {
    // Best-effort contract: report the problem and resolve with undefined.
    console.error("Error:", error);
    return undefined;
  }
}
|
Reference in New Issue
Block a user