/// <reference lib="webworker" />
// Transformers.js v3 (@huggingface/transformers) provides WebGPU support and
// AutoModelForImageTextToText; importScripts() is unavailable in module
// workers, so the library is loaded as an ES module instead.
// @ts-ignore -- URL imports carry no bundled type declarations
import { AutoProcessor, AutoModelForImageTextToText, RawImage } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers";
let processor: any = null;
let model: any = null;
let isLoaded = false;
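// Apple's FastVLM-0.5B vision-language model, exported to ONNX by the
// onnx-community org; weights are streamed from the Hugging Face Hub on
// first load and cached by the browser afterwards.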
const MODEL_ID = "onnx-community/FastVLM-0.5B-ONNX";
async function loadModelAndProcessor() {
  if (isLoaded) return;
  processor = await AutoProcessor.from_pretrained(MODEL_ID);
  model = await AutoModelForImageTextToText.from_pretrained(MODEL_ID, {
    // Per-module precision: fp16 token embeddings plus 4-bit-quantized vision
    // encoder and decoder keep download size and GPU memory modest.
    dtype: {
      embed_tokens: "fp16",
      vision_encoder: "q4",
      decoder_model_merged: "q4",
    },
    device: "webgpu",
  });
  isLoaded = true;
}
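// Note: isLoaded only flips after the awaits resolve, so two messages arriving
// before loading finishes could both start a download; caching the load
// promise instead of a boolean would close that race.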
self.onmessage = async (event) => {
  const { imageData, prompt } = event.data;
  await loadModelAndProcessor();
  // Wrap the ImageData's raw RGBA pixels (4 channels) in a RawImage.
  const rawImg = new RawImage(
    imageData.data,
    imageData.width,
    imageData.height,
    4
  );
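  // The <image> placeholder marks where the chat template splices the image
  // tokens in alongside the user's text prompt.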
  const messages = [
    {
      role: "system",
      content: `You are a helpful visual AI assistant. Respond concisely and accurately to the user's query in one sentence.`,
    },
    { role: "user", content: `<image>${prompt}` },
  ];
  const chatPrompt = processor.apply_chat_template(messages, { add_generation_prompt: true });
  const inputs = await processor(rawImg, chatPrompt, { add_special_tokens: false });
  // Greedy decoding (do_sample: false); repetition_penalty discourages
  // degenerate loops in the output.
  const outputs = await model.generate({
    ...inputs,
    max_new_tokens: 512,
    do_sample: false,
    repetition_penalty: 1.2,
  });
  // Slice off the prompt tokens so only the newly generated text is decoded.
  const decoded = processor.batch_decode(
    outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
    { skip_special_tokens: true }
  );
  // Post the decoded answer string back to the main thread.
  self.postMessage(decoded[0].trim());
};
export {};
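// Example main-thread usage (a sketch; the worker file name and the canvas
// source are assumptions, not part of this file):
//
//   const worker = new Worker(new URL("./worker.ts", import.meta.url), { type: "module" });
//   worker.onmessage = (e) => console.log(e.data);
//   const ctx = canvas.getContext("2d")!;
//   worker.postMessage({
//     imageData: ctx.getImageData(0, 0, canvas.width, canvas.height),
//     prompt: "Describe this image.",
//   });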