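"""Gradio Space: streaming multimodal chat with SmolVLM-256M-Instruct."""
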
import gradio as gr
import torch
from PIL import Image
from threading import Thread
from transformers import AutoProcessor, AutoModelForVision2Seq, TextIteratorStreamer
import spaces
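
# Use bfloat16 on GPU for lower memory use; fall back to float32 on CPU.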
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
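
# Load the processor and model once at startup so every request reuses them.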
MODEL_ID = "HuggingFaceTB/SmolVLM-256M-Instruct"
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_ID,
    torch_dtype=torch_dtype,
    trust_remote_code=True,
).to(device)
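
# On ZeroGPU Spaces, the @spaces.GPU decorator allocates a GPU for the
# duration of each call to the decorated function.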
@spaces.GPU
def respond(
    message,
    history: list[dict],  # with type="messages", history entries are role/content dicts
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": [{"type": "text", "text": system_message}]}]

    # The SmolVLM chat template expects structured content (lists of typed
    # parts), so wrap plain-string history entries; file entries from earlier
    # turns (tuples of paths) are skipped here.
    for turn in history:
        if isinstance(turn["content"], str):
            messages.append({"role": turn["role"], "content": [{"type": "text", "text": turn["content"]}]})

    images = []
    if message["files"]:
        pil_image = Image.open(message["files"][0]).convert("RGB")
        images.append(pil_image)

    # An explicit image placeholder lets the processor pair the pixel values
    # with the matching position in the prompt.
    user_content = [{"type": "text", "text": message["text"]}]
    if images:
        user_content.insert(0, {"type": "image"})
    messages.append({"role": "user", "content": user_content})

    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=prompt, images=images or None, return_tensors="pt").to(device, torch_dtype)
    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
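
    # Run generation on a background thread so the loop below can stream
    # partial text as tokens arrive.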
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    response = ""
    for new_text in streamer:
        response += new_text
        yield response
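
# Multimodal ChatInterface: type="messages" passes history as OpenAI-style
# role/content dicts, and multimodal=True adds a file-upload box for images.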
demo = gr.ChatInterface(
    respond,
    type="messages",
    multimodal=True,
    additional_inputs=[
        gr.Textbox(
            value="You are a helpful and friendly multimodal assistant. You can analyze images and answer questions about them.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    title="Chatbot",
    description="Ask me anything or upload an image. This version uses AutoModel and AutoProcessor directly.",
)

if __name__ == "__main__":
    demo.launch()