File size: 815 Bytes
e8b87f3
098a8f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8b87f3
 
098a8f9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import gradio as gr
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Load the VLLM model and tokenizer
model_name = "mistralai/Pixtral-12B-2409"
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define a function to generate text from an image
def generate_text(image):
    inputs = tokenizer(image, return_tensors="pt")
    outputs = model.generate(inputs["input_ids"], num_beams=4, no_repeat_ngram_size=2)
    text = outputs[0].cpu().numpy().decode("utf-8")
    return text

# Create a Gradio interface
demo = gr.Interface(
    fn=generate_text,
    inputs=["image"],
    outputs=["text"],
    title="Image-to-Text Demo",
    description="Generate text from an image using a VLLM model",
)

# Launch the Gradio interface
demo.launch()