"""Gradio demo: ask Gemma 3 a free-form question about a fixed sample image."""

import gradio as gr
import torch
from transformers import pipeline

MODEL_ID = "google/gemma-3-4b-it"
SYSTEM_PROMPT = "You are a helpful assistant."
IMAGE_URL = (
    "https://huggingface.co/datasets/huggingface/documentation-images/"
    "resolve/main/p-blog/candy.JPG"
)
MAX_NEW_TOKENS = 200

# Built once at import time so every request reuses the same loaded model.
pipe = pipeline(
    "image-text-to-text",
    model=MODEL_ID,
    device="cuda",
    torch_dtype=torch.bfloat16,
)


def greet(name: str) -> str:
    """Return the model's answer to a prompt about the sample image.

    Args:
        name: The user's text from the Gradio textbox. It is sent as the
            text part of the user turn, alongside the fixed image at
            ``IMAGE_URL``.

    Returns:
        The assistant's reply as plain text, suitable for a Gradio
        ``"text"`` output component.
    """
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": SYSTEM_PROMPT}],
        },
        {
            "role": "user",
            "content": [
                {"type": "image", "url": IMAGE_URL},
                {"type": "text", "text": name},
            ],
        },
    ]
    output = pipe(text=messages, max_new_tokens=MAX_NEW_TOKENS)
    # For chat-style input the pipeline returns a list with one result whose
    # "generated_text" is the whole conversation; the assistant's reply is the
    # last message. Returning the raw list would make Gradio display its repr
    # instead of the answer.
    return output[0]["generated_text"][-1]["content"]


demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()