""" File: module_vision.py Description: A module for chat using image + text with a multimodal interface. Author: Didier Guillevic Date: 2025-05-08 """ import gradio as gr import vlm def process(message, history): """Generate the model response given message and history """ messages = vlm.build_messages(message, history) yield from vlm.stream_response(messages) examples=[ { 'files': ['./passport_jp.png',], 'text': 'Can you describe the image and present the information as a JSON document?' }, {'text': 'How can we rationalize quantum entanglement?'}, {'text': "Peux-tu expliquer le terme 'quantum spin'?"} ] # # User interface # with gr.Blocks() as demo: chat_interface = gr.ChatInterface( fn=process, description="Chat with text or text+image.", examples=examples, #cache_examples=False, stop_btn="Stop Generation", multimodal=True, type="messages" )