import gradio as gr
from collections.abc import Generator
from openai import OpenAI

# from gradio.chat_interface import ChatInterface
from chat_interface import ChatInterface

USERNAME = "ahmedheakl"
SPACE_NAME = "AIN-Arabic-VLM"
TITLE = "Welcome to AIN Chatbot"
DESCRIPTION = "Welcome to the AIN Arabic VLM chatbot. The most comprehensive Arabic-English LMM developed by MBZUAI."
# Number of previous history entries forwarded to the model on each request.
TOP_N_HISTORY = 2
LOGO_PATH = "https://huggingface.co/spaces/ahmedheakl/AIN-Arabic-VLM/resolve/main/logo.jpeg"


def get_gradio_url(path: str) -> str:
    """Resolve a local Gradio file path to a publicly reachable Space URL."""
    # If path is already an absolute URL, return it as is.
    if path.startswith("http"):
        return path
    return f"https://{USERNAME}-{SPACE_NAME}.hf.space/gradio_api/file={path}"


def history_to_messages(history: list) -> list:
    """Convert Gradio messages-format history into OpenAI chat-completion messages.

    Consecutive entries from the same role (e.g. an image followed by its text
    prompt) are merged into a single message with multiple content parts.
    """
    messages = []
    merge = False
    for i, h in enumerate(history):
        content = h.get("content", [])
        role = h.get("role", "")
        ct = []
        if isinstance(content, tuple):
            # File entries arrive as a (path,) tuple; expose them as image URLs.
            src_path = content[0]
            ct = [{"type": "image_url", "image_url": {"url": get_gradio_url(src_path)}}]
        else:
            ct = [{"type": "text", "text": content}]
        if merge:
            messages[-1]["content"].extend(ct)
            merge = False
        else:
            messages.append({"role": role, "content": ct})
        if i < len(history) - 1 and role == history[i + 1].get("role", ""):
            merge = True
    return messages


def load_chat(
    base_url: str,
    model: str,
    token: str | None = None,
    *,
    system_message: str | None = None,
    **kwargs,
) -> gr.ChatInterface:
    """Build a multimodal ChatInterface backed by an OpenAI-compatible endpoint."""
    client = OpenAI(api_key=token, base_url=base_url)
    start_message = (
        [{"role": "system", "content": system_message}] if system_message else []
    )

    def open_api_stream(
        message: dict, history: list | None
    ) -> Generator[str, None, None]:
        history = history or start_message
        print(history)
        if len(history) > 0 and isinstance(history[0], (list, tuple)):
            # Legacy tuples format: trim, then convert to messages format.
            history = history[-TOP_N_HISTORY:]
            history = ChatInterface._tuples_to_messages(history)
        elif len(history) > 0 and isinstance(history[0], dict):
            history = history_to_messages(history)
            history = history[-TOP_N_HISTORY:]
        # Multimodal textbox: `message` is a dict with "text" and "files" keys.
        files = message.get("files", [])
        text = message.get("text", "Describe the photo in detail.")
        if text.strip() == "":
            text = "Describe the photo in detail."
        content = [{"type": "text", "text": text}]
        if files:
            src_path = files[0]
            content.append({"type": "image_url", "image_url": {"url": get_gradio_url(src_path)}})
        messages = history + [{"role": "user", "content": content}]
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
        )
        # Stream the accumulated response so the UI updates token by token.
        response = ""
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                response += chunk.choices[0].delta.content
                yield response

    return gr.ChatInterface(
        open_api_stream,
        type="messages",
        **kwargs,
    )


load_chat(
    "https://f362-5-195-0-150.ngrok-free.app/v1",
    model="test",
    token="ollama",
    multimodal=True,
    title=TITLE,
    description=DESCRIPTION,
    theme="ocean",
    # examples=[
    #     {
    #         "text": "أخبرني ما اسم المبنى الموجود في الصورة والشيء المثير للاهتمام فيه",
    #         "files": ["https://cdn.mos.cms.futurecdn.net/5HrnHp9ybAqYrtruKAsfkN-1200-80.jpg"],
    #     },
    #     {
    #         "text": "ما هو العلم الموجود في الصورة؟",
    #         "files": ["https://mtc.ae/wp-content/uploads/2023/09/Satin-UAE-Flag-UAE-F-B-Blank.jpg"],
    #     },
    #     {
    #         "text": "How many people are there in the image?",
    #         "files": ["https://i0.wp.com/eatpitapita.com/wp-content/uploads/2020/02/Arab-Muslim-or-Middle-Eastern-Preview.jpg"]
    #     },
    # ],
    # cache_examples=False
).queue().launch(allowed_paths=["/static"])