import gradio as gr
from huggingface_hub import InferenceClient
import os
import json
import base64
from PIL import Image
import io

ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")


def encode_image_to_base64(image):
    """Convert a PIL Image to a base64 string"""
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str


def process_uploaded_images(images):
    """Process uploaded images and return image_url dicts for API submission"""
    if not images:
        return []

    image_contents = []
    for img in images:
        if isinstance(img, str):
            # Path to an image
            try:
                image = Image.open(img)
                base64_image = encode_image_to_base64(image)
                image_contents.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                })
            except Exception as e:
                print(f"Error processing image {img}: {e}")
        else:
            # Already a PIL Image
            try:
                base64_image = encode_image_to_base64(img)
                image_contents.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                })
            except Exception as e:
                print(f"Error processing uploaded image: {e}")
    return image_contents


def respond(
    message,
    images,  # New parameter for uploaded images
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    provider,
    custom_api_key,
    custom_model,
    model_search_term,
    selected_model
):
    # gr.Image(type="pil") returns a single PIL image (or None), not a list;
    # normalize so the rest of the function can treat `images` uniformly
    if images is not None and not isinstance(images, (list, tuple)):
        images = [images]
    history = history or []

    print(f"Received message: {message}")
    print(f"Received images: {len(images) if images else 0} image(s)")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
    print(f"Selected provider: {provider}")
    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
    print(f"Selected model (custom_model): {custom_model}")
    print(f"Model search term: {model_search_term}")
    print(f"Selected model from radio: {selected_model}")

    # Determine which token to use - custom API key if provided, otherwise the ACCESS_TOKEN
    token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN

    # Log which token source we're using (without printing the actual token)
    if custom_api_key.strip() != "":
        print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
    else:
        print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")

    # Initialize the Inference Client with the provider and appropriate token
    client = InferenceClient(token=token_to_use, provider=provider)
    print(f"Hugging Face Inference Client initialized with {provider} provider.")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    # Prepare messages in the format expected by the API
    messages = [{"role": "system", "content": system_message}]
    print("Initial messages array constructed.")

    # The chatbot history already ends with the current user turn (appended by
    # submit_message) with no assistant reply yet. Keep that turn out of the
    # context loop so it is not added twice (it is rebuilt below from `message`
    # and `images`), and remember it so the streamed reply can be shown next to it.
    if history and history[-1][1] is None:
        past_turns = history[:-1]
        current_user_display = history[-1][0]
    else:
        past_turns = history
        current_user_display = message

    # Add conversation history to the context
    for val in past_turns:
        user_part = val[0]
        assistant_part = val[1]

        # Process user messages (could be multimodal)
        if user_part:
            # Check if the user message is already multimodal (from history)
            if isinstance(user_part, list):
                # Already in multimodal format, use as is
                messages.append({"role": "user", "content": user_part})
                print("Added multimodal user message from history")
            else:
                # Simple text message
                messages.append({"role": "user", "content": user_part})
                print(f"Added user message to context: {user_part}")

        # Process assistant messages (always text)
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
            print(f"Added assistant message to context: {assistant_part}")

    # Process the current message (could include images)
    current_message_content = []

    # Add text content if provided
    if message and message.strip():
        current_message_content.append({
            "type": "text",
            "text": message
        })

    # Process and add image content if provided
    if images:
        image_contents = process_uploaded_images(images)
        current_message_content.extend(image_contents)

    # Format the final message based on content
    if current_message_content:
        if len(current_message_content) == 1 and "type" in current_message_content[0] and current_message_content[0]["type"] == "text":
            # If only text, use simple string format for compatibility with all models
            messages.append({"role": "user", "content": current_message_content[0]["text"]})
            print(f"Added simple text user message: {current_message_content[0]['text']}")
        else:
            # If multimodal content, use the array format
            messages.append({"role": "user", "content": current_message_content})
            print(f"Added multimodal user message with {len(current_message_content)} parts")

    # Determine which model to use, prioritizing custom_model if provided
    model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
    print(f"Model selected for inference: {model_to_use}")

    # Start with an empty string to build the response as tokens stream in
    response = ""
    print(f"Sending request to {provider} provider.")

    # Prepare parameters for the chat completion request
    parameters = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
    }

    if seed is not None:
        parameters["seed"] = seed

    # Use the InferenceClient for making the request
    try:
        # Create a generator for the streaming response
        stream = client.chat_completion(
            model=model_to_use,
            messages=messages,
            stream=True,
            **parameters
        )

        # Print a starting message for token streaming
        print("Received tokens: ", end="", flush=True)

        # Process the streaming response
        for chunk in stream:
            if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
                # Extract the content from the response
                if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
                    token_text = chunk.choices[0].delta.content
                    if token_text:
                        # Print tokens inline without newlines
                        print(token_text, end="", flush=True)
                        response += token_text
                        # Yield the full chat history (not a bare string) so the
                        # Chatbot component can render the streamed reply
                        yield past_turns + [[current_user_display, response]]

        # Print a newline at the end of all tokens
        print()
    except Exception as e:
        print(f"Error during inference: {e}")
        response += f"\nError: {str(e)}"
        yield past_turns + [[current_user_display, response]]

    print("Completed response generation.")


# Function to validate provider selection based on BYOK
def validate_provider(api_key, provider):
    # If no custom API key is provided, only "hf-inference" can be used
    if not api_key.strip() and provider != "hf-inference":
        return gr.update(value="hf-inference")
    return gr.update(value=provider)


# Function to update featured model list based on search
def filter_models(search_term):
    print(f"Filtering models with search term: {search_term}")
    filtered = [m for m in models_list if search_term.lower() in m.lower()]
    print(f"Filtered models: {filtered}")
    return gr.update(choices=filtered)


def set_custom_model_from_radio(selected):
    """
    This function will get triggered whenever someone picks a model from the
    'Featured Models' radio. We will update the Custom Model text box with that
    selection automatically.
""" print(f"Featured model selected: {selected}") return selected # Define multimodal models list multimodal_models_list = [ "meta-llama/Llama-3.3-70B-Vision-Instruct", "meta-llama/Llama-3.1-8B-Vision-Instruct", "Qwen/Qwen2.5-VL-7B-Chat", "Qwen/Qwen2.5-VL-3B-Chat", "microsoft/Phi-3-vision-instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", "deepseek-ai/DeepSeek-VL-7B-Chat", "01-ai/Yi-VL-6B-Chat", "01-ai/Yi-VL-34B-Chat", "Cohere/command-vision-nightly", "LLaVA/llava-1.6-34b-hf", "fireworks-ai/FireworksBridge-Vision-Alpha", "liuhaotian/llava-v1.6-vicuna-13b", ] # Add multimodal models to the full model list models_list = [ "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.1-70B-Instruct", "meta-llama/Llama-3.0-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-3.2-1B-Instruct", "meta-llama/Llama-3.1-8B-Instruct", "NousResearch/Hermes-3-Llama-3.1-8B", "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", "mistralai/Mistral-Nemo-Instruct-2407", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3", "mistralai/Mistral-7B-Instruct-v0.2", "Qwen/Qwen3-235B-A22B", "Qwen/Qwen3-32B", "Qwen/Qwen2.5-72B-Instruct", "Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-0.5B-Instruct", "Qwen/QwQ-32B", "Qwen/Qwen2.5-Coder-32B-Instruct", "microsoft/Phi-3.5-mini-instruct", "microsoft/Phi-3-mini-128k-instruct", "microsoft/Phi-3-mini-4k-instruct", "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceTB/SmolLM2-360M-Instruct", "tiiuae/falcon-7b-instruct", "01-ai/Yi-1.5-34B-Chat", ] + multimodal_models_list # Add multimodal models to the list # Create a custom ChatBot class that will display images def format_history_with_images(history): """ Format history for display in the chatbot, handling multimodal content """ formatted_history = [] for user_msg, assistant_msg in history: # Process user message if isinstance(user_msg, list): # Multimodal message formatted_user_msg = [] for item in user_msg: if item.get("type") == "text": formatted_user_msg.append(item["text"]) elif item.get("type") == "image_url": # Extract the base64 image data img_url = item.get("image_url", {}).get("url", "") if img_url.startswith("data:image/"): formatted_user_msg.append((img_url, "image")) formatted_history.append((formatted_user_msg, assistant_msg)) else: # Regular text message formatted_history.append((user_msg, assistant_msg)) return formatted_history # GRADIO UI # Create a custom chatbot that can display images chatbot = gr.Chatbot( height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel" ) print("Chatbot interface created.") # Create a virtual column layout for the message input area with gr.Blocks() as msg_input: with gr.Row(): with gr.Column(scale=4): msg = gr.Textbox( placeholder="Enter text here or upload an image", show_label=False, container=False, lines=3 ) with gr.Column(scale=1, min_width=50): img_upload = gr.Image( type="pil", label="Upload Image", show_label=False, icon="🖼️", container=True, height=50, width=50 ) # Basic input components system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt") with gr.Accordion("Model Settings", open=False): with gr.Row(): with gr.Column(): max_tokens_slider = gr.Slider( minimum=1, maximum=4096, value=512, step=1, label="Max tokens" ) temperature_slider = gr.Slider( minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature" ) with gr.Column(): top_p_slider = gr.Slider( 
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-P"
            )
            frequency_penalty_slider = gr.Slider(
                minimum=-2.0,
                maximum=2.0,
                value=0.0,
                step=0.1,
                label="Frequency Penalty"
            )
    with gr.Row():
        seed_slider = gr.Slider(
            minimum=-1,
            maximum=65535,
            value=-1,
            step=1,
            label="Seed (-1 for random)"
        )

with gr.Accordion("Model Selection", open=False):
    with gr.Row():
        with gr.Column():
            # Provider selection
            providers_list = [
                "hf-inference",  # Default Hugging Face Inference
                "cerebras",      # Cerebras provider
                "together",      # Together AI
                "sambanova",     # SambaNova
                "novita",        # Novita AI
                "cohere",        # Cohere
                "fireworks-ai",  # Fireworks AI
                "hyperbolic",    # Hyperbolic
                "nebius",        # Nebius
            ]
            provider_radio = gr.Radio(
                choices=providers_list,
                value="hf-inference",
                label="Inference Provider",
                info="[View all models here](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending)"
            )

            # New BYOK textbox - Added for the new feature
            byok_textbox = gr.Textbox(
                value="",
                label="BYOK (Bring Your Own Key)",
                info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
                placeholder="Enter your Hugging Face API token",
                type="password"  # Hide the API key for security
            )

        with gr.Column():
            # Model selection components
            model_search_box = gr.Textbox(
                label="Filter Models",
                placeholder="Search for a featured model...",
                lines=1
            )
            featured_model_radio = gr.Radio(
                label="Select a model below",
                choices=models_list,
                value="meta-llama/Llama-3.3-70B-Vision-Instruct",  # Default to a multimodal model
                interactive=True
            )

            # Custom model box
            custom_model_box = gr.Textbox(
                value="",
                label="Custom Model",
                info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
                placeholder="meta-llama/Llama-3.3-70B-Vision-Instruct"
            )

    gr.Markdown("[See all multimodal models](https://huggingface.co/models?pipeline_tag=visual-question-answering&sort=trending)")


# Main Gradio interface
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    gr.Markdown("# 🤖 Serverless-MultiModal-Hub")

    with gr.Row():
        with gr.Column(scale=3):
            # Display the chatbot (the component was created above, outside this
            # Blocks context, so it has to be rendered here explicitly)
            chatbot_interface = chatbot.render()

            # Custom submit function to handle multimodal inputs
            def submit_message(message, images, history):
                history = history or []

                # gr.Image(type="pil") returns a single image; normalize it to a
                # list so the loop below works for one or many images
                if images is not None and not isinstance(images, (list, tuple)):
                    images = [images]

                # Format the message content based on whether there are images
                if images:
                    # Create a multimodal message format for history display
                    user_msg = []
                    if message:
                        user_msg.append({"type": "text", "text": message})

                    # Add each image as an image_url item
                    for img in images:
                        if img is not None:
                            img_base64 = encode_image_to_base64(img)
                            img_url = f"data:image/jpeg;base64,{img_base64}"
                            user_msg.append({
                                "type": "image_url",
                                "image_url": {"url": img_url}
                            })

                    # Add to history
                    history.append([user_msg, None])
                else:
                    # Text-only message
                    if message:
                        history.append([message, None])
                    else:
                        # No content to submit
                        return history

                return history

            # Create message input
            with gr.Group():
                with gr.Row():
                    with gr.Column(scale=4):
                        text_input = gr.Textbox(
                            placeholder="Enter text here",
                            show_label=False,
                            container=False,
                            lines=3
                        )
                    with gr.Column(scale=1, min_width=50):
                        image_input = gr.Image(
                            type="pil",
                            label="Upload Image",
                            show_label=False,
                            sources=["upload", "clipboard"],
                            height=100,
                            visible=True
                        )

            # Submit button
            submit_btn = gr.Button("Submit", variant="primary")

            # Clear button
            clear_btn = gr.Button("Clear")

        with gr.Column(scale=1):
            # Put settings here
            system_message_box = gr.Textbox(
                value="",
                placeholder="You are a helpful assistant that can understand images.",
                label="System Prompt",
                lines=2
            )

            with gr.Accordion("Model Selection", open=False):
                # Provider selection
                provider_radio = gr.Radio(
                    choices=providers_list,
                    value="hf-inference",
                    label="Inference Provider"
                )

                # BYOK textbox
                byok_textbox = gr.Textbox(
                    value="",
                    label="API Key",
                    placeholder="Enter provider API key",
                    type="password"
                )

                # Model selection components
                model_search_box = gr.Textbox(
                    label="Filter Models",
                    placeholder="Search models...",
                    lines=1
                )
                featured_model_radio = gr.Radio(
                    label="Models",
                    choices=models_list,
                    value="meta-llama/Llama-3.3-70B-Vision-Instruct",
                    interactive=True
                )
                custom_model_box = gr.Textbox(
                    value="",
                    label="Custom Model",
                    placeholder="Enter model path"
                )

                gr.Markdown("[View all multimodal models](https://huggingface.co/models?pipeline_tag=visual-question-answering&sort=trending)")

            with gr.Accordion("Model Settings", open=False):
                max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens")
                temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
                top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
                frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
                seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")

    # Connect the submit button
    submit_btn.click(
        fn=submit_message,
        inputs=[text_input, image_input, chatbot_interface],
        outputs=[chatbot_interface],
        queue=False
    ).then(
        fn=respond,
        inputs=[
            text_input,
            image_input,
            chatbot_interface,
            system_message_box,
            max_tokens_slider,
            temperature_slider,
            top_p_slider,
            frequency_penalty_slider,
            seed_slider,
            provider_radio,
            byok_textbox,
            custom_model_box,
            model_search_box,
            featured_model_radio
        ],
        outputs=[chatbot_interface],
        queue=True
    ).then(
        fn=lambda: (None, None),  # Clear inputs after submission
        inputs=None,
        outputs=[text_input, image_input]
    )

    # Clear button functionality
    clear_btn.click(lambda: None, None, chatbot_interface, queue=False)

    # Connect the model filter to update the radio choices
    model_search_box.change(
        fn=filter_models,
        inputs=model_search_box,
        outputs=featured_model_radio
    )

    # Connect the featured model radio to update the custom model box
    featured_model_radio.change(
        fn=set_custom_model_from_radio,
        inputs=featured_model_radio,
        outputs=custom_model_box
    )

    # Connect the BYOK textbox to validate provider selection
    byok_textbox.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )

    # Also validate provider when the radio changes to ensure consistency
    provider_radio.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )

if __name__ == "__main__":
    print("Launching Serverless-MultiModal-Hub application.")
    demo.launch(show_api=True)