"""Gradio chat app that streams completions from multiple hosted LLMs through the
Hugging Face Inference API's OpenAI-compatible endpoint, so no local GPU is needed."""

import os
import re
from datetime import datetime

import gradio as gr
from openai import OpenAI

# App title and description
APP_TITLE = "NO GPU, Multi LLMs Uses"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# Load environment variables
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model,
):
    """Stream a chat completion, yielding the updated chat history as tokens arrive."""
    print(f"Received message: {message}")
    print(f"Selected model: {custom_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context
    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # If the user provided a model, use it; otherwise fall back to a default model
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.3-70B-Instruct"

    # Build the response as tokens stream in. The Chatbot component expects the
    # full (user, assistant) history, so yield that rather than a bare string.
    response = ""
    try:
        for message_chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            token_text = message_chunk.choices[0].delta.content
            if token_text is not None:  # some stream chunks carry no content
                response += token_text
                yield history + [(message, response)]
    except Exception as e:
        yield history + [
            (message, f"Error: {e}\n\nPlease check your model selection and parameters, or try again later.")
        ]
    print("Completed response generation.")
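
# A minimal sketch (not executed by the app) of driving `respond` directly: it is a
# generator, and each iteration yields the full updated history, so a caller could do
#
#   for updated_history in respond(
#       "Hello!", [], "You are a helpful assistant.",
#       max_tokens=256, temperature=0.7, top_p=0.95,
#       frequency_penalty=0.0, seed=-1, custom_model="",
#   ):
#       pass  # updated_history[-1] is the (user, partial_assistant) pair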
# Model categories for better organization
MODEL_CATEGORIES = {
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        "meta-llama/Llama-3.0-70B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Qwen": [
        "Qwen/Qwen3-235B-A22B",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/QwQ-32B",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ],
}

# Flatten the model list for search functionality
ALL_MODELS = []
for category, models in MODEL_CATEGORIES.items():
    ALL_MODELS.extend(models)


# Helper function to get model info display
def get_model_info(model_name):
    """Extract and format model information for display."""
    parts = model_name.split("/")
    if len(parts) != 2:
        # Guard: user-typed ids may not have the expected "org/model" shape
        return f"**Model:** {model_name}\n**Size:** Unknown"
    org, model = parts

    # Extract the parameter count (e.g. "70B") from the model name
    size_match = re.search(r"(\d+\.?\d*)B", model)
    size = size_match.group(1) + "B" if size_match else "Unknown"

    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"


def filter_models(search_term):
    """Filter models based on search term across all categories."""
    if not search_term:
        return MODEL_CATEGORIES

    filtered_categories = {}
    for category, models in MODEL_CATEGORIES.items():
        filtered_models = [m for m in models if search_term.lower() in m.lower()]
        if filtered_models:
            filtered_categories[category] = filtered_models

    return filtered_categories


def update_model_display(search_term=""):
    """Update the model selection UI based on search term."""
    filtered_categories = filter_models(search_term)

    # Create HTML for model display
    html = "<div>"
    for category, models in filtered_categories.items():
        html += f"""
        <div>
            <h3>{category}</h3>
            <div>
        """
        for model in models:
            model_short = model.split("/")[-1]
            html += f"""
            <div class="model-card">
                <strong>{model_short}</strong><br>
                <small>{model.split('/')[0]}</small>
            </div>
            """
        html += "</div></div>"

    if not filtered_categories:
        html += "<div><p>No models found matching your search.</p></div>"

    html += "</div>"
    return html


# Create custom CSS for better styling
custom_css = """
#app-container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
}
#chat-container {
    border-radius: 12px;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
    overflow: hidden;
}
.contain {
    background: linear-gradient(135deg, #f5f7fa 0%, #e4e7eb 100%);
}
h1, h2, h3 {
    font-family: 'Poppins', sans-serif;
}
h1 {
    background: linear-gradient(90deg, #2b6cb0, #4299e1);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 700;
    letter-spacing: -0.5px;
    margin-bottom: 8px;
}
.parameter-row {
    display: flex;
    gap: 10px;
    margin-bottom: 10px;
}
.model-card:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 12px rgba(0,0,0,0.15);
    border-color: #4299e1;
}
.tabs {
    box-shadow: 0 2px 10px rgba(0,0,0,0.05);
    border-radius: 8px;
    overflow: hidden;
}
.footer {
    text-align: center;
    margin-top: 20px;
    font-size: 0.8em;
    color: #666;
}
/* Status indicator styles */
.status-indicator {
    display: inline-block;
    width: 10px;
    height: 10px;
    border-radius: 50%;
    margin-right: 6px;
}
.status-active {
    background-color: #10B981;
    animation: pulse 2s infinite;
}
@keyframes pulse {
    0% { box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7); }
    70% { box-shadow: 0 0 0 5px rgba(16, 185, 129, 0); }
    100% { box-shadow: 0 0 0 0 rgba(16, 185, 129, 0); }
}
/* Parameter tooltips */
.parameter-container {
    position: relative;
}
.parameter-info {
    display: none;
    position: absolute;
    background: white;
    border: 1px solid #ddd;
    padding: 10px;
    border-radius: 6px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
    z-index: 100;
    width: 250px;
    top: 100%;
    left: 10px;
}
.parameter-container:hover .parameter-info {
    display: block;
}
"""

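# For example, filter_models("coder") returns {"Qwen": ["Qwen/Qwen2.5-Coder-32B-Instruct"]}
# (the only id matching that search), and update_model_display("coder") renders just that
# card; an empty search term returns every category unchanged.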
with gr.Blocks(css=custom_css, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.HTML(f"""
    <div id="app-container">
        <h1>{APP_TITLE}</h1>
        <p>{APP_DESCRIPTION}</p>
        <p>
            <span class="status-indicator status-active"></span>Service Active
            | Last Updated: {datetime.now().strftime('%Y-%m-%d')}
        </p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=3):
            # Main chat interface (gr.Group replaces gr.Box, which was removed in Gradio 4)
            with gr.Group(elem_id="chat-container"):
                chatbot = gr.Chatbot(
                    height=550,
                    show_copy_button=True,
                    placeholder="Select a model and begin chatting",
                    layout="panel",
                )

            with gr.Row():
                with gr.Column(scale=8):
                    msg = gr.Textbox(
                        show_label=False,
                        placeholder="Type your message here...",
                        container=False,
                        scale=8,
                    )
                with gr.Column(scale=1, min_width=70):
                    submit_btn = gr.Button("Send", variant="primary", scale=1)

            with gr.Accordion("Conversation Settings", open=False):
                system_message_box = gr.Textbox(
                    value="You are a helpful assistant.",
                    placeholder="System prompt that guides the assistant's behavior",
                    label="System Prompt",
                    lines=2,
                )

            with gr.Tabs(elem_classes="tabs"):
                with gr.TabItem("Basic Parameters"):
                    with gr.Row(elem_classes="parameter-row"):
                        with gr.Column():
                            max_tokens_slider = gr.Slider(
                                minimum=1, maximum=4096, value=512, step=1,
                                label="Max new tokens",
                            )
                        with gr.Column():
                            temperature_slider = gr.Slider(
                                minimum=0.1, maximum=4.0, value=0.7, step=0.1,
                                label="Temperature",
                            )

                with gr.TabItem("Advanced Parameters"):
                    with gr.Row(elem_classes="parameter-row"):
                        with gr.Column():
                            top_p_slider = gr.Slider(
                                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                                label="Top-P",
                            )
                        with gr.Column():
                            frequency_penalty_slider = gr.Slider(
                                minimum=-2.0, maximum=2.0, value=0.0, step=0.1,
                                label="Frequency Penalty",
                            )
                    seed_slider = gr.Slider(
                        minimum=-1, maximum=65535, value=-1, step=1,
                        label="Seed (-1 for random)",
                    )

        with gr.Column(scale=2):
            # Model selection panel (gr.Group replaces the removed gr.Box here as well)
            with gr.Group():
                gr.HTML("<h3>Model Selection</h3>")

                # Custom model input (this is what the respond function sees)
                custom_model_box = gr.Textbox(
                    value="meta-llama/Llama-3.3-70B-Instruct",
                    label="Selected Model",
                    elem_id="custom-model-input",
                )

                # Search box
                model_search_box = gr.Textbox(
                    label="Search Models",
                    placeholder="Type to filter models...",
                    lines=1,
                )

                # Dynamic model display area
                model_display = gr.HTML(update_model_display())

                # Model information display
                gr.HTML("<h3>Current Model Info</h3>")
                model_info_display = gr.Markdown(
                    get_model_info("meta-llama/Llama-3.3-70B-Instruct")
                )

    # Footer
    gr.HTML(""" """)

    # Set up event handlers: both submitting the textbox and clicking Send stream
    # through respond(), which yields the updated history for the chatbot.
    msg.submit(
        fn=respond,
        inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
                top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
        outputs=[chatbot],
        queue=True,
    )

    submit_btn.click(
        fn=respond,
        inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
                top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
        outputs=[chatbot],
        queue=True,
    )

    # Update model display when search changes
    model_search_box.change(
        fn=update_model_display,
        inputs=model_search_box,
        outputs=model_display,
    )

    # Update model info when selection changes
    custom_model_box.change(
        fn=get_model_info,
        inputs=custom_model_box,
        outputs=model_info_display,
    )

if __name__ == "__main__":
    print("Launching the enhanced multi-model chat interface.")
    demo.launch()