Spaces:

keeperballon
/

multi-llm

Running

File size: 16,139 Bytes

import gradio as gr
from openai import OpenAI
import os
from datetime import datetime

# App title and description, rendered in the page header built further down
APP_TITLE = "NO GPU, Multi LLMs Uses"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# Read the Hugging Face token from the environment; it is passed as the API
# key of the OpenAI-compatible client below.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    # Report the real state instead of unconditionally claiming success, so a
    # missing secret is visible in the logs before the first request fails.
    print("Warning: HF_TOKEN is not set; requests to the inference API are likely to fail.")

# OpenAI SDK client pointed at the Hugging Face inference endpoint
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")


def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
    """Stream a chat completion and yield the growing conversation history.

    Args:
        message: The latest user message.
        history: Previous turns as a sequence of ``(user, assistant)`` pairs.
            ``None`` is treated as an empty conversation.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to the completion request as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.
        frequency_penalty: Forwarded as ``frequency_penalty``.
        seed: Forwarded as ``seed``; ``-1`` is sent as ``None`` (random).
        custom_model: Model identifier to use. ``None``, empty or
            whitespace-only values fall back to
            ``meta-llama/Llama-3.3-70B-Instruct``.

    Yields:
        A list of ``(user, assistant)`` pairs: the prior history plus the
        current turn. The first yield carries an empty assistant reply so the
        user's message appears immediately; later yields carry the text
        streamed so far. If the request raises, the last yield carries an
        error message as the assistant reply.
    """
    print(f"Received message: {message}")
    print(f"Selected model: {custom_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    # Copy so the caller's history object is never mutated
    prior_turns = list(history or [])

    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context, skipping empty halves of a turn
    for turn in prior_turns:
        user_part, assistant_part = turn[0], turn[1]
        if user_part:
            messages.append({"role": "user", "content": user_part})
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # If user provided a model, use that; otherwise, fall back to a default model.
    # `or ""` keeps a None value from crashing on .strip().
    model_to_use = (custom_model or "").strip() or "meta-llama/Llama-3.3-70B-Instruct"

    new_history = prior_turns + [(message, "")]
    current_response = ""

    # Show the user's turn right away instead of waiting for the first token
    yield new_history

    try:
        for message_chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            # Some streamed chunks carry no choices (e.g. usage-only chunks);
            # indexing them would raise and replace the partial answer with an
            # error message.
            if not message_chunk.choices:
                continue
            token_text = message_chunk.choices[0].delta.content
            if not token_text:  # None or empty delta: nothing new to show
                continue
            current_response += token_text
            # Update just the last message in history
            new_history[-1] = (message, current_response)
            yield new_history
    except Exception as e:
        # UI boundary: surface any failure in the chat window rather than crash
        error_message = f"Error: {str(e)}\n\nPlease check your model selection and parameters, or try again later."
        new_history[-1] = (message, error_message)
        yield new_history

    print("Completed response generation.")


# Catalogue of selectable models, keyed by family name. Every entry is an
# "organization/model" identifier; the families become the section headings
# of the model picker.
MODEL_CATEGORIES = {
    "Qwen": [
        "Qwen/Qwen3-235B-A22B",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/QwQ-32B",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        "meta-llama/Llama-3.0-70B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ],
}

# Every model identifier in one flat list, in category order
ALL_MODELS = [
    model_id
    for family_models in MODEL_CATEGORIES.values()
    for model_id in family_models
]


def get_model_info(model_name):
    """Extract and format model information for display.

    Args:
        model_name: Model identifier, expected in ``organization/model`` form.
            Surrounding whitespace is ignored and ``None`` is treated as an
            empty name.

    Returns:
        A Markdown string. For ``organization/model`` identifiers it lists the
        organization, the model and a parameter-count label parsed from the
        model name (for example ``7B``, ``0.5B``, ``360M`` or ``8x7B``), or
        ``Unknown`` when the name carries no such label. Any other identifier
        yields a two-line "Format: Unknown" message.
    """
    # Function-scope import: `re` is not imported at the top of this file
    import re

    model_name = (model_name or "").strip()
    parts = model_name.split('/')
    if len(parts) != 2:
        return f"**Model:** {model_name}\n**Format:** Unknown"

    org, model = parts

    # Number (optionally "<experts>x" prefixed, optionally decimal) directly
    # followed by a B/M unit in either case. The lookahead rejects digits that
    # merely precede a word, e.g. the "3" in "3beta".
    size_match = re.search(
        r'((?:\d+x)?\d+(?:\.\d+)?)([bm])(?![a-z])',
        model,
        re.IGNORECASE,
    )
    if size_match:
        size = size_match.group(1) + size_match.group(2).upper()
    else:
        size = "Unknown"

    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"


def set_model_and_update_info(model_name):
    """Return the selected model name together with its info Markdown.

    Used as an event callback: the first element of the returned pair is the
    model name unchanged, the second is the text produced by
    ``get_model_info``. If building the info raises, the error is printed and
    an error message is returned in place of the info text.
    """
    try:
        info_markdown = get_model_info(model_name)
    except Exception as exc:
        print(f"Error in set_model_and_update_info: {exc}")
        return model_name, f"**Error loading model info**: {str(exc)}"
    else:
        return model_name, info_markdown


def filter_models(search_term):
    """Filter models based on search term across all categories.

    Args:
        search_term: Text to look for inside each model identifier. Matching
            is case-insensitive and ignores leading/trailing whitespace.
            ``None``, empty or whitespace-only terms select everything.

    Returns:
        ``MODEL_CATEGORIES`` itself when there is nothing to filter on
        (callers must not mutate it); otherwise a new dict mapping each
        category that has at least one match to the list of matching models.
    """
    # Normalize once: a padded term such as "qwen " should still match, and
    # lower-casing per model inside the loop is wasted work.
    needle = (search_term or "").strip().lower()
    if not needle:
        return MODEL_CATEGORIES

    filtered_categories = {}
    for category, models in MODEL_CATEGORIES.items():
        hits = [m for m in models if needle in m.lower()]
        if hits:
            filtered_categories[category] = hits

    return filtered_categories


def update_model_display(search_term=""):
    """Update the model selection UI based on search term.

    Args:
        search_term: Filter text handed to ``filter_models``.

    Returns:
        An HTML string: a scrollable container holding a ``selectModel``
        script, then one heading and one grid of clickable model cards per
        matching category, or a "no models found" paragraph when nothing
        matches.
    """
    # Function-scope imports: neither module is imported at the top of this file
    import json
    from html import escape

    filtered_categories = filter_models(search_term)

    # NOTE(review): depending on how the host page injects this markup, an
    # inline <script> may not be executed — confirm in the browser that
    # selectModel is actually defined when a card is clicked.
    fragments = ["""
    <div style='max-height: 400px; overflow-y: auto;'>
    <script>
    // Direct model selection function - more reliable
    function selectModel(modelName) {
        // Get the textbox element by its ID
        const modelInput = document.getElementById('custom-model-input');
        if (modelInput) {
            // Set the value
            modelInput.value = modelName;
            
            // Create and dispatch change event
            const event = new Event('change', { bubbles: true });
            modelInput.dispatchEvent(event);
            
            // Look for the hidden trigger button and click it
            const triggerBtn = document.querySelector('button[value="Select Model"]');
            if (triggerBtn) {
                triggerBtn.click();
            }
            
            console.log('Selected model:', modelName);
        } else {
            console.error('Model input element not found');
        }
    }
    </script>
    """]

    # Add models by category
    for category, models in filtered_categories.items():
        fragments.append(
            f"<h3>{escape(category)}</h3><div style='display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;'>"
        )

        for model in models:
            segments = model.split('/')
            model_short = escape(segments[-1])
            organization = escape(segments[0])
            # Two escaping layers: json.dumps yields a valid JS string
            # literal, html.escape makes it safe inside the double-quoted
            # onclick attribute. A backslash-escaped quote would end the
            # attribute early.
            on_click = escape(f"selectModel({json.dumps(model)})", quote=True)
            fragments.append(f"""
            <div class='model-card' 
                 style='border: 1px solid #ddd; border-radius: 8px; padding: 12px; cursor: pointer; transition: all 0.2s;
                        background: linear-gradient(145deg, #f0f0f0, #ffffff); box-shadow: 0 4px 6px rgba(0,0,0,0.1);'
                 onclick="{on_click}">
                <div style='font-weight: bold; margin-bottom: 6px; color: #1a73e8;'>{model_short}</div>
                <div style='font-size: 0.8em; color: #666;'>{organization}</div>
            </div>
            """)
        fragments.append("</div>")

    if not filtered_categories:
        fragments.append("<p>No models found matching your search.</p>")

    fragments.append("</div>")
    # Join once instead of growing a string with repeated +=
    return "".join(fragments)


# Custom CSS handed to gr.Blocks(css=...). The #app-container and
# #chat-container selectors match the id / elem_id values used in the layout,
# .model-card matches the cards produced by update_model_display(), and
# .footer / .status-indicator / .status-active match classes in the header and
# footer HTML.
# NOTE(review): .parameter-row and .contain are not referenced anywhere in the
# visible code — presumably .contain targets a class generated by Gradio;
# confirm before removing either rule.
custom_css = """
#app-container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
}

#chat-container {
    border-radius: 12px;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
    overflow: hidden;
    border: 1px solid #e0e0e0;
}

.contain {
    background: linear-gradient(135deg, #f5f7fa 0%, #e4e7eb 100%);
}

h1, h2, h3 {
    font-family: 'Poppins', sans-serif;
}

h1 {
    background: linear-gradient(90deg, #2b6cb0, #4299e1);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 700;
    letter-spacing: -0.5px;
    margin-bottom: 8px;
}

.parameter-row {
    display: flex;
    gap: 10px;
    margin-bottom: 10px;
}

.model-card:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 12px rgba(0,0,0,0.15);
    border-color: #4299e1;
}

.footer {
    text-align: center;
    margin-top: 20px;
    font-size: 0.8em;
    color: #666;
}

/* Status indicator styles */
.status-indicator {
    display: inline-block;
    width: 10px;
    height: 10px;
    border-radius: 50%;
    margin-right: 6px;
}

.status-active {
    background-color: #10B981;
    animation: pulse 2s infinite;
}

@keyframes pulse {
    0% {
        box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
    }
    70% {
        box-shadow: 0 0 0 5px rgba(16, 185, 129, 0);
    }
    100% {
        box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
    }
}
"""

# Build the whole interface: header, model picker (left column), chat panel
# with generation settings (right column), footer, then the event wiring.
with gr.Blocks(css=custom_css, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    # Header. datetime.now() is evaluated once, when the layout is built, so
    # "Last Updated" shows the date the app was started.
    gr.HTML(f"""
    <div id="app-container">
        <div style="text-align: center; padding: 20px 0;">
            <h1 style="font-size: 2.5rem;">{APP_TITLE}</h1>
            <p style="font-size: 1.1rem; color: #555;">{APP_DESCRIPTION}</p>
            <div style="margin-top: 10px;">
                <span class="status-indicator status-active"></span>
                <span>Service Active</span>
                <span style="margin-left: 15px;">Last Updated: {datetime.now().strftime('%Y-%m-%d')}</span>
            </div>
        </div>
    </div>
    """)
    
    # Model preselected in the textbox and described in the info panel; kept
    # in one place so the two cannot drift apart.
    initial_model = "Qwen/Qwen3-32B"

    with gr.Row():
        with gr.Column(scale=2):
            # Model selection panel
            # NOTE(review): each gr.HTML call creates its own component, so
            # this opening <div> and the closing </div> at the end of the
            # column probably do not render as one wrapper around the widgets
            # in between — confirm in the browser.
            gr.HTML("<div style='border: 1px solid #e0e0e0; border-radius: 10px; padding: 15px;'>")
            gr.HTML("<h3 style='margin-top: 0;'>Model Selection</h3>")
            
            # Custom model input (this is what the respond function sees)
            custom_model_box = gr.Textbox(
                value=initial_model,
                label="Selected Model",
                elem_id="custom-model-input"
            )
            
            # Search box
            model_search_box = gr.Textbox(
                label="Search Models",
                placeholder="Type to filter models...",
                lines=1
            )
            
            # Dynamic model display area, initially showing every model
            model_display = gr.HTML(update_model_display())
            
            # Model information display
            gr.HTML("<h4>Current Model Info</h4>")
            model_info_display = gr.Markdown(get_model_info(initial_model))
            gr.HTML("</div>")
        
        with gr.Column(scale=3):
            # Main chat interface
            chatbot = gr.Chatbot(
                height=550, 
                show_copy_button=True, 
                placeholder="Select a model and begin chatting", 
                layout="panel",
                elem_id="chat-container"
            )
            
            with gr.Row():
                with gr.Column(scale=8):
                    msg = gr.Textbox(
                        show_label=False,
                        placeholder="Type your message here...",
                        container=False,
                        scale=8
                    )
                with gr.Column(scale=1, min_width=70):
                    submit_btn = gr.Button("Send", variant="primary", scale=1)
            
            with gr.Accordion("Conversation Settings", open=False):
                system_message_box = gr.Textbox(
                    value="You are a helpful assistant.", 
                    placeholder="System prompt that guides the assistant's behavior", 
                    label="System Prompt",
                    lines=2
                )
                
                # Use standard Row/Column layout instead of tabs that might not be available
                gr.HTML("<h3>Basic Parameters</h3>")
                with gr.Row():
                    with gr.Column():
                        max_tokens_slider = gr.Slider(
                            minimum=1,
                            maximum=4096,
                            value=512,
                            step=1,
                            label="Max new tokens"
                        )
                    with gr.Column():
                        temperature_slider = gr.Slider(
                            minimum=0.1,
                            maximum=4.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )
                
                gr.HTML("<h3>Advanced Parameters</h3>")
                with gr.Row():
                    with gr.Column():
                        top_p_slider = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.95,
                            step=0.05,
                            label="Top-P"
                        )
                    with gr.Column():
                        frequency_penalty_slider = gr.Slider(
                            minimum=-2.0,
                            maximum=2.0,
                            value=0.0,
                            step=0.1,
                            label="Frequency Penalty"
                        )
                
                # -1 is translated to "no seed" inside respond()
                seed_slider = gr.Slider(
                    minimum=-1,
                    maximum=65535,
                    value=-1,
                    step=1,
                    label="Seed (-1 for random)"
                )
    
    # Footer
    gr.HTML("""
    <div class="footer">
        <p>Created with Gradio • Powered by Hugging Face Inference API</p>
        <p>This interface allows you to chat with various language models without requiring a GPU</p>
    </div>
    """)
    
    # Add a hidden button to trigger model selection via JavaScript
    # NOTE(review): the script emitted by update_model_display() looks this
    # button up in the DOM; confirm that a visible=False button is actually
    # present in the rendered page.
    trigger_model_selection = gr.Button("Select Model", visible=False)
    
    # Set up event handlers

    # Components feeding respond(), in the order of its parameters. Shared by
    # both send triggers so the two handlers cannot diverge.
    chat_inputs = [
        msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
        top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box,
    ]

    # Pressing Enter in the message box
    msg.submit(
        fn=respond,
        inputs=chat_inputs,
        outputs=chatbot,
        queue=True
    ).then(
        lambda: "", # Clear the message box after sending
        None,
        [msg]
    )
    
    # Clicking the Send button
    submit_btn.click(
        fn=respond,
        inputs=chat_inputs,
        outputs=chatbot,
        queue=True
    ).then(
        lambda: "", # Clear the message box after sending
        None,
        [msg]
    )
    
    # Update model display when search changes
    model_search_box.change(
        fn=update_model_display,
        inputs=model_search_box,
        outputs=model_display
    )
    
    # Update model info when selection changes
    custom_model_box.change(
        fn=set_model_and_update_info,
        inputs=custom_model_box,
        outputs=[custom_model_box, model_info_display]
    )
    
    # Connect the hidden trigger button to update model info
    trigger_model_selection.click(
        fn=set_model_and_update_info,
        inputs=custom_model_box,
        outputs=[custom_model_box, model_info_display]
    )

# Start the Gradio server only when this file is run as a script, so that
# importing the module does not launch the app.
if __name__ == "__main__":
    print("Launching the enhanced multi-model chat interface.")
    demo.launch()