multi-llm / app.py
keeperballon's picture
Update app.py
8f82122 verified
raw
history blame
16.1 kB
import gradio as gr
from openai import OpenAI
import os
from datetime import datetime
# App title and description shown in the page header
APP_TITLE = "NO GPU, Multi LLMs Uses"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# Hugging Face access token, read from the environment (None when HF_TOKEN is unset)
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    # Report the real state instead of unconditionally claiming success
    print("Warning: HF_TOKEN is not set; the inference client may fail to authenticate.")

# OpenAI-compatible client pointed at the Hugging Face inference endpoint
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
    """Stream a chat completion and yield the growing chat history.

    Args:
        message: Latest user message.
        history: Previous turns as ``(user, assistant)`` pairs; ``None`` is
            treated as an empty conversation.
        system_message: Content of the leading system message.
        max_tokens, temperature, top_p, frequency_penalty: Passed through
            unchanged to ``client.chat.completions.create``.
        seed: Sampling seed; ``-1`` is converted to ``None`` (random).
        custom_model: Model id to use; blank or ``None`` falls back to
            ``meta-llama/Llama-3.3-70B-Instruct``.

    Yields:
        A copy of ``history`` extended with ``(message, partial_response)``,
        updated as text arrives. On failure the last entry carries an error
        message instead of the response.
    """
    print(f"Received message: {message}")
    print(f"Selected model: {custom_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    # A missing history must not crash the request
    history = history or []

    messages = [{"role": "system", "content": system_message}]
    # Add conversation history to the context, skipping empty halves of a turn
    for val in history:
        user_part = val[0]
        assistant_part = val[1]
        if user_part:
            messages.append({"role": "user", "content": user_part})
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # If user provided a model, use that; otherwise, fall back to a default model
    model_to_use = (custom_model or "").strip() or "meta-llama/Llama-3.3-70B-Instruct"

    # Work on a copy so the caller's history list is never mutated
    new_history = list(history)
    new_history.append((message, ""))
    current_response = ""
    emitted = False

    try:
        stream = client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        )
        for message_chunk in stream:
            # Some stream chunks carry no choices; indexing [0] on them
            # would raise and discard the text received so far
            if not message_chunk.choices:
                continue
            token_text = message_chunk.choices[0].delta.content
            if token_text is not None:  # Handle None type in response
                current_response += token_text
                # Update just the last message in history
                new_history[-1] = (message, current_response)
                emitted = True
                yield new_history
    except Exception as e:
        error_message = f"Error: {str(e)}\n\nPlease check your model selection and parameters, or try again later."
        new_history[-1] = (message, error_message)
        emitted = True
        yield new_history

    if not emitted:
        # The stream produced no text at all: still surface the user's turn
        yield new_history

    print("Completed response generation.")
# Model ids grouped by family; insertion order is the order used for display
MODEL_CATEGORIES = {
    "Qwen": [
        "Qwen/Qwen3-235B-A22B",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/QwQ-32B",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        # NOTE(review): confirm this repo id exists on the Hub
        "meta-llama/Llama-3.0-70B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ],
}

# Flat list of every model id, in category order. Built with a comprehension
# so no loop variables are left behind in the module namespace.
ALL_MODELS = [
    model_id
    for family_models in MODEL_CATEGORIES.values()
    for model_id in family_models
]
def get_model_info(model_name):
    """Return a Markdown summary (organization, model, size) for a model id.

    Args:
        model_name: Model id in ``org/model`` form. ``None`` is treated as an
            empty string and surrounding whitespace is ignored.

    Returns:
        ``**Organization:** .. / **Model:** .. / **Size:** ..`` on separate
        lines when the id has exactly one ``/``; otherwise a two-line
        ``**Model:** .. / **Format:** Unknown`` string. ``Size`` is
        ``Unknown`` when no parameter count is found in the model name.
    """
    import re  # local import keeps this helper self-contained

    model_name = (model_name or "").strip()
    parts = model_name.split('/')
    if len(parts) != 2:
        return f"**Model:** {model_name}\n**Format:** Unknown"
    org, model = parts

    # Parameter count: digits (optionally "8x" style and/or decimal) followed
    # by B or M in either case. The lookahead rejects tokens such as "4bit"
    # or "3mini", where the letter merely starts another word.
    size_match = re.search(r'((?:\d+x)?\d+(?:\.\d+)?)([BbMm])(?![A-Za-z])', model)
    if size_match:
        size = size_match.group(1) + size_match.group(2).upper()
    else:
        size = "Unknown"
    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"
def set_model_and_update_info(model_name):
    """Echo the chosen model id together with its formatted info text.

    Used as an event callback: the first element of the returned pair is the
    unchanged ``model_name``; the second is the text from ``get_model_info``,
    or an error line when that call raises.
    """
    try:
        details = get_model_info(model_name)
    except Exception as err:
        # Log the failure and show it in place of the model details
        print(f"Error in set_model_and_update_info: {err}")
        details = f"**Error loading model info**: {str(err)}"
    return model_name, details
def filter_models(search_term):
    """Filter models by a case-insensitive substring across all categories.

    Args:
        search_term: Text to look for in each model id. ``None``, empty and
            whitespace-only values mean "no filter"; surrounding whitespace
            is ignored.

    Returns:
        ``MODEL_CATEGORIES`` itself when there is no filter; otherwise a new
        dict mapping each category that has at least one match to the list of
        its matching model ids. Categories without matches are omitted.
    """
    # Normalise once instead of lowering the term for every model
    needle = (search_term or "").strip().lower()
    if not needle:
        return MODEL_CATEGORIES

    filtered_categories = {}
    for category, models in MODEL_CATEGORIES.items():
        filtered_models = [m for m in models if needle in m.lower()]
        if filtered_models:
            filtered_categories[category] = filtered_models
    return filtered_categories
def update_model_display(search_term=""):
    """Build the HTML for the clickable model cards, filtered by search term.

    Args:
        search_term: Passed to ``filter_models`` to choose which models to show.

    Returns:
        An HTML string: a scrollable container holding a ``selectModel``
        script, then one heading and card grid per category, or a
        "no models found" paragraph when nothing matches.

    NOTE(review): the ``<script>`` element is delivered as part of the HTML
    value; browsers may not execute scripts inserted this way, in which case
    ``selectModel`` is undefined when a card is clicked. Verify in the running
    app.
    """
    import json
    from html import escape

    filtered_categories = filter_models(search_term)

    # Collect fragments and join once instead of repeated string concatenation
    fragments = ["""
<div style='max-height: 400px; overflow-y: auto;'>
<script>
// Direct model selection function - more reliable
function selectModel(modelName) {
// Get the textbox element by its ID
const modelInput = document.getElementById('custom-model-input');
if (modelInput) {
// Set the value
modelInput.value = modelName;
// Create and dispatch change event
const event = new Event('change', { bubbles: true });
modelInput.dispatchEvent(event);
// Look for the hidden trigger button and click it
const triggerBtn = document.querySelector('button[value="Select Model"]');
if (triggerBtn) {
triggerBtn.click();
}
console.log('Selected model:', modelName);
} else {
console.error('Model input element not found');
}
}
</script>
"""]

    # Add models by category
    for category, models in filtered_categories.items():
        fragments.append(
            f"<h3>{escape(category)}</h3><div style='display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;'>"
        )
        for model in models:
            model_short = escape(model.split('/')[-1])
            model_org = escape(model.split('/')[0])
            # json.dumps yields a valid JS string literal; escape() then makes
            # it safe inside the double-quoted onclick attribute. Backslash
            # escaping alone does not protect an HTML attribute value.
            js_argument = escape(json.dumps(model), quote=True)
            fragments.append(f"""
<div class='model-card'
style='border: 1px solid #ddd; border-radius: 8px; padding: 12px; cursor: pointer; transition: all 0.2s;
background: linear-gradient(145deg, #f0f0f0, #ffffff); box-shadow: 0 4px 6px rgba(0,0,0,0.1);'
onclick="selectModel({js_argument})">
<div style='font-weight: bold; margin-bottom: 6px; color: #1a73e8;'>{model_short}</div>
<div style='font-size: 0.8em; color: #666;'>{model_org}</div>
</div>
""")
        fragments.append("</div>")

    if not filtered_categories:
        fragments.append("<p>No models found matching your search.</p>")
    fragments.append("</div>")
    return "".join(fragments)
# Custom stylesheet handed to gr.Blocks(css=...). It styles the ids and classes
# used by this file's HTML snippets and components: #app-container,
# #chat-container, .model-card, .footer and the pulsing .status-indicator.
custom_css = """
#app-container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
#chat-container {
border-radius: 12px;
box-shadow: 0 8px 16px rgba(0,0,0,0.1);
overflow: hidden;
border: 1px solid #e0e0e0;
}
.contain {
background: linear-gradient(135deg, #f5f7fa 0%, #e4e7eb 100%);
}
h1, h2, h3 {
font-family: 'Poppins', sans-serif;
}
h1 {
background: linear-gradient(90deg, #2b6cb0, #4299e1);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-weight: 700;
letter-spacing: -0.5px;
margin-bottom: 8px;
}
.parameter-row {
display: flex;
gap: 10px;
margin-bottom: 10px;
}
.model-card:hover {
transform: translateY(-2px);
box-shadow: 0 6px 12px rgba(0,0,0,0.15);
border-color: #4299e1;
}
.footer {
text-align: center;
margin-top: 20px;
font-size: 0.8em;
color: #666;
}
/* Status indicator styles */
.status-indicator {
display: inline-block;
width: 10px;
height: 10px;
border-radius: 50%;
margin-right: 6px;
}
.status-active {
background-color: #10B981;
animation: pulse 2s infinite;
}
@keyframes pulse {
0% {
box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
}
70% {
box-shadow: 0 0 0 5px rgba(16, 185, 129, 0);
}
100% {
box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
}
}
"""
# Build the whole UI: page header, model picker (left column), chat and
# generation settings (right column), footer, and the event wiring.
# NOTE(review): the nesting of rows/columns below follows the statement order
# and the scale arguments — confirm it matches the intended layout.
with gr.Blocks(css=custom_css, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    # Page header; the "Last Updated" date is evaluated once, when this
    # module runs, not per page view
    gr.HTML(f"""
<div id="app-container">
<div style="text-align: center; padding: 20px 0;">
<h1 style="font-size: 2.5rem;">{APP_TITLE}</h1>
<p style="font-size: 1.1rem; color: #555;">{APP_DESCRIPTION}</p>
<div style="margin-top: 10px;">
<span class="status-indicator status-active"></span>
<span>Service Active</span>
<span style="margin-left: 15px;">Last Updated: {datetime.now().strftime('%Y-%m-%d')}</span>
</div>
</div>
</div>
""")
    with gr.Row():
        with gr.Column(scale=2):
            # Model selection panel (left side)
            # NOTE(review): the opening <div> here and the closing </div>
            # further down are separate gr.HTML components — verify the
            # border really encloses the widgets declared between them.
            gr.HTML("<div style='border: 1px solid #e0e0e0; border-radius: 10px; padding: 15px;'>")
            gr.HTML("<h3 style='margin-top: 0;'>Model Selection</h3>")
            # Selected model id; passed to respond() as custom_model
            custom_model_box = gr.Textbox(
                value="Qwen/Qwen3-32B",  # Default selection
                label="Selected Model",
                elem_id="custom-model-input"
            )
            # Search box that filters the model cards
            model_search_box = gr.Textbox(
                label="Search Models",
                placeholder="Type to filter models...",
                lines=1
            )
            # Model cards; initially rendered with no filter
            model_display = gr.HTML(update_model_display())
            # Info text for the currently selected model
            gr.HTML("<h4>Current Model Info</h4>")
            model_info_display = gr.Markdown(get_model_info("Qwen/Qwen3-32B"))
            gr.HTML("</div>")
        with gr.Column(scale=3):
            # Main chat interface
            chatbot = gr.Chatbot(
                height=550,
                show_copy_button=True,
                placeholder="Select a model and begin chatting",
                layout="panel",
                elem_id="chat-container"
            )
            # Message input with the send button beside it
            with gr.Row():
                with gr.Column(scale=8):
                    msg = gr.Textbox(
                        show_label=False,
                        placeholder="Type your message here...",
                        container=False,
                        scale=8
                    )
                with gr.Column(scale=1, min_width=70):
                    submit_btn = gr.Button("Send", variant="primary", scale=1)
            # Collapsed-by-default panel with the system prompt and sampling controls
            with gr.Accordion("Conversation Settings", open=False):
                system_message_box = gr.Textbox(
                    value="You are a helpful assistant.",
                    placeholder="System prompt that guides the assistant's behavior",
                    label="System Prompt",
                    lines=2
                )
                # Plain Row/Column layout is used here rather than tabs
                gr.HTML("<h3>Basic Parameters</h3>")
                with gr.Row():
                    with gr.Column():
                        max_tokens_slider = gr.Slider(
                            minimum=1,
                            maximum=4096,
                            value=512,
                            step=1,
                            label="Max new tokens"
                        )
                    with gr.Column():
                        temperature_slider = gr.Slider(
                            minimum=0.1,
                            maximum=4.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )
                gr.HTML("<h3>Advanced Parameters</h3>")
                with gr.Row():
                    with gr.Column():
                        top_p_slider = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.95,
                            step=0.05,
                            label="Top-P"
                        )
                    with gr.Column():
                        frequency_penalty_slider = gr.Slider(
                            minimum=-2.0,
                            maximum=2.0,
                            value=0.0,
                            step=0.1,
                            label="Frequency Penalty"
                        )
                # -1 is translated to "no seed" inside respond()
                seed_slider = gr.Slider(
                    minimum=-1,
                    maximum=65535,
                    value=-1,
                    step=1,
                    label="Seed (-1 for random)"
                )
    # Footer
    gr.HTML("""
<div class="footer">
<p>Created with Gradio • Powered by Hugging Face Inference API</p>
<p>This interface allows you to chat with various language models without requiring a GPU</p>
</div>
""")
    # Hidden button that the selectModel() script in the model cards looks up and clicks
    trigger_model_selection = gr.Button("Select Model", visible=False)
    # Event handlers: pressing Enter in the message box and clicking Send
    # both run respond() with the same inputs, then clear the message box
    msg.submit(
        fn=respond,
        inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
                top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
        outputs=chatbot,
        queue=True
    ).then(
        lambda: "",  # Clear the message box after sending
        None,
        [msg]
    )
    submit_btn.click(
        fn=respond,
        inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
                top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
        outputs=chatbot,
        queue=True
    ).then(
        lambda: "",  # Clear the message box after sending
        None,
        [msg]
    )
    # Re-render the model cards whenever the search text changes
    model_search_box.change(
        fn=lambda x: update_model_display(x),
        inputs=model_search_box,
        outputs=model_display
    )
    # Refresh the model info when the selected model text changes; the
    # handler also writes the same model id back into the textbox
    custom_model_box.change(
        fn=set_model_and_update_info,
        inputs=custom_model_box,
        outputs=[custom_model_box, model_info_display]
    )
    # Same refresh, triggered through the hidden button
    trigger_model_selection.click(
        fn=set_model_and_update_info,
        inputs=custom_model_box,
        outputs=[custom_model_box, model_info_display]
    )
# Launch the Gradio app only when this file is run as a script
if __name__ == "__main__":
    print("Launching the enhanced multi-model chat interface.")
    demo.launch()