Spaces:

keeperballon
/

multi-llm

Running

App Files Files Community

multi-llm / app.py

keeperballon

Update app.py

8f82122 verified about 1 month ago

raw

history blame

16.1 kB

	import gradio as gr
	from openai import OpenAI
	import os
	from datetime import datetime

	# Title and tagline shown in the page header and used as the browser title.
	APP_TITLE = "NO GPU, Multi LLMs Uses"
	APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

	# Access token for the inference endpoint, read from the environment.
	ACCESS_TOKEN = os.getenv("HF_TOKEN")
	if ACCESS_TOKEN:
	    print("Access token loaded.")
	else:
	    # Report the real state instead of unconditionally claiming success, so a
	    # missing secret shows up in the logs before the first request fails.
	    print("Warning: HF_TOKEN is not set; the inference client has no access token.")

	# OpenAI-compatible client pointed at the Hugging Face inference endpoint.
	# NOTE(review): with api_key=None the OpenAI SDK presumably falls back to the
	# OPENAI_API_KEY environment variable or raises - confirm the intended behavior.
	# NOTE(review): confirm this base URL is still the supported chat-completions
	# endpoint for the models listed in this file.
	client = OpenAI(
	    base_url="https://api-inference.huggingface.co/v1/",
	    api_key=ACCESS_TOKEN,
	)
	print("OpenAI client initialized.")


	def respond(
	    message,
	    history,
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	    frequency_penalty,
	    seed,
	    custom_model
	):
	    """Stream a chat completion and yield the growing chat history.

	    Args:
	        message: Latest user message.
	        history: Previous exchanges as indexable ``(user, assistant)`` pairs;
	            ``None`` is treated as an empty history.
	        system_message: Content of the leading ``system`` message.
	        max_tokens: Forwarded as ``max_tokens``.
	        temperature: Forwarded as ``temperature``.
	        top_p: Forwarded as ``top_p``.
	        frequency_penalty: Forwarded as ``frequency_penalty``.
	        seed: Forwarded as ``seed``; ``-1`` is sent as ``None`` (random).
	        custom_model: Model identifier; ``None`` or a blank string falls back
	            to ``meta-llama/Llama-3.3-70B-Instruct``.

	    Yields:
	        The history list extended with ``(message, partial_response)``, once
	        per received text fragment. If the request raises, the last entry
	        carries an error message instead of the response.
	    """
	    print(f"Received message: {message}")
	    print(f"Selected model: {custom_model}")

	    # Convert seed to None if -1 (meaning random)
	    if seed == -1:
	        seed = None

	    # Tolerate a missing history (e.g. a direct call with None).
	    past_turns = list(history or [])

	    messages = [{"role": "system", "content": system_message}]

	    # Replay the earlier turns, skipping empty halves of a pair.
	    for turn in past_turns:
	        user_part, assistant_part = turn[0], turn[1]
	        if user_part:
	            messages.append({"role": "user", "content": user_part})
	        if assistant_part:
	            messages.append({"role": "assistant", "content": assistant_part})

	    # Append the latest user message
	    messages.append({"role": "user", "content": message})

	    # Use the requested model; None or whitespace-only means "use the default".
	    requested_model = (custom_model or "").strip()
	    model_to_use = requested_model or "meta-llama/Llama-3.3-70B-Instruct"

	    # Work on a copy so the caller's history object is never mutated.
	    new_history = past_turns
	    new_history.append((message, ""))
	    current_response = ""

	    try:
	        stream = client.chat.completions.create(
	            model=model_to_use,
	            max_tokens=max_tokens,
	            stream=True,
	            temperature=temperature,
	            top_p=top_p,
	            frequency_penalty=frequency_penalty,
	            seed=seed,
	            messages=messages,
	        )
	        for message_chunk in stream:
	            # A chunk may carry no choices at all; indexing it would raise and
	            # the handler below would replace the reply streamed so far.
	            if not message_chunk.choices:
	                continue
	            token_text = message_chunk.choices[0].delta.content
	            if token_text is None:  # Chunks without text content
	                continue
	            current_response += token_text
	            # Update just the last message in history
	            new_history[-1] = (message, current_response)
	            yield new_history
	    except Exception as e:
	        # Surface the failure in the chat window rather than crashing the UI.
	        error_message = f"Error: {str(e)}\n\nPlease check your model selection and parameters, or try again later."
	        new_history[-1] = (message, error_message)
	        yield new_history

	    print("Completed response generation.")


	# Catalogue of selectable models, grouped under the heading shown in the UI.
	MODEL_CATEGORIES = {
	    "Qwen": [
	        "Qwen/Qwen3-235B-A22B",
	        "Qwen/Qwen3-32B",
	        "Qwen/Qwen2.5-72B-Instruct",
	        "Qwen/Qwen2.5-3B-Instruct",
	        "Qwen/Qwen2.5-0.5B-Instruct",
	        "Qwen/QwQ-32B",
	        "Qwen/Qwen2.5-Coder-32B-Instruct",
	    ],
	    "Meta LLaMa": [
	        "meta-llama/Llama-3.3-70B-Instruct",
	        "meta-llama/Llama-3.1-70B-Instruct",
	        "meta-llama/Llama-3.0-70B-Instruct",
	        "meta-llama/Llama-3.2-3B-Instruct",
	        "meta-llama/Llama-3.2-1B-Instruct",
	        "meta-llama/Llama-3.1-8B-Instruct",
	    ],
	    "Mistral": [
	        "mistralai/Mistral-Nemo-Instruct-2407",
	        "mistralai/Mixtral-8x7B-Instruct-v0.1",
	        "mistralai/Mistral-7B-Instruct-v0.3",
	        "mistralai/Mistral-7B-Instruct-v0.2",
	    ],
	    "Microsoft Phi": [
	        "microsoft/Phi-3.5-mini-instruct",
	        "microsoft/Phi-3-mini-128k-instruct",
	        "microsoft/Phi-3-mini-4k-instruct",
	    ],
	    "Other Models": [
	        "NousResearch/Hermes-3-Llama-3.1-8B",
	        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
	        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
	        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
	        "HuggingFaceH4/zephyr-7b-beta",
	        "HuggingFaceTB/SmolLM2-360M-Instruct",
	        "tiiuae/falcon-7b-instruct",
	        "01-ai/Yi-1.5-34B-Chat",
	    ],
	}

	# Every model identifier in one flat list, in category order.
	ALL_MODELS = [
	    model_id
	    for group in MODEL_CATEGORIES.values()
	    for model_id in group
	]


	def get_model_info(model_name):
	    """Extract and format model information for display.

	    Args:
	        model_name: Identifier expected in ``organization/model`` form;
	            surrounding whitespace is ignored.

	    Returns:
	        ``"Organization: ...\\nModel: ...\\nSize: ..."`` for a well-formed
	        identifier, or ``"Model: ...\\nFormat: Unknown"`` when the identifier
	        does not consist of exactly two ``/``-separated parts. ``Size`` is
	        ``"Unknown"`` when no parameter count is found in the model part.
	    """
	    import re

	    model_name = model_name.strip()
	    parts = model_name.split('/')
	    if len(parts) != 2:
	        return f"Model: {model_name}\nFormat: Unknown"

	    org, model = parts

	    # Parameter count: an optional "<n>x" multiplier (e.g. "8x7B"), a number
	    # with optional decimals, then a B or M suffix in either case. The
	    # lookahead rejects suffixes that continue into a word such as "4bit".
	    size_match = re.search(
	        r'((?:\d+x)?\d+(?:\.\d+)?)([bm])(?![a-z0-9])',
	        model,
	        re.IGNORECASE,
	    )
	    if size_match:
	        size = size_match.group(1) + size_match.group(2).upper()
	    else:
	        size = "Unknown"

	    return f"Organization: {org}\nModel: {model}\nSize: {size}"


	def set_model_and_update_info(model_name):
	    """Return the selected model name together with its description text.

	    Used as an event handler: the first element of the returned tuple is the
	    model name exactly as received, the second is the text produced by
	    ``get_model_info`` or an error message if that call raises.
	    """
	    try:
	        description = get_model_info(model_name)
	    except Exception as e:
	        # Keep the UI responsive: log the problem and show it in the info panel.
	        print(f"Error in set_model_and_update_info: {e}")
	        description = f"Error loading model info: {str(e)}"

	    return model_name, description


	def filter_models(search_term):
	    """Filter models based on a search term across all categories.

	    Args:
	        search_term: Text to look for in model identifiers. Matching is
	            case-insensitive and ignores surrounding whitespace.

	    Returns:
	        ``MODEL_CATEGORIES`` itself when the term is empty, blank or ``None``;
	        otherwise a new dict holding only the categories that have at least
	        one matching model, each mapped to its matching models.
	    """
	    # Normalize once instead of re-lowercasing the term for every model.
	    needle = (search_term or "").strip().lower()
	    if not needle:
	        return MODEL_CATEGORIES

	    filtered_categories = {}
	    for category, models in MODEL_CATEGORIES.items():
	        hits = [m for m in models if needle in m.lower()]
	        if hits:
	            filtered_categories[category] = hits

	    return filtered_categories


	def update_model_display(search_term=""):
	    """Build the HTML for the model picker, limited to models matching *search_term*.

	    Args:
	        search_term: Filter text passed to ``filter_models``.

	    Returns:
	        An HTML string: a scrollable container holding the ``selectModel``
	        script, one heading and card grid per category, and a "no models
	        found" paragraph when nothing matches.
	    """
	    from html import escape

	    filtered_categories = filter_models(search_term)

	    # NOTE(review): browsers do not execute <script> elements inserted through
	    # innerHTML; if the HTML component renders this markup that way,
	    # selectModel() is never defined and clicking a card does nothing - verify.
	    # NOTE(review): the script sets .value on the element with id
	    # 'custom-model-input'; confirm that id lands on the actual input element.
	    fragments = ["""
	<div style='max-height: 400px; overflow-y: auto;'>
	<script>
	// Direct model selection function - more reliable
	function selectModel(modelName) {
	// Get the textbox element by its ID
	const modelInput = document.getElementById('custom-model-input');
	if (modelInput) {
	// Set the value
	modelInput.value = modelName;

	// Create and dispatch change event
	const event = new Event('change', { bubbles: true });
	modelInput.dispatchEvent(event);

	// Look for the hidden trigger button and click it
	const triggerBtn = document.querySelector('button[value="Select Model"]');
	if (triggerBtn) {
	triggerBtn.click();
	}

	console.log('Selected model:', modelName);
	} else {
	console.error('Model input element not found');
	}
	}
	</script>
	"""]

	    # Add models by category
	    for category, models in filtered_categories.items():
	        fragments.append(
	            f"<h3>{escape(category)}</h3><div style='display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;'>"
	        )

	        for model in models:
	            segments = model.split('/')
	            org_label = escape(segments[0])
	            model_short = escape(segments[-1])
	            # The identifier sits inside a single-quoted JS string that is
	            # itself inside a double-quoted HTML attribute: escape for
	            # JavaScript first, then for the attribute.
	            js_literal = model.replace("\\", "\\\\").replace("'", "\\'")
	            onclick_arg = escape(js_literal, quote=True)
	            fragments.append(f"""
	<div class='model-card'
	style='border: 1px solid #ddd; border-radius: 8px; padding: 12px; cursor: pointer; transition: all 0.2s;
	background: linear-gradient(145deg, #f0f0f0, #ffffff); box-shadow: 0 4px 6px rgba(0,0,0,0.1);'
	onclick="selectModel('{onclick_arg}')">
	<div style='font-weight: bold; margin-bottom: 6px; color: #1a73e8;'>{model_short}</div>
	<div style='font-size: 0.8em; color: #666;'>{org_label}</div>
	</div>
	""")
	        fragments.append("</div>")

	    if not filtered_categories:
	        fragments.append("<p>No models found matching your search.</p>")

	    fragments.append("</div>")
	    # Join once instead of growing a string with repeated +=.
	    return "".join(fragments)


	# Stylesheet handed to gr.Blocks(css=...) when the interface is built.
	# - #app-container wraps the header markup; #chat-container is the elem_id
	#   given to the Chatbot component.
	# - .model-card:hover styles the cards emitted by update_model_display().
	# - .footer and .status-indicator / .status-active style the footer text and
	#   the pulsing "Service Active" dot in the header.
	# NOTE(review): .parameter-row is not referenced by any markup in this file,
	# and .contain presumably targets a Gradio-internal class - confirm both.
	custom_css = """
	#app-container {
	max-width: 1200px;
	margin: 0 auto;
	padding: 20px;
	}

	#chat-container {
	border-radius: 12px;
	box-shadow: 0 8px 16px rgba(0,0,0,0.1);
	overflow: hidden;
	border: 1px solid #e0e0e0;
	}

	.contain {
	background: linear-gradient(135deg, #f5f7fa 0%, #e4e7eb 100%);
	}

	h1, h2, h3 {
	font-family: 'Poppins', sans-serif;
	}

	h1 {
	background: linear-gradient(90deg, #2b6cb0, #4299e1);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-weight: 700;
	letter-spacing: -0.5px;
	margin-bottom: 8px;
	}

	.parameter-row {
	display: flex;
	gap: 10px;
	margin-bottom: 10px;
	}

	.model-card:hover {
	transform: translateY(-2px);
	box-shadow: 0 6px 12px rgba(0,0,0,0.15);
	border-color: #4299e1;
	}

	.footer {
	text-align: center;
	margin-top: 20px;
	font-size: 0.8em;
	color: #666;
	}

	/* Status indicator styles */
	.status-indicator {
	display: inline-block;
	width: 10px;
	height: 10px;
	border-radius: 50%;
	margin-right: 6px;
	}

	.status-active {
	background-color: #10B981;
	animation: pulse 2s infinite;
	}

	@keyframes pulse {
	0% {
	box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
	}
	70% {
	box-shadow: 0 0 0 5px rgba(16, 185, 129, 0);
	}
	100% {
	box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
	}
	}
	"""

	# Build the interface: page header, model-selection column, chat column,
	# conversation settings, footer, and finally the event wiring.
	with gr.Blocks(css=custom_css, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
	# Header banner. The "Last Updated" date is datetime.now() evaluated when
	# this markup is built, i.e. the date the app was started.
	gr.HTML(f"""
	<div id="app-container">
	<div style="text-align: center; padding: 20px 0;">
	<h1 style="font-size: 2.5rem;">{APP_TITLE}</h1>
	<p style="font-size: 1.1rem; color: #555;">{APP_DESCRIPTION}</p>
	<div style="margin-top: 10px;">
	<span class="status-indicator status-active"></span>
	<span>Service Active</span>
	<span style="margin-left: 15px;">Last Updated: {datetime.now().strftime('%Y-%m-%d')}</span>
	</div>
	</div>
	</div>
	""")

	with gr.Row():
	with gr.Column(scale=2):
	# Model selection panel - MOVED TO THE LEFT SIDE
	# NOTE(review): the opening <div> here and the closing </div> further down
	# are emitted by separate gr.HTML components; presumably each renders as
	# its own element, so they may not wrap the widgets in between - verify.
	gr.HTML("<div style='border: 1px solid #e0e0e0; border-radius: 10px; padding: 15px;'>")
	gr.HTML("<h3 style='margin-top: 0;'>Model Selection</h3>")

	# Custom model input (this is what the respond function sees).
	# The elem_id is the one the selectModel() script from
	# update_model_display() looks up with document.getElementById.
	custom_model_box = gr.Textbox(
	value="Qwen/Qwen3-32B", # UI default; respond() falls back to a Llama model only when this box is blank
	label="Selected Model",
	elem_id="custom-model-input"
	)

	# Search box
	model_search_box = gr.Textbox(
	label="Search Models",
	placeholder="Type to filter models...",
	lines=1
	)

	# Dynamic model display area (initially the unfiltered catalogue)
	model_display = gr.HTML(update_model_display())

	# Model information display; the initial text is computed for the same
	# identifier as the textbox default above, so keep the two in sync.
	gr.HTML("<h4>Current Model Info</h4>")
	model_info_display = gr.Markdown(get_model_info("Qwen/Qwen3-32B"))
	gr.HTML("</div>")

	with gr.Column(scale=3):
	# Main chat interface; elem_id matches the #chat-container rule in custom_css
	chatbot = gr.Chatbot(
	height=550,
	show_copy_button=True,
	placeholder="Select a model and begin chatting",
	layout="panel",
	elem_id="chat-container"
	)

	# Message input and send button
	with gr.Row():
	with gr.Column(scale=8):
	msg = gr.Textbox(
	show_label=False,
	placeholder="Type your message here...",
	container=False,
	scale=8
	)
	with gr.Column(scale=1, min_width=70):
	submit_btn = gr.Button("Send", variant="primary", scale=1)

	# Generation settings; each widget below is passed to respond() as an input
	with gr.Accordion("Conversation Settings", open=False):
	system_message_box = gr.Textbox(
	value="You are a helpful assistant.",
	placeholder="System prompt that guides the assistant's behavior",
	label="System Prompt",
	lines=2
	)

	# Use standard Row/Column layout instead of tabs that might not be available
	gr.HTML("<h3>Basic Parameters</h3>")
	with gr.Row():
	with gr.Column():
	max_tokens_slider = gr.Slider(
	minimum=1,
	maximum=4096,
	value=512,
	step=1,
	label="Max new tokens"
	)
	with gr.Column():
	temperature_slider = gr.Slider(
	minimum=0.1,
	maximum=4.0,
	value=0.7,
	step=0.1,
	label="Temperature"
	)

	gr.HTML("<h3>Advanced Parameters</h3>")
	with gr.Row():
	with gr.Column():
	top_p_slider = gr.Slider(
	minimum=0.1,
	maximum=1.0,
	value=0.95,
	step=0.05,
	label="Top-P"
	)
	with gr.Column():
	frequency_penalty_slider = gr.Slider(
	minimum=-2.0,
	maximum=2.0,
	value=0.0,
	step=0.1,
	label="Frequency Penalty"
	)

	# -1 is the sentinel respond() converts to None (random seed)
	seed_slider = gr.Slider(
	minimum=-1,
	maximum=65535,
	value=-1,
	step=1,
	label="Seed (-1 for random)"
	)

	# Footer
	gr.HTML("""
	<div class="footer">
	<p>Created with Gradio • Powered by Hugging Face Inference API</p>
	<p>This interface allows you to chat with various language models without requiring a GPU</p>
	</div>
	""")

	# Add a hidden button to trigger model selection via JavaScript.
	# The selectModel() script searches for it with button[value="Select Model"].
	# NOTE(review): confirm a visible=False button is present in the DOM and
	# matches that selector; otherwise the script never finds it.
	trigger_model_selection = gr.Button("Select Model", visible=False)

	# Set up event handlers.
	# Pressing Enter in the message box and clicking "Send" are wired
	# identically: stream respond() into the chatbot, then clear the input.
	msg.submit(
	fn=respond,
	inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
	top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
	outputs=chatbot,
	queue=True
	).then(
	lambda: "", # Clear the message box after sending
	None,
	[msg]
	)

	submit_btn.click(
	fn=respond,
	inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
	top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
	outputs=chatbot,
	queue=True
	).then(
	lambda: "", # Clear the message box after sending
	None,
	[msg]
	)

	# Update model display when search changes
	model_search_box.change(
	fn=lambda x: update_model_display(x),
	inputs=model_search_box,
	outputs=model_display
	)

	# Update model info when selection changes.
	# NOTE(review): this handler writes its result back into custom_model_box,
	# the same component whose change event triggers it - confirm this cannot
	# re-trigger itself.
	custom_model_box.change(
	fn=set_model_and_update_info,
	inputs=custom_model_box,
	outputs=[custom_model_box, model_info_display]
	)

	# Connect the hidden trigger button to update model info
	trigger_model_selection.click(
	fn=set_model_and_update_info,
	inputs=custom_model_box,
	outputs=[custom_model_box, model_info_display]
	)

	# Script entry point: start the Gradio server only when this file is executed
	# directly, not when it is imported.
	if __name__ == "__main__":
	print("Launching the enhanced multi-model chat interface.")
	demo.launch()