""" | |
Updated create_model_submission_ui function that properly displays benchmark names in dropdown. | |
Replace this function in your evaluation_queue.py file. | |
""" | |

def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
    """Create the model submission UI components.

    Args:
        evaluation_queue: Evaluation queue instance
        auth_manager: Authentication manager instance
        db_manager: Database manager instance

    Returns:
        gr.Blocks: Gradio Blocks component with the model submission UI
    """
    with gr.Blocks() as submission_ui:
        with gr.Tab("Submit Model"):
            gr.Markdown(f"""
            ### Model Size Restrictions
            Models must fit within {evaluation_queue.memory_limit_gb}GB of RAM for evaluation.
            Large models will be rejected to ensure all evaluations can complete successfully.
            """, elem_classes=["info-text"])

            with gr.Row():
                with gr.Column(scale=2):
                    model_id_input = gr.Textbox(
                        placeholder="HuggingFace model ID (e.g., 'gpt2', 'facebook/opt-350m')",
                        label="Model ID"
                    )
                    check_size_button = gr.Button("Check Model Size")
                    size_check_result = gr.Markdown("")

                    model_name_input = gr.Textbox(
                        placeholder="Display name for your model",
                        label="Model Name"
                    )
                    model_description_input = gr.Textbox(
                        placeholder="Brief description of your model",
                        label="Description",
                        lines=3
                    )
                    model_parameters_input = gr.Number(
                        label="Number of Parameters (billions)",
                        precision=2
                    )

                with gr.Column(scale=1):
                    model_tag_input = gr.Dropdown(
                        choices=evaluation_queue.model_tags,
                        label="Model Tag",
                        info="Select one category that best describes your model"
                    )

                    # Benchmark dropdown: choices are (label, value) tuples, so the
                    # benchmark name is displayed while its id is passed to the handler.
                    benchmark_dropdown = gr.Dropdown(
                        label="Benchmark",
                        info="Select a benchmark to evaluate your model on",
                        choices=[("Loading benchmarks...", "none")],
                        value=None
                    )
                    refresh_benchmarks_button = gr.Button("Refresh Benchmarks")

            submit_model_button = gr.Button("Submit for Evaluation")
            submission_status = gr.Markdown("")

        with gr.Tab("Evaluation Queue"):
            refresh_queue_button = gr.Button("Refresh Queue")

            with gr.Row():
                with gr.Column(scale=1):
                    queue_stats = gr.JSON(
                        label="Queue Statistics"
                    )

                with gr.Column(scale=2):
                    queue_status = gr.Dataframe(
                        headers=["ID", "Model", "Benchmark", "Status", "Submitted"],
                        label="Recent Evaluations"
                    )

            with gr.Row(visible=True) as progress_container:
                with gr.Column():
                    current_eval_info = gr.Markdown("No evaluation currently running")
                    # Use a simple text display for progress instead of a Progress component
                    progress_display = gr.Markdown("Progress: 0%")

        # Event handlers
        def check_model_size_handler(model_id):
            if not model_id:
                return "Please enter a HuggingFace model ID."

            try:
                will_fit, message = evaluation_queue.check_model_size(model_id)

                if will_fit:
                    return f"✅ {message}"
                else:
                    return f"❌ {message}"
            except Exception as e:
                return f"Error checking model size: {str(e)}"

        def refresh_benchmarks_handler():
            benchmarks = db_manager.get_benchmarks()

            # Build (label, value) tuples so the dropdown displays the benchmark
            # name and passes the benchmark id to the submit handler.
            choices = []
            for b in benchmarks:
                choices.append((b["name"], str(b["id"])))

            if not choices:
                choices = [("No benchmarks available - add some first", "none")]

            return gr.update(choices=choices)
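
        # Note: on Gradio 4.x the same effect can also be achieved by returning a
        # component update, e.g. `return gr.Dropdown(choices=choices)`; gr.update()
        # is kept here to stay close to the original code.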

        def submit_model_handler(model_id, model_name, model_description, model_parameters, model_tag, benchmark_id, request: gr.Request):
            # Check if the user is logged in
            user = auth_manager.check_login(request)

            if not user:
                return "Please log in to submit a model."

            if not model_id or not model_name or not model_tag or not benchmark_id:
                return "Please fill in all required fields."

            if benchmark_id == "none":
                return "Please select a valid benchmark."

            try:
                # Check if the model will fit in RAM
                will_fit, size_message = evaluation_queue.check_model_size(model_id)

                if not will_fit:
                    return f"❌ {size_message}"

                # Add the model to the database
                model_db_id = db_manager.add_model(
                    name=model_name,
                    hf_model_id=model_id,
                    user_id=user["id"],
                    tag=model_tag,
                    parameters=str(model_parameters) if model_parameters else None,
                    description=model_description
                )

                if not model_db_id:
                    return "Failed to add model to database."

                # Submit for evaluation
                eval_id, message = evaluation_queue.submit_evaluation(
                    model_id=model_db_id,
                    benchmark_id=benchmark_id,
                    user_id=user["id"]
                )

                if eval_id:
                    return f"✅ Model submitted successfully. {size_message}\nEvaluation ID: {eval_id}"
                else:
                    return message
            except Exception as e:
                return f"Error submitting model: {str(e)}"

        def refresh_queue_handler():
            # Get queue statistics
            stats = evaluation_queue.get_queue_status()

            # Get recent evaluations (all statuses, limited to 20)
            evals = db_manager.get_evaluation_results(limit=20)

            # Format rows for the dataframe
            eval_data = []
            for evaluation in evals:
                eval_data.append([
                    evaluation["id"],
                    evaluation["model_name"],
                    evaluation["benchmark_name"],
                    evaluation["status"],
                    evaluation["submitted_at"]
                ])

            # Also update the progress display for the currently running evaluation
            current_eval, progress = evaluation_queue.get_current_progress()
            if current_eval:
                model_info = db_manager.get_model(current_eval["model_id"])
                benchmark_info = db_manager.get_benchmark(current_eval["benchmark_id"])

                if model_info and benchmark_info:
                    eval_info = f"**Currently Evaluating:** {model_info['name']} on {benchmark_info['name']}"
                    progress_text = f"Progress: {progress}%"
                    return stats, eval_data, eval_info, progress_text

            return stats, eval_data, "No evaluation currently running", "Progress: 0%"

        # Connect event handlers
        check_size_button.click(
            fn=check_model_size_handler,
            inputs=[model_id_input],
            outputs=[size_check_result]
        )

        refresh_benchmarks_button.click(
            fn=refresh_benchmarks_handler,
            inputs=[],
            outputs=[benchmark_dropdown]
        )

        submit_model_button.click(
            fn=submit_model_handler,
            inputs=[
                model_id_input,
                model_name_input,
                model_description_input,
                model_parameters_input,
                model_tag_input,
                benchmark_dropdown
            ],
            outputs=[submission_status]
        )

        refresh_queue_button.click(
            fn=refresh_queue_handler,
            inputs=[],
            outputs=[queue_stats, queue_status, current_eval_info, progress_display]
        )

        # Initialize the benchmark dropdown and queue view on page load
        submission_ui.load(
            fn=refresh_benchmarks_handler,
            inputs=[],
            outputs=[benchmark_dropdown]
        )

        submission_ui.load(
            fn=refresh_queue_handler,
            inputs=[],
            outputs=[queue_stats, queue_status, current_eval_info, progress_display]
        )

    return submission_ui
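

# Example wiring (a sketch only: the DatabaseManager, AuthManager and EvaluationQueue
# constructors below are assumptions; initialise them however your project actually does):
#
#     db_manager = DatabaseManager("evaluations.db")
#     auth_manager = AuthManager(db_manager)
#     evaluation_queue = EvaluationQueue(db_manager, memory_limit_gb=14)
#
#     demo = create_model_submission_ui(evaluation_queue, auth_manager, db_manager)
#     demo.launch()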