Spaces:

schoolkithub
/

GAIA_AGE

Sleeping

GAIA_AGE / app.py

ghost

Updated GAIA agent for submission

945d0d0 13 days ago

4.18 kB

	import gradio as gr
	import json
	import os
	from datetime import datetime
	from agent import GAIAAgent
	from evaluate import evaluate_agent, create_sample_dataset
	import traceback

	def _read_submission_preview(path="submission.jsonl", limit=500):
	    """Return the first ``limit`` characters of ``path`` for display.

	    "..." is appended when the file holds more than ``limit`` characters.
	    A missing file yields an empty string.
	    """
	    try:
	        # Explicit UTF-8 keeps decoding independent of the host locale, and
	        # errors="replace" stops one bad byte from aborting the whole report.
	        with open(path, "r", encoding="utf-8", errors="replace") as f:
	            # One character past the limit is enough to detect truncation
	            # without loading the entire file.
	            head = f.read(limit + 1)
	    except FileNotFoundError:
	        return ""
	    return head[:limit] + ("..." if len(head) > limit else "")


	def run_evaluation():
	    """Run the GAIA evaluation and return results.

	    Returns:
	        A ``(report, score)`` tuple. ``report`` is Markdown text and
	        ``score`` is the value returned by ``evaluate_agent``. If anything
	        raises, ``report`` describes the error (message and traceback) and
	        ``score`` is ``0.0``.
	    """
	    try:
	        print("Starting GAIA Agent Evaluation...")
	        print("=" * 50)

	        # Initialize agent
	        agent = GAIAAgent()

	        # Test API connection first
	        print("Testing xAI API connection...")
	        test_response = agent.test_grok()
	        print(f"API Test Response: {test_response}")

	        # Run evaluation on sample dataset (since we don't have the full GAIA dataset)
	        print("\nRunning evaluation on sample tasks...")
	        score = evaluate_agent(dataset_path=None, max_tasks=10)

	        # Preview of the submission file, if one exists
	        submission_preview = _read_submission_preview()

	        # Format results
	        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	        passed = score >= 30
	        certificate_status = (
	            '✅ ACHIEVED (≥30%)' if passed else '❌ NOT ACHIEVED (<30%)'
	        )
	        next_steps = (
	            '🎉 Congratulations! You can now claim your Certificate of Excellence!'
	            if passed
	            else '💪 Keep improving your agent to reach the 30% threshold.'
	        )

	        results = f"""
	# GAIA Agent Evaluation Results

	Timestamp: {timestamp}
	Final Score: {score:.2f}%
	Certificate Status: {certificate_status}

	## API Connection Status
	{test_response}

	## Submission File Preview
	```json
	{submission_preview}
	```

	## Next Steps
	{next_steps}
	"""

	        return results, score

	    except Exception as e:
	        # UI boundary: report the failure instead of crashing the handler.
	        # The message below points the user at the logs, so the traceback
	        # must actually be written there as well.
	        traceback.print_exc()
	        error_msg = f"""
	# Evaluation Error

	Error: {str(e)}

	Traceback:
	```
	{traceback.format_exc()}
	```

	Please check the logs and fix any issues before retrying.
	"""
	        return error_msg, 0.0

	def create_interface():
	    """Build the Gradio Blocks UI for the evaluation and return it.

	    The page shows an introduction, a button that calls ``run_evaluation``,
	    a Markdown area and a read-only number that receive its two return
	    values, and a closing "about / troubleshooting" section.
	    """
	    intro_text = """
	# 🤖 GAIA Agent Evaluation

	This is your GAIA benchmark agent for the Hugging Face Agents Course Certificate of Excellence.

	Goal: Achieve ≥30% score on GAIA benchmark tasks

	Click the button below to run the evaluation and submit your answers.

	⚠️ Note: This may take several minutes to complete. Please be patient.
	"""

	    about_text = """
	---

	## About This Agent

	- API: xAI Grok for reasoning
	- Tools: Web search, file handling, math calculations
	- Fallbacks: Local knowledge for common questions
	- Target: 30% accuracy for certificate eligibility

	## Troubleshooting

	If you encounter issues:
	1. Check the container logs in the "Logs" tab
	2. Verify API credentials and internet connectivity
	3. Ensure all dependencies are installed

	Good luck! 🍀
	"""

	    soft_theme = gr.themes.Soft()
	    with gr.Blocks(title="GAIA Agent Evaluation", theme=soft_theme) as app:
	        gr.Markdown(intro_text)

	        # Row 1: the trigger button.
	        with gr.Row():
	            launch_button = gr.Button(
	                "🚀 Run Evaluation & Submit All Answers",
	                variant="primary",
	                size="lg",
	            )

	        # Row 2: report on the left, numeric score on the right.
	        with gr.Row():
	            with gr.Column():
	                gr.Markdown("## Run Status / Submission Result")
	                report_panel = gr.Markdown(
	                    "Click the button above to start evaluation..."
	                )

	            with gr.Column():
	                gr.Markdown("## Score")
	                score_field = gr.Number(
	                    label="Final Score (%)",
	                    value=0.0,
	                    interactive=False,
	                )

	        # The handler takes no inputs; its (report, score) tuple is mapped
	        # onto the two output components in order.
	        launch_button.click(
	            fn=run_evaluation,
	            inputs=[],
	            outputs=[report_panel, score_field],
	            show_progress=True,
	        )

	        gr.Markdown(about_text)

	    return app

	if __name__ == "__main__":
	    # Script entry point: build the interface, then serve it.
	    launch_options = {
	        "server_name": "0.0.0.0",  # listen on all network interfaces
	        "server_port": 7860,
	        "show_error": True,
	        "show_api": False,
	    }
	    demo = create_interface()
	    demo.launch(**launch_options)