FinalTest

Runtime error

FinalTest / evaluation_runner.py

Create evaluation_runner.py

615d1b7 verified 3 months ago

2.2 kB

	import requests
	import pandas as pd
	from tqdm import tqdm

	class EvaluationRunner:
	    """Run an agent over the scoring service's questions and submit its answers.

	    The runner fetches the question list from ``API_URL``, calls the agent
	    once per question, and posts the collected answers back to the service.
	    Network and agent failures are reported as message strings or as rows
	    of the results table; they are not raised to the caller.
	    """

	    API_URL = "https://agents-course-unit4-scoring.hf.space"

	    # Longest answer text that is sent to the scoring service.
	    _MAX_SUBMITTED_CHARS = 500
	    # Longest question/answer text kept in the per-question results table.
	    _MAX_DISPLAY_CHARS = 100

	    def run_evaluation(self, agent, username: str, agent_code: str):
	        """Run the full evaluation cycle: fetch, answer, submit.

	        Args:
	            agent: Callable invoked as ``agent(question, task_id)``. It is
	                expected to return a mapping; the answer is read from its
	                ``"final_answer"`` key.
	            username: User name sent with the submission.
	            agent_code: Agent code reference sent with the submission.

	        Returns:
	            A 4-tuple ``(message, 0, question_count, results)``. ``message``
	            is the submission status or an error description. The second
	            element is always ``0`` in this implementation. ``results`` is
	            a ``pandas.DataFrame`` with one row per question, or ``None``
	            when the questions could not be fetched.
	        """
	        questions = self._fetch_questions()
	        if isinstance(questions, str):
	            # A string from _fetch_questions is an error message.
	            return questions, 0, 0, None

	        results = []
	        answers = []
	        for q in tqdm(questions, desc="Processing"):
	            answer_entry, row = self._process_question(agent, q)
	            if answer_entry is not None:
	                answers.append(answer_entry)
	            results.append(row)

	        if not answers:
	            # Nothing to score: do not post an empty submission.
	            return "No answers to submit.", 0, len(questions), pd.DataFrame(results)

	        submission_result = self._submit_answers(username, agent_code, answers)
	        return submission_result, 0, len(questions), pd.DataFrame(results)

	    def _process_question(self, agent, q):
	        """Ask the agent one question and build its submission/report entries.

	        Args:
	            agent: Callable invoked as ``agent(question, task_id)``.
	            q: One question record; ``"question"`` and ``"task_id"`` keys
	                are read from it.

	        Returns:
	            ``(answer_entry, row)``. ``answer_entry`` is the dict to submit,
	            or ``None`` when the question failed. ``row`` is the matching
	            results-table row.
	        """
	        # Read the display text defensively so that a malformed record is
	        # reported as a failed row instead of raising inside the handler.
	        if isinstance(q, dict):
	            question_text = str(q.get("question", ""))
	        else:
	            question_text = str(q)
	        shown_question = question_text[:self._MAX_DISPLAY_CHARS]

	        # Broad catch is deliberate: the agent is arbitrary user code and one
	        # failing question must not abort the whole run.
	        try:
	            task_id = q["task_id"]
	            response = agent(q["question"], task_id)
	            answer = response.get("final_answer", "")
	        except Exception as e:
	            return None, {
	                "Question": shown_question,
	                "Your Answer": f"Error: {str(e)}",
	                "Status": "Failed",
	            }

	        # A missing answer is submitted as empty text, not the string "None".
	        answer_text = "" if answer is None else str(answer)
	        answer_entry = {
	            "task_id": task_id,
	            "submitted_answer": answer_text[:self._MAX_SUBMITTED_CHARS],  # length limit
	        }
	        row = {
	            "Question": shown_question,
	            "Your Answer": answer_text[:self._MAX_DISPLAY_CHARS],
	            "Status": "Processed",
	        }
	        return answer_entry, row

	    def _fetch_questions(self):
	        """Download the question list from ``{API_URL}/questions``.

	        Returns:
	            The decoded JSON list on success, or an error-message string
	            starting with ``"Failed to fetch questions:"`` on any failure
	            (request error, HTTP error status, invalid JSON, non-list body).
	        """
	        try:
	            response = requests.get(f"{self.API_URL}/questions", timeout=30)
	            # Turn HTTP error statuses into exceptions so an error body is
	            # never mistaken for a question list.
	            response.raise_for_status()
	            questions = response.json()
	        except Exception as e:
	            return f"Failed to fetch questions: {str(e)}"

	        if not isinstance(questions, list):
	            kind = type(questions).__name__
	            return f"Failed to fetch questions: unexpected response type {kind}"
	        return questions

	    def _submit_answers(self, username: str, agent_code: str, answers: list):
	        """Post the collected answers to ``{API_URL}/submit``.

	        Args:
	            username: User name; surrounding whitespace is stripped.
	            agent_code: Agent code reference; surrounding whitespace is stripped.
	            answers: List of ``{"task_id", "submitted_answer"}`` dicts.

	        Returns:
	            The ``"message"`` field of the JSON response (or
	            ``"Submitted successfully"`` when it is absent), or a string
	            starting with ``"Submission failed:"`` on any failure.
	        """
	        try:
	            response = requests.post(
	                f"{self.API_URL}/submit",
	                json={
	                    "username": username.strip(),
	                    "agent_code": agent_code.strip(),
	                    "answers": answers,
	                },
	                timeout=60,
	            )
	            # Without this check a rejected submission whose body has no
	            # "message" key would be reported as a success.
	            response.raise_for_status()
	            payload = response.json()
	        except Exception as e:
	            return f"Submission failed: {str(e)}"

	        if isinstance(payload, dict):
	            return payload.get("message", "Submitted successfully")
	        return "Submitted successfully"