yoshizen committed on
Commit
615d1b7
·
verified ·
1 Parent(s): c954a5e

Create evaluation_runner.py

Browse files
Files changed (1) hide show
  1. evaluation_runner.py +60 -0
evaluation_runner.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ from tqdm import tqdm
4
+
5
class EvaluationRunner:
    """Fetch questions from the scoring API, run an agent on them, submit answers.

    Network helpers never raise: on failure they return a human-readable
    error string, which ``run_evaluation`` passes back to the caller.
    """

    API_URL = "https://agents-course-unit4-scoring.hf.space"

    def run_evaluation(self, agent, username: str, agent_code: str):
        """Run the full evaluation cycle: fetch, answer, submit.

        Args:
            agent: Callable invoked as ``agent(question, task_id)``. Its
                return value must support ``.get("final_answer", "")``.
            username: Sent to the submit endpoint (stripped of whitespace).
            agent_code: Sent to the submit endpoint (stripped of whitespace).

        Returns:
            A 4-tuple ``(message, 0, question_count, results)``. When the
            questions cannot be fetched the tuple is
            ``(error_message, 0, 0, None)``; otherwise ``results`` is a
            ``pandas.DataFrame`` with one row per question.
        """
        questions = self._fetch_questions()
        if isinstance(questions, str):
            # A string from _fetch_questions is an error message, not data.
            return questions, 0, 0, None

        results = []
        answers = []

        for q in tqdm(questions, desc="Processing"):
            answer_entry, result_row = self._process_question(agent, q)
            # Failed questions are logged in the results but not submitted.
            if answer_entry is not None:
                answers.append(answer_entry)
            results.append(result_row)

        submission_result = self._submit_answers(username, agent_code, answers)
        # NOTE(review): the second element is always 0 here; presumably it is
        # meant to carry a score -- confirm what the caller expects.
        return submission_result, 0, len(questions), pd.DataFrame(results)

    def _process_question(self, agent, item):
        """Run ``agent`` on one question item and build its log entries.

        Returns:
            ``(answer_entry, result_row)``. ``answer_entry`` is the dict to
            submit, or ``None`` when processing failed. ``result_row`` is
            always a dict with "Question", "Your Answer" and "Status" keys.
        """
        # Build the preview without indexing, so a malformed item (missing
        # key, non-dict, non-string question) cannot make the error path
        # itself raise and abort the whole run.
        if isinstance(item, dict):
            preview = str(item.get("question", ""))[:100]
        else:
            preview = str(item)[:100]

        try:
            question = item["question"]
            task_id = item["task_id"]
            response = agent(question, task_id)
            answer = response.get("final_answer", "")
            answer_text = str(answer)
        except Exception as e:
            # Deliberate best-effort: one failing question must not stop
            # the remaining ones from being processed.
            failed_row = {
                "Question": preview,
                "Your Answer": f"Error: {str(e)}",
                "Status": "Failed"
            }
            return None, failed_row

        answer_entry = {
            "task_id": task_id,
            "submitted_answer": answer_text[:500]  # length limit
        }
        result_row = {
            "Question": preview,
            "Your Answer": answer_text[:100],
            "Status": "Processed"
        }
        return answer_entry, result_row

    def _fetch_questions(self):
        """Download the question list.

        Returns:
            The decoded JSON list on success, or an error-message string
            starting with "Failed to fetch questions:" on any failure.
        """
        try:
            response = requests.get(f"{self.API_URL}/questions", timeout=30)
            # Without this, an HTTP error with a JSON body would be returned
            # as if it were the question list.
            response.raise_for_status()
            payload = response.json()
        except Exception as e:
            return f"Failed to fetch questions: {str(e)}"

        if not isinstance(payload, list):
            # run_evaluation iterates the payload and indexes each item by
            # key, so anything other than a list cannot be processed.
            return (
                "Failed to fetch questions: unexpected response format "
                f"({type(payload).__name__})"
            )
        return payload

    def _submit_answers(self, username: str, agent_code: str, answers: list):
        """Submit the collected answers.

        Returns:
            The "message" field of the JSON response (or
            "Submitted successfully" when the field is absent), or a string
            starting with "Submission failed:" on any failure.
        """
        try:
            response = requests.post(
                f"{self.API_URL}/submit",
                json={
                    "username": username.strip(),
                    "agent_code": agent_code.strip(),
                    "answers": answers
                },
                timeout=60
            )
            # An error response whose JSON has no "message" key would
            # otherwise be reported as "Submitted successfully".
            response.raise_for_status()
            return response.json().get("message", "Submitted successfully")
        except Exception as e:
            return f"Submission failed: {str(e)}"