hassenhamdi committed on
Commit
09f04d8
·
verified ·
1 Parent(s): 6573e0d

Create evaluator.py

Browse files
Files changed (1) hide show
  1. evaluator.py +158 -0
evaluator.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # evaluator.py
2
+ from settings import Settings
3
+ from typing import List
4
+ from models import Question, QuestionAnswerPair, Results
5
+ import requests
6
+ import random
7
+ import json
8
+ import logging
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class Evaluator:
    """Client for the scoring API: fetches questions and submits saved answers.

    All endpoints are built from ``settings.scoring_api_base_url``. Fetched
    questions are mirrored to a local ``questions.json`` file, which is used
    as a fallback when the API request fails.
    """

    def __init__(self, settings: "Settings"):
        """Store the settings object.

        Args:
            settings: Configuration object. This class reads its
                ``scoring_api_base_url``, ``space_id`` and ``username``
                attributes.
        """
        self.settings = settings

    def _endpoint(self, path: str) -> str:
        """Join *path* onto the configured base URL with exactly one slash.

        The base URL is stringified first; a trailing slash on it (or a
        leading slash on *path*) would otherwise produce ``//`` in the URL.
        """
        base_url = str(self.settings.scoring_api_base_url).rstrip("/")
        return f"{base_url}/{path.lstrip('/')}"

    def _cache_questions(self, questions) -> None:
        """Best-effort dump of *questions* to ``questions.json``.

        A failure to write the cache is logged and swallowed so that
        successfully fetched questions are still returned to the caller.
        """
        try:
            with open("questions.json", "w") as f:
                json.dump([question.model_dump() for question in questions],
                          f, indent=4)
        except OSError as e:
            logger.warning(f"Could not save questions to 'questions.json': {e}")

    def _load_cached_questions(self) -> "list[Question]":
        """Load questions from the local ``questions.json`` fallback file.

        Returns:
            The parsed questions, or an empty list when the file is missing
            or does not contain valid JSON.
        """
        try:
            with open("questions.json", "r") as f:
                questions = [Question(**question) for question in json.load(f)]
        except FileNotFoundError:
            logger.error("Local 'questions.json' not found. Cannot retrieve questions.")
            return []
        except json.JSONDecodeError as json_e:
            logger.error(f"Error decoding local 'questions.json': {json_e}")
            return []
        logger.info(f"Successfully loaded {len(questions)} questions from local file.")
        return questions

    def get_questions(self) -> "list[Question]":
        """Get all questions from the scoring API, falling back to the local file.

        On a successful fetch the questions are also saved to
        ``questions.json`` for later fallback/debugging.

        Returns:
            list[Question]: The questions; empty when neither the API nor the
            local file could provide any.
        """
        url = self._endpoint("questions")
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            questions = [Question(**question) for question in response.json()]
        except requests.exceptions.RequestException as e:
            logger.warning(f"Error fetching questions from API ({e}). Attempting to read from local file 'questions.json'.")
            # Read local file instead, dealing with API rate limits, etc.
            return self._load_cached_questions()

        self._cache_questions(questions)
        logger.info(f"Successfully fetched {len(questions)} questions from API.")
        return questions

    def get_one_question(self, task_id=None) -> "Question":
        """Get the requested question, or a random one.

        When ``task_id`` is given, all questions are fetched and searched for
        it; if it is not found (or no ``task_id`` was given) a random question
        is requested from the API, falling back to a random pick from
        ``get_questions()`` when that request fails.

        Args:
            task_id: Identifier of the question to return, or a falsy value
                to get a random question.

        Returns:
            Question: A Question object.

        Raises:
            ValueError: If the random-question request fails and no questions
                are available from ``get_questions()`` either.
        """
        if task_id:
            # Get all questions to find the specific task_id.
            for question in self.get_questions():
                if question.task_id == task_id:
                    logger.info(f"Found specific question for task_id: {task_id}")
                    return question
            logger.warning(f"Task ID {task_id} not found in fetched questions. Attempting random.")

        # Try fetching a random question from the API.
        try:
            url = self._endpoint("random-question")
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            question = Question(**response.json())
            logger.info(f"Successfully fetched random question from API: {question.task_id}")
            return question
        except requests.exceptions.RequestException as e:
            logger.warning(f"Error fetching random question from API ({e}). Attempting to get random from local file.")
            # get_questions() itself falls back to the local file.
            questions = self.get_questions()
            if not questions:
                logger.error("No questions available from API or local file.")
                raise ValueError("No questions available to retrieve.")
            random_question = random.choice(questions)
            logger.info(f"Loaded random question from local file: {random_question.task_id}")
            return random_question

    def _read_answer_file(self, username: str) -> "list[QuestionAnswerPair]":
        """Read the question/answer pairs from a user-specific answer file.

        Args:
            username: User whose ``answers_{username}.json`` file is read.

        Returns:
            The pairs stored in the file.

        Raises:
            FileNotFoundError: If the answer file does not exist.
        """
        file_name = f"answers_{username}.json"
        # Open directly instead of pre-checking existence: no extra module is
        # needed and there is no window between the check and the open.
        try:
            with open(file_name, "r") as f:
                raw_pairs = json.load(f)
        except FileNotFoundError as exc:
            raise FileNotFoundError(
                f"Answer file '{file_name}' not found for user '{username}'."
            ) from exc
        return [QuestionAnswerPair(**pair) for pair in raw_pairs]

    @staticmethod
    def _describe_http_error(response) -> str:
        """Build a human-readable description of a failed HTTP response.

        Uses the ``detail`` field when the body is a JSON object; otherwise
        falls back to the first 500 characters of the raw body.
        """
        error_detail = f"Server responded with status {response.status_code}."
        try:
            error_json = response.json()
        except ValueError:
            # JSON decode errors are ValueError subclasses.
            error_json = None
        if isinstance(error_json, dict):
            error_detail += f" Detail: {error_json.get('detail', response.text)}"
        else:
            error_detail += f" Response: {response.text[:500]}"
        return error_detail

    def submit_answers(self, username: str) -> str:
        """Submit saved answers to the scoring endpoint and return the result.

        Answers are read from ``answers_{username}.json``; the submission
        itself is made under ``settings.username``.

        Args:
            username: User whose saved answer file should be submitted.

        Returns:
            A status message describing success or the reason for failure.
            Errors are reported through the message rather than raised.
        """
        try:
            pairs_to_submit = self._read_answer_file(username)
            answers_payload = [pair.get_answer() for pair in pairs_to_submit]
        except FileNotFoundError as e:
            logger.error(e)
            return "Click 'Get One Answer' or 'Get All Answers' to run agent before trying to submit."
        except Exception as e:
            logger.error(f"Error reading local answer file: {e}")
            return f"Error preparing answers for submission: {e}"

        agent_code = f"https://huggingface.co/spaces/{self.settings.space_id}/tree/main"
        submission_data = {
            "username": self.settings.username,  # Use username from settings for submission
            "code_link": agent_code,
            "answers": answers_payload,
        }
        submit_url = self._endpoint("submit")
        logger.info(f"Submitting {len(answers_payload)} answers to: {submit_url}")
        try:
            response = requests.post(
                submit_url, json=submission_data, timeout=60)
            response.raise_for_status()
            results = Results.model_validate(response.json())
            logger.info(
                f"Submission successful.\n"
                f"User: {results.username}.\n"
                f"Overall Score: {results.score}%.\n"
                f"Correct Count: {results.correct_count}.\n"
                f"Total Attempted: {results.total_attempted}.\n"
                f"Message: {results.message}.\n"
                f"Timestamp: {results.timestamp}.\n"
            )
            status_message = (
                f"Submission Successful!\n"
                f"User: {results.username}\n"
                f"Overall Score: {results.score}% "
                f"({results.correct_count}/{results.total_attempted} correct)\n"
                f"Message: {results.message}"
            )
            return status_message
        except requests.exceptions.HTTPError as e:
            status_message = f"Submission Failed: {self._describe_http_error(e.response)}"
            logger.error(status_message)
            return status_message
        except requests.exceptions.Timeout:
            status_message = "Submission Failed: The request timed out."
            logger.error(status_message)
            return status_message
        except requests.exceptions.RequestException as e:
            status_message = f"Submission Failed: Network error - {e}"
            logger.error(status_message)
            return status_message
        except Exception as e:
            # Covers non-network failures such as response validation.
            status_message = f"An unexpected error occurred during submission: {e}"
            logger.error(status_message)
            return status_message
158
+