import json
from typing import Tuple

import requests
import gradio as gr
import pandas as pd
from tqdm import tqdm

from agent import GAIAExpertAgent

# Constants
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

class EvaluationRunner:
    """Optimized evaluation runner for the GAIA scoring API."""

    def __init__(self, api_url=DEFAULT_API_URL):
        self.api_url = api_url
        self.questions_url = f"{api_url}/questions"
        self.submit_url = f"{api_url}/submit"
        self.results_url = f"{api_url}/results"
        self.correct_answers = 0
        self.total_questions = 0

    def run_evaluation(self, agent, username: str, agent_code: str) -> Tuple[str, pd.DataFrame]:
        """Fetch questions, run the agent on each one, and submit the answers."""
        questions_data = self._fetch_questions()
        if not isinstance(questions_data, list):
            # _fetch_questions returns an error string on failure
            return questions_data, pd.DataFrame()
        results_log, answers_payload = self._run_agent_on_questions(agent, questions_data)
        if not answers_payload:
            return "No answers generated", pd.DataFrame()
        submission_result = self._submit_answers(username, agent_code, answers_payload)
        return submission_result, pd.DataFrame(results_log)

    def _fetch_questions(self):
        """Fetch the question list; returns an error string on failure."""
        try:
            response = requests.get(self.questions_url, timeout=30)
            response.raise_for_status()
            questions_data = response.json()
            self.total_questions = len(questions_data)
            print(f"Fetched {self.total_questions} questions")
            return questions_data
        except Exception as e:
            return f"Error fetching questions: {str(e)}"

    def _run_agent_on_questions(self, agent, questions_data):
        """Run the agent on every question, collecting answers and a display log."""
        results_log = []
        answers_payload = []
        print(f"Processing {len(questions_data)} questions...")
        for item in tqdm(questions_data, desc="Questions"):
            task_id = item.get("task_id")
            question_text = item.get("question")
            if not task_id or not question_text:
                continue
            try:
                json_response = agent(question_text, task_id)
                response_obj = json.loads(json_response)
                answer = response_obj.get("final_answer", "")
                answers_payload.append({"task_id": task_id, "submitted_answer": answer})
                results_log.append({
                    "Task ID": task_id,
                    "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                    "Answer": answer[:50] + "..." if len(answer) > 50 else answer
                })
            except Exception as e:
                answers_payload.append({"task_id": task_id, "submitted_answer": f"ERROR: {str(e)}"})
                results_log.append({
                    "Task ID": task_id,
                    "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                    "Answer": f"ERROR: {str(e)}"
                })
        return results_log, answers_payload
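
    # The agent is assumed to be callable as agent(question_text, task_id) and to
    # return a JSON string containing a "final_answer" field, e.g. (hypothetical
    # value):
    #
    #     '{"final_answer": "42"}'
    #
    # Anything that fails to parse, or raises, is recorded as an ERROR answer.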

    def _submit_answers(self, username: str, agent_code: str, answers_payload):
        """POST the collected answers to the scoring endpoint."""
        submission_data = {
            "username": username.strip(),
            "agent_code": agent_code.strip(),
            "answers": answers_payload
        }
        print("Submitting answers...")
        try:
            response = requests.post(
                self.submit_url,
                json=submission_data,
                headers={"Content-Type": "application/json"},
                timeout=60
            )
            response.raise_for_status()
            return response.json().get("message", "Answers submitted successfully")
        except Exception as e:
            return f"Submission failed: {str(e)}"

def run_evaluation(username: str, agent_code: str, model_name: str):
    """Gradio callback: build the agent, run the evaluation, and return UI values."""
    print("Initializing GAIA Expert Agent...")
    agent = GAIAExpertAgent(model_name=model_name)
    print("Starting evaluation...")
    runner = EvaluationRunner()
    result, results_df = runner.run_evaluation(agent, username, agent_code)
    # Question counters for the UI; correct_answers is never updated by the
    # runner in this file, so it stays at its initial value of 0.
    total_questions = runner.total_questions
    correct_answers = runner.correct_answers
    return result, correct_answers, total_questions, results_df
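
# The same entry point can also be called directly, bypassing the UI; the values
# shown are just the interface defaults, not required ones:
#
#     status, correct, total, df = run_evaluation(
#         "yoshizen",
#         "https://huggingface.co/spaces/yoshizen/FinalTest",
#         "google/flan-t5-large",
#     )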

def create_gradio_interface():
    with gr.Blocks(title="GAIA Expert Agent") as demo:
        gr.Markdown("# 🧠 GAIA Expert Agent Evaluation")
        with gr.Row():
            with gr.Column():
                gr.Markdown("### Configuration")
                username = gr.Textbox(label="Hugging Face Username", value="yoshizen")
                agent_code = gr.Textbox(
                    label="Agent Code",
                    value="https://huggingface.co/spaces/yoshizen/FinalTest"
                )
                model_name = gr.Dropdown(
                    label="Model",
                    choices=[
                        "google/flan-t5-small",
                        "google/flan-t5-base",
                        "google/flan-t5-large"
                    ],
                    value="google/flan-t5-large"
                )
                run_button = gr.Button("🚀 Run Evaluation", variant="primary")
            with gr.Column():
                gr.Markdown("### Results")
                result_text = gr.Textbox(label="Submission Status")
                correct_answers = gr.Number(label="Correct Answers")
                total_questions = gr.Number(label="Total Questions")
                results_table = gr.Dataframe(label="Processed Questions", interactive=False)
        run_button.click(
            fn=run_evaluation,
            inputs=[username, agent_code, model_name],
            outputs=[result_text, correct_answers, total_questions, results_table]
        )
    return demo

if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)