|
import re |
|
import requests |
|
import pandas as pd |
|
import torch |
|
import gradio as gr |
|
from tqdm import tqdm |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
import json |
|
|
|
|
|
# Base URL of the scoring service; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Hugging Face checkpoint loaded by the agent when no model name is given.
MODEL_NAME = "google/flan-t5-large"
|
|
|
class GAIAExpertAgent:
    """Callable question-answering agent built on a seq2seq checkpoint.

    The tokenizer and model are loaded once in ``__init__``. Calling the
    instance with a question returns a JSON string of the form
    ``{"final_answer": "<text>"}``; failures are reported in the same shape
    with an ``ERROR: ...`` answer instead of raising.
    """

    def __init__(self, model_name: str = MODEL_NAME):
        """Load the tokenizer and model for *model_name*.

        Args:
            model_name: Hugging Face model identifier passed to
                ``from_pretrained``.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"⚡ Инициализация агента на {self.device.upper()}")

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(
            model_name,
            device_map="auto",
            # Half precision only when a GPU is available.
            torch_dtype=torch.float16 if "cuda" in self.device else torch.float32,
            low_cpu_mem_usage=True,
        ).eval()
        print("✅ Агент готов")

    def __call__(self, question: str, task_id: "str | None" = None) -> str:
        """Answer *question* and return the result as a JSON string.

        Args:
            question: Question text; it is embedded in a fixed prompt and
                truncated to 512 tokens.
            task_id: Accepted for interface compatibility; not used here.

        Returns:
            ``json.dumps({"final_answer": ...})``. On any exception the
            answer is ``"ERROR: <message>"``; nothing is raised.
        """
        try:
            encoded = self.tokenizer(
                f"Solve step-by-step: {question}\nFinal Answer:",
                return_tensors="pt",
                max_length=512,
                truncation=True,
            )
            # The model is placed by device_map="auto", so follow the model's
            # own device rather than assuming it matches self.device.
            inputs = encoded.to(self.model.device)

            outputs = self.model.generate(
                **inputs,
                max_new_tokens=256,
                num_beams=5,
                early_stopping=True,
                repetition_penalty=2.0,
            )
            answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return json.dumps({"final_answer": answer.strip()})
        except Exception as e:
            return json.dumps({"final_answer": f"ERROR: {str(e)}"})
        finally:
            # Release cached GPU memory on failure as well as on success.
            if "cuda" in self.device:
                torch.cuda.empty_cache()
|
|
|
|
|
class EvaluationRunner:
    """Fetch evaluation questions, run an agent on them, and submit answers.

    All HTTP traffic goes through one ``requests.Session``. Network and
    parsing failures are turned into human-readable status strings rather
    than raised.
    """

    def __init__(self, api_url: str = DEFAULT_API_URL):
        """Store the endpoint URLs derived from *api_url* and open a session.

        Args:
            api_url: Base URL of the scoring service.
        """
        self.api_url = api_url
        self.questions_url = f"{api_url}/questions"
        self.submit_url = f"{api_url}/submit"
        self.session = requests.Session()

    def run_evaluation(self, agent, username: str, agent_code: str, progress=tqdm):
        """Run *agent* over every fetched question and submit the answers.

        Args:
            agent: Callable ``agent(question, task_id)`` returning a JSON
                string that contains a ``"final_answer"`` key.
            username: Account name sent with the submission.
            agent_code: Agent code reference sent with the submission.
            progress: Callable ``progress(iterable, desc=...)`` that wraps the
                question list and yields its items (``tqdm`` by default).

        Returns:
            A 4-tuple ``(status, correct_count, total_questions, results)``
            where ``results`` is a ``pandas.DataFrame`` with one row per
            processed question. If the questions cannot be fetched, the tuple
            is ``(error_message, 0, 0, empty DataFrame)``.
        """
        questions = self._fetch_questions()
        if not isinstance(questions, list):
            # _fetch_questions returns an error string on failure.
            return questions, 0, 0, pd.DataFrame()

        results = []
        answers = []
        for q in progress(questions, desc="Processing GAIA tasks"):
            try:
                json_response = agent(q["question"], q["task_id"])
                answer = json.loads(json_response).get("final_answer", "")

                answers.append({
                    "task_id": q["task_id"],
                    # Submitted answers are capped at 500 characters.
                    "submitted_answer": str(answer)[:500],
                })
                results.append({
                    "Task ID": q["task_id"],
                    "Question": q["question"],
                    "Answer": str(answer),
                })
            except Exception as e:
                # Keep the row (and the question text when available) so the
                # failure is visible in the results table; it is not submitted.
                results.append({
                    "Task ID": q.get("task_id", "N/A"),
                    "Question": q.get("question", "Error"),
                    "Answer": f"ERROR: {str(e)}",
                })

        if not answers:
            # Nothing to score: skip the POST instead of sending an empty list.
            return "Нет ответов для отправки", 0, len(questions), pd.DataFrame(results)

        status, correct = self._submit_with_score(username, agent_code, answers)
        return status, correct, len(questions), pd.DataFrame(results)

    def _fetch_questions(self):
        """Return the parsed JSON question payload, or an error string."""
        try:
            response = self.session.get(
                self.questions_url,
                timeout=60,
                headers={"Accept": "application/json"},
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return f"Ошибка получения вопросов: {str(e)}"

    def _submit_with_score(self, username: str, agent_code: str, answers: list) -> tuple:
        """POST the answers and return ``(status_message, correct_count)``.

        ``correct_count`` is read from the response JSON and falls back to 0
        when the field is absent or not numeric. On any request or parsing
        failure the result is ``("Ошибка отправки: <error>", 0)``.
        """
        try:
            response = self.session.post(
                self.submit_url,
                json={
                    "username": username.strip(),
                    "agent_code": agent_code.strip(),
                    "answers": answers,
                },
                timeout=120,
            )
            response.raise_for_status()
            payload = response.json()
            message = payload.get("message", "Ответы успешно отправлены")
            raw_correct = payload.get("correct_count")
        except Exception as e:
            return f"Ошибка отправки: {str(e)}", 0

        try:
            correct = int(raw_correct or 0)
        except (TypeError, ValueError):
            # A malformed count must not hide a successful submission.
            correct = 0
        return message, correct

    def _submit_answers(self, username: str, agent_code: str, answers: list):
        """POST the answers and return only the status message string."""
        return self._submit_with_score(username, agent_code, answers)[0]
|
|
|
|
|
|
|
def run_evaluation(username: str, agent_code: str, progress=gr.Progress()):
    """Gradio click handler: run the full evaluation and return its results.

    Args:
        username: Account name forwarded to the submission.
        agent_code: Agent code reference forwarded to the submission.
        progress: Gradio progress tracker used to report status in the UI.

    Returns:
        Whatever ``EvaluationRunner.run_evaluation`` returns, which the UI
        maps onto its four output components.
    """
    progress(0, desc="Инициализация модели...")
    # Load the model once and reuse it on later clicks instead of reloading
    # the checkpoint for every evaluation run.
    agent = getattr(run_evaluation, "_agent", None)
    if agent is None:
        agent = GAIAExpertAgent()
        run_evaluation._agent = agent

    progress(0, desc="Запуск оценки...")
    runner = EvaluationRunner()

    def track(iterable, desc=""):
        """Yield items of *iterable* while reporting the completed fraction."""
        total = len(iterable)
        progress(0, desc=desc)
        for index, item in enumerate(iterable):
            # Pass desc on every update so the label is not cleared.
            progress(index / total, desc=desc)
            yield item

    return runner.run_evaluation(agent, username, agent_code, progress=track)
|
|
|
|
|
|
|
# UI layout: credentials and the run button on the left, status, counters and
# the per-question results table on the right.
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# 🧠 GAIA Mastermind Agent\n"
        "## *Многошаговое решение сложных задач*"
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 🔐 Авторизация")
            username = gr.Textbox(label="HF Username", value="yoshizen")
            agent_code = gr.Textbox(
                label="Agent Code",
                value="https://huggingface.co/spaces/yoshizen/FinalTest",
            )
            run_btn = gr.Button("Запустить оценку", variant="primary")

        with gr.Column(scale=2):
            gr.Markdown("### 📊 Результаты")
            with gr.Row():
                result_output = gr.Textbox(label="Статус")
                correct_output = gr.Number(label="Правильные ответы")
                total_output = gr.Number(label="Всего вопросов")
            # `overflow_row_behaviour` is a Gradio 3.x argument and cannot be
            # combined with the Gradio 4+ `concurrency_limit` used below, so it
            # is omitted. `height=500` is omitted too: it matches the default
            # table height and is not accepted by newer Gradio releases.
            results_table = gr.Dataframe(
                label="Детализация ответов",
                interactive=True,
                wrap=True,
            )

    # One evaluation at a time: each run occupies the model for its duration.
    run_btn.click(
        fn=run_evaluation,
        inputs=[username, agent_code],
        outputs=[result_output, correct_output, total_output, results_table],
        concurrency_limit=1,
    )
|
|
|
if __name__ == "__main__":
    # Enable request queuing (at most 10 waiting), then serve on all
    # interfaces at port 7860 with errors surfaced in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    queued_app = demo.queue(max_size=10)
    queued_app.launch(**launch_options)