Final_Assignment_Template

Sleeping

App Files Files Community

dawid-lorek committed on 11 days ago

Commit

88fa1a5

verified ·

1 Parent(s): 51df914

Update agent.py

Browse files

Files changed (1) hide show

agent.py +76 -85

agent.py CHANGED Viewed

@@ -1,99 +1,90 @@
-# agent.py — full GAIA-ready agent with working WikipediaQueryRun + tools
 import os
 import asyncio
-from llama_index.llms.openai import OpenAI
-from llama_index.core.agent.react.base import ReActAgent
-from llama_index.core.tools import FunctionTool
-from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
-from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
-from langchain_experimental.tools.python.tool import PythonREPLTool
-from langchain_community.document_loaders import YoutubeLoader
-import whisper
-import openpyxl
-# Check OpenAI key
-if os.getenv("OPENAI_API_KEY"):
-    print("✅ Detected OPENAI_API_KEY")
-else:
-    print("⚠️ Missing OPENAI_API_KEY – LLM may fail")
-# Tools definitions
-api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
-def wikipedia_search(query: str) -> str:
-    return WikipediaQueryRun(api_wrapper=api_wrapper).run({"query": query})
-def run_python_with_output(code: str) -> str:
-    if "print(" not in code:
-        code = f"print({code})"
-    return PythonREPLTool().run(code)
-def get_youtube_transcript(url: str) -> str:
-    try:
-        loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
-        docs = loader.load()
-        return " ".join(d.page_content for d in docs)
-    except Exception as e:
-        return "[YOUTUBE ERROR] " + str(e)
-def transcribe_audio(file_path: str) -> str:
     try:
-        model = whisper.load_model("base")
-        res = model.transcribe(file_path)
-        return res["text"]
     except Exception as e:
-        return "[AUDIO ERROR] " + str(e)
-def extract_excel_total_food_sales(file_path: str) -> str:
-    try:
-        wb = openpyxl.load_workbook(file_path)
-        sheet = wb.active
-        total = 0.0
-        for _, category, amount in sheet.iter_rows(min_row=2, values_only=True):
-            if isinstance(category, str) and "food" in category.lower():
-                total += float(amount or 0)
-        return f"${total:.2f}"
-    except Exception as e:
-        return "[EXCEL ERROR] " + str(e)
-# Assemble tools
-TOOLS = [
-    FunctionTool.from_defaults(wikipedia_search),
-    FunctionTool.from_defaults(run_python_with_output),
-    FunctionTool.from_defaults(get_youtube_transcript),
-    FunctionTool.from_defaults(transcribe_audio),
-    FunctionTool.from_defaults(extract_excel_total_food_sales),
-]
-# LLM and Agent
-llm = OpenAI(model="gpt-4")
-agent = ReActAgent.from_tools(
-    tools=TOOLS,
-    llm=llm,
-    verbose=True,
-    system_prompt="""
-You are an expert AI assistant on the GAIA benchmark.
-Use available tools (Wikipedia, Python, YouTube transcript, audio, Excel).
-Output ONLY the final answer. No reasoning or commentary.
-Format exactly as requested (list, number, name, chess move, currency).
-If tool fails, output "Tool not available".
-""",
-)
-def answer_question_sync(question: str) -> str:
     try:
-        resp = agent.chat(question)
-        if hasattr(resp, "response") and hasattr(resp.response, "content"):
-            return resp.response.content.strip()
-        return str(resp).strip()
     except Exception as e:
-        print("❌ Agent exception:", e)
-        return "[ERROR] " + str(e)
-async def answer_question(question: str) -> str:
-    return answer_question_sync(question)

+# app.py – przywrócony layout benchmarku z poprawionym wywołaniem agenta
 import os
+import requests
+import pandas as pd
+import gradio as gr
 import asyncio
+from agent import answer_question
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+class GAIALlamaAgent:
+    def __call__(self, question: str) -> str:
+        return asyncio.run(answer_question(question))
+def run_and_submit_all(profile: gr.OAuthProfile | None):
+    space_id = os.getenv("SPACE_ID")
+    if not profile or not profile.username:
+        return "Please Login to Hugging Face with the button.", None
+    username = profile.username.strip()
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
+    questions_url = f"{DEFAULT_API_URL}/questions"
+    submit_url = f"{DEFAULT_API_URL}/submit"
     try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
     except Exception as e:
+        return f"Error fetching questions: {e}", None
+    agent = GAIALlamaAgent()
+    results_log = []
+    answers_payload = []
+    for item in questions_data:
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            continue
+        try:
+            submitted_answer = agent(question_text)
+        except Exception as e:
+            submitted_answer = f"[ERROR] {e}"
+        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
+        return final_status, pd.DataFrame(results_log)
     except Exception as e:
+        return f"Submission Failed: {e}", pd.DataFrame(results_log)
+# --- Gradio Interface matching original benchmark ---
+with gr.Blocks() as demo:
+    gr.Markdown("# Basic Agent Evaluation Runner")
+    gr.Markdown("""
+    **Instructions:**
+    1. Please clone this space and modify the agent logic.
+    2. Log in to Hugging Face with the button.
+    3. Click 'Run Evaluation & Submit All Answers' to run the full GAIA test.
+    """)
+    gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    results_table = gr.DataFrame(label="Questions and Agent Answers")
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
+if __name__ == "__main__":
+    print("\n===== Application Startup =====")
+    space_id = os.getenv("SPACE_ID")
+    if space_id:
+        print(f"🔗 Space: https://huggingface.co/spaces/{space_id}")
+    demo.launch(debug=True)