Upload 7 files
Browse filesRefactored adding Evaluator and Runner classes
- agent.py +69 -0
- app.py +10 -13
- evaluator.py +120 -0
- runner.py +71 -0
- tools.py +199 -0
agent.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import contextlib
|
2 |
+
import io
|
3 |
+
import logging
|
4 |
+
import os
|
5 |
+
logger = logging.getLogger(__name__)
|
6 |
+
from models import GoogleModelID, OpenRouterModelID
|
7 |
+
from settings import Settings
|
8 |
+
from smolagents import LiteLLMModel, CodeAgent
|
9 |
+
from smolagents import GoogleSearchTool, VisitWebpageTool, FinalAnswerTool
|
10 |
+
from smolagents.local_python_executor import BASE_PYTHON_TOOLS
|
11 |
+
from tools import GetTaskFileTool, VideoUnderstandingTool, AudioUnderstandingTool
|
12 |
+
from tools import ChessPiecePlacementTool, ChessGameFenTool, BestChessMoveTool, ConvertChessMoveTool
|
13 |
+
|
14 |
+
|
15 |
+
# Base tools may use these to process files
# NOTE(review): exposing `open`, `os`, `io`, `contextlib`, and especially
# `exec` to the agent's Python executor effectively disables the executor's
# sandboxing — generated code can read/write arbitrary files and run
# arbitrary code on the host. Kept as-is because downloaded task files must
# be readable by agent code, but confirm this is acceptable for the
# deployment environment.
BASE_PYTHON_TOOLS["open"] = open
BASE_PYTHON_TOOLS["os"] = os
BASE_PYTHON_TOOLS["io"] = io
BASE_PYTHON_TOOLS["contextlib"] = contextlib
BASE_PYTHON_TOOLS["exec"] = exec
|
21 |
+
|
22 |
+
|
23 |
+
class BasicAgent:
    """smolagents CodeAgent wired with web-search, file, media, and chess tools."""

    def __init__(self, settings: Settings):
        """Build the underlying CodeAgent.

        Args:
            settings: Application settings supplying the API keys used by the
                LLM backend and the custom tools.
        """
        self.agent = CodeAgent(
            add_base_tools=False,
            tools=[
                GoogleSearchTool("serper"),
                VisitWebpageTool(max_output_length=100000),
                FinalAnswerTool(),
                GetTaskFileTool(settings),
                VideoUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH),
                AudioUnderstandingTool(settings, GoogleModelID.GEMINI_2_0_FLASH),
                ChessPiecePlacementTool(),
                ChessGameFenTool(settings, OpenRouterModelID.GPT_O4_MINI),
                BestChessMoveTool(settings),
                ConvertChessMoveTool(settings, OpenRouterModelID.QWEN_3_14B_FREE),
            ],
            # Modules the agent-generated code may import inside the executor.
            additional_authorized_imports=[
                "unicodedata",
                "stat",
                "datetime",
                "random",
                "pandas",
                "itertools",
                "math",
                "statistics",
                "queue",
                "time",
                "collections",
                "re",
                "os",
            ],
            max_steps=10,
            verbosity_level=1,
            model=LiteLLMModel(
                # Alternatives kept for quick switching during evaluation:
                # OpenRouterModelID.GPT_O4_MINI, OpenRouterModelID.GROK_3_BETA,
                # OpenRouterModelID.GROK_3_MINI_BETA
                model_id=OpenRouterModelID.GPT_4_1_MINI,
                api_key=settings.openrouter_api_key.get_secret_value(),
                temperature=0.0,  # deterministic answers for scoring
                timeout=180,
            ),
        )

    def __call__(self, question: str) -> str:
        """Run the agent on *question* and return its final answer string."""
        # Lazy %-style args keep formatting off the hot path (and out of
        # disabled log levels).
        logger.info("Agent received question (first 50 chars): %s...", question[:50])
        final_answer = self.agent.run(question)
        # Fixed misleading template leftover: this is the agent's computed
        # answer, not a "fixed" (hard-coded) one.
        logger.info("Agent returning final answer: %s", final_answer)
        return final_answer
|
app.py
CHANGED
@@ -3,8 +3,8 @@ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExport
|
|
3 |
from openinference.instrumentation.smolagents import SmolagentsInstrumentor
|
4 |
from opentelemetry.sdk.trace import TracerProvider
|
5 |
from opentelemetry import trace
|
6 |
-
|
7 |
-
|
8 |
from settings import Settings
|
9 |
import os
|
10 |
import pandas as pd
|
@@ -13,8 +13,8 @@ import logging
|
|
13 |
logging.basicConfig(level=logging.INFO, force=True)
|
14 |
logger = logging.getLogger(__name__)
|
15 |
settings = Settings()
|
16 |
-
|
17 |
-
|
18 |
|
19 |
|
20 |
# Create a TracerProvider for OpenTelemetry
|
@@ -45,22 +45,19 @@ EMPTY_RESULTS_TABLE = pd.DataFrame(columns=['task_id', 'question', 'answer'])
|
|
45 |
def run_one(profile: gr.OAuthProfile | None) -> pd.DataFrame:
|
46 |
if not user_logged_in(profile):
|
47 |
return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
|
48 |
-
|
49 |
-
|
50 |
-
return "You are logged in.", EMPTY_RESULTS_TABLE
|
51 |
|
52 |
def run_all(profile: gr.OAuthProfile | None) -> pd.DataFrame:
|
53 |
if not user_logged_in(profile):
|
54 |
return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
|
55 |
-
|
56 |
-
|
57 |
-
return "You are logged in.", EMPTY_RESULTS_TABLE
|
58 |
|
59 |
def submit(profile: gr.OAuthProfile | None):
|
60 |
if not user_logged_in(profile):
|
61 |
return LOGIN_MESSAGE
|
62 |
-
|
63 |
-
return "You are logged in."
|
64 |
|
65 |
|
66 |
# --- Build Gradio Interface using Blocks ---
|
@@ -77,7 +74,7 @@ with gr.Blocks() as demo:
|
|
77 |
---
|
78 |
**Disclaimers:**
|
79 |
Once clicking 'Get All Answers', it can take quite some time (this is the time for the agent to go through all 20 questions).
|
80 |
-
The agent will run question tasks in parallel making
|
81 |
The 'Submit All Answers' button will use the most recent agent answers cached in the space for your username.
|
82 |
"""
|
83 |
)
|
|
|
3 |
from openinference.instrumentation.smolagents import SmolagentsInstrumentor
|
4 |
from opentelemetry.sdk.trace import TracerProvider
|
5 |
from opentelemetry import trace
|
6 |
+
from evaluator import Evaluator
|
7 |
+
from runner import Runner
|
8 |
from settings import Settings
|
9 |
import os
|
10 |
import pandas as pd
|
|
|
13 |
logging.basicConfig(level=logging.INFO, force=True)
|
14 |
logger = logging.getLogger(__name__)
|
15 |
settings = Settings()
|
16 |
+
evaluator = Evaluator(settings)
|
17 |
+
runner = Runner(settings)
|
18 |
|
19 |
|
20 |
# Create a TracerProvider for OpenTelemetry
|
|
|
45 |
def run_one(profile: gr.OAuthProfile | None) -> pd.DataFrame:
    """Answer one random question with the agent; requires a logged-in user."""
    if not user_logged_in(profile):
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
    selected = [evaluator.get_one_question()]
    return "Answer one random question...", runner.run_agent(selected)
|
|
|
50 |
|
51 |
def run_all(profile: gr.OAuthProfile | None) -> pd.DataFrame:
    """Answer every question from the evaluator; requires a logged-in user."""
    if not user_logged_in(profile):
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
    all_questions = evaluator.get_questions()
    return "Answer all 20 questions...", runner.run_agent(all_questions)
|
|
|
56 |
|
57 |
def submit(profile: gr.OAuthProfile | None):
    """Submit cached answers to the scoring endpoint; requires a logged-in user.

    Returns:
        The status message produced by the evaluator (score summary or error),
        so the UI can display the outcome.
    """
    if not user_logged_in(profile):
        return LOGIN_MESSAGE
    # Bug fix: the status string was previously discarded and the handler
    # returned None, leaving the UI with no feedback after submission.
    return evaluator.submit_answers()
|
|
|
61 |
|
62 |
|
63 |
# --- Build Gradio Interface using Blocks ---
|
|
|
74 |
---
|
75 |
**Disclaimers:**
|
76 |
Once clicking 'Get All Answers', it can take quite some time (this is the time for the agent to go through all 20 questions).
|
77 |
+
The agent(s) will run question tasks in parallel making the logs hard to follow. Langfuse instrumentation has been configured.
|
78 |
The 'Submit All Answers' button will use the most recent agent answers cached in the space for your username.
|
79 |
"""
|
80 |
)
|
evaluator.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from settings import Settings
|
2 |
+
from typing import List
|
3 |
+
from models import Question, QuestionAnswerPair, Results
|
4 |
+
import requests
|
5 |
+
import random
|
6 |
+
import json
|
7 |
+
import logging
|
8 |
+
logger = logging.getLogger(__name__)
|
9 |
+
|
10 |
+
|
11 |
+
class Evaluator:
    """Fetches questions from the scoring API and submits answers for scoring."""

    def __init__(self, settings: Settings):
        self.settings = settings

    def get_questions(self) -> list[Question]:
        """
        Get the questions from the HuggingFace endpoint.

        Falls back to the local ``questions.json`` cache when the endpoint is
        unreachable or returns malformed data (rate limits, outages, ...).

        Returns:
            list[Question]: A list of Question objects
        """
        url = str(self.settings.scoring_api_base_url) + "questions"
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            questions = [Question(**question) for question in response.json()]
            # Refresh the local cache used by the fallback path below.
            with open("questions.json", "w") as f:
                json.dump([question.model_dump()
                           for question in questions], f, indent=4)
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit). ValueError covers JSON decoding and
        # pydantic validation failures.
        except (requests.RequestException, ValueError) as e:
            # Read local file instead, dealing with rate limits, etc.
            logger.warning("Falling back to cached questions.json: %s", e)
            with open("questions.json", "r") as f:
                questions = [Question(**question) for question in json.load(f)]
        return questions

    def get_one_question(self, task_id=None) -> Question:
        """
        Get a random, or requested question from the HuggingFace endpoint.

        Args:
            task_id: When given, return that specific question; if it is not
                found, fall through to a random one (original behavior kept).

        Returns:
            Question: A Question object
        """
        if task_id:
            # Removed a redundant nested `if task_id:` re-check.
            for question in self.get_questions():
                if question.task_id == task_id:
                    return question
        try:
            url = str(self.settings.scoring_api_base_url) + "random-question"
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return Question(**response.json())
        except (requests.RequestException, ValueError) as e:
            # Read local file instead, dealing with rate limits, etc.
            logger.warning("Falling back to a random cached question: %s", e)
            return random.choice(self.get_questions())

    def _read_answer_file(self) -> List[str]:
        """Load cached answers.json and format each pair for the submit payload."""
        with open("answers.json", "r") as f:
            pairs = [QuestionAnswerPair(**pair) for pair in json.load(f)]
        return [pair.get_answer() for pair in pairs]

    def submit_answers(self) -> str:
        """Submits saved answers to the scoring endpoint and returns the result."""
        answers_payload = self._read_answer_file()
        agent_code = f"https://huggingface.co/spaces/{self.settings.space_id}/tree/main"
        submission_data = {
            "username": self.settings.username,
            "agent_code": agent_code,
            "answers": answers_payload}
        submit_url = str(self.settings.scoring_api_base_url) + "submit"
        logger.info(f"Submitting {len(answers_payload)} answers to: {submit_url}")
        try:
            response = requests.post(
                submit_url, json=submission_data, timeout=60)
            response.raise_for_status()
            results = Results.model_validate(response.json())
            logger.info(
                f"Submission successful.\n"
                f"User: {results.username}.\n"
                f"Overall Score: {results.score}%.\n"
                f"Correct Count: {results.correct_count}.\n"
                f"Total Attempted: {results.total_attempted}.\n"
                f"Message: {results.message}.\n"
                f"Timestamp: {results.timestamp}.\n"
            )
            status_message = (
                f"Submission Successful!\n"
                f"User: {results.username}\n"
                f"Overall Score: {results.score}% "
                f"({results.correct_count}/{results.total_attempted} correct)\n"
                f"Message: {results.message}"
            )
            return status_message
        except requests.exceptions.HTTPError as e:
            error_detail = f"Server responded with status {e.response.status_code}."
            try:
                error_json = e.response.json()
                error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
            except requests.exceptions.JSONDecodeError:
                error_detail += f" Response: {e.response.text[:500]}"
            status_message = f"Submission Failed: {error_detail}"
            # Failures are now logged at ERROR level (previously INFO).
            logger.error(status_message)
            return status_message
        except requests.exceptions.Timeout:
            status_message = "Submission Failed: The request timed out."
            logger.error(status_message)
            return status_message
        except requests.exceptions.RequestException as e:
            status_message = f"Submission Failed: Network error - {e}"
            logger.error(status_message)
            return status_message
        except Exception as e:
            status_message = f"An unexpected error occurred during submission: {e}"
            logger.error(status_message)
            return status_message
|
runner.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from settings import Settings
|
2 |
+
from models import QuestionAnswerPair
|
3 |
+
from agent import BasicAgent
|
4 |
+
import pandas as pd
|
5 |
+
import logging
|
6 |
+
import json
|
7 |
+
import asyncio
|
8 |
+
import nest_asyncio
|
9 |
+
nest_asyncio.apply()
|
10 |
+
logger = logging.getLogger(__name__)
|
11 |
+
|
12 |
+
|
13 |
+
class Runner:
    """Runs one BasicAgent per question concurrently and persists the answers."""

    def __init__(self, settings: Settings):
        self.settings = settings

    def _save_pairs(self, pairs: list[QuestionAnswerPair]):
        """Persist answered pairs to answers.json (read later at submission time)."""
        answers = [pair.model_dump() for pair in pairs if pair is not None]
        with open("answers.json", "w") as f:
            json.dump(answers, f, indent=4)

    def _enrich_question_text(self, item):
        """Wrap the raw question with answer-format instructions and task metadata."""
        task_id = item.task_id
        file_name = item.file_name
        question_text = (
            f"{item.question} "
            "Think hard to answer. Parse all statements in the question to make a plan. "
            "Your final answer should be a number or as few words as possible. "
            "If needed, use a comma separated list of numbers and/or strings. Critically "
            f"review your answer before making it the final answer. task_id: {task_id}."
        )
        if file_name:
            question_text = f"{question_text} file_name: {file_name} (use tools to fetch the file)"
        return question_text

    async def _run_agent_async(self, item):
        """Runs one agent on one question, off the event loop's thread.

        Agent failures are captured into the answer text rather than raised,
        so one bad task cannot abort the whole gather.
        """
        task_id = item.task_id
        question_text = self._enrich_question_text(item)
        try:
            # Fresh agent per task; the blocking run is moved to a worker thread.
            answer = await asyncio.to_thread(BasicAgent(self.settings), question_text)
        except Exception as e:
            logger.error("Error running agent on task %s: %s", task_id, e)
            answer = f"AGENT ERROR: {e}"
        return QuestionAnswerPair(task_id=task_id,
                                  question=item.question, answer=str(answer))

    def _assign_questions(self, questions):
        """Return an awaitable that gathers one agent run per question."""
        tasks = [self._run_agent_async(item) for item in questions]
        return asyncio.gather(*tasks)

    def run_agent(self, questions) -> pd.DataFrame:
        """Run the agent(s) async, save answers and return a dataframe."""
        # nest_asyncio.apply() at import time allows run_until_complete on the
        # already-running (Gradio) event loop without "loop is running" errors.
        loop = asyncio.get_running_loop()
        # Removed the misleadingly named `run_tasks_in_thread` closure: no
        # thread was involved — it just wrapped this single call.
        pairs = loop.run_until_complete(self._assign_questions(questions))

        # Save json to disk and return a dataframe.
        self._save_pairs(pairs)
        results_log = [pair.model_dump() for pair in pairs if pair is not None]
        if not results_log:
            logger.warning("Agent did not produce any answers to submit.")
        return pd.DataFrame(results_log)
|
tools.py
ADDED
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import logging
|
4 |
+
logger = logging.getLogger(__name__)
|
5 |
+
import requests
|
6 |
+
import shutil
|
7 |
+
from typing import Any
|
8 |
+
import urllib.parse
|
9 |
+
from board_to_fen.predict import get_fen_from_image_path
|
10 |
+
from google import genai
|
11 |
+
from google.genai import types
|
12 |
+
from litellm import completion
|
13 |
+
from smolagents import Tool
|
14 |
+
from settings import Settings
|
15 |
+
|
16 |
+
|
17 |
+
class BaseCustomTool(Tool):
    """Base for smolagents tools that need access to application Settings.

    Stores the settings object so subclasses can read API keys and endpoint
    URLs from ``self.settings``.
    """

    def __init__(self, settings):
        super().__init__()
        self.settings = settings
|
21 |
+
|
22 |
+
class GetTaskFileTool(BaseCustomTool):
    name = "get_task_file_tool"
    description = """Download the file_name associated with a given task_id. Get absolute file path"""
    inputs = {
        "task_id": {"type": "string", "description": "Task ID"},
        "file_name": {"type": "string", "description": "File name"},
    }
    output_type = "string"

    def __init__(self, settings):
        super().__init__(settings)
        self.directory_name = "downloads"
        self.create_dir()

    def forward(self, task_id: str, file_name: str) -> str:
        """Download the task's file and return its absolute path.

        Falls back to a bundled copy under ``files/`` when the download fails
        (rate limits, offline, ...).
        """
        destination = f"{self.directory_name}/{file_name}"
        try:
            response = requests.get(
                f"{self.settings.evaluation_api_base_url}/files/{task_id}",
                timeout=15)
            response.raise_for_status()
            with open(destination, 'wb') as file:
                file.write(response.content)
        except Exception as e:
            # Fetch the local file instead, dealing with rate limits, etc.
            # The fallback is no longer silent — the swallowed exception is logged.
            logger.warning("Download failed for task %s (%s); using local copy.",
                           task_id, e)
            shutil.copy2(f"files/{file_name}", destination)
        # Bug fix: the fallback previously returned a relative path while the
        # success path (and the tool description) promise an absolute one.
        return os.path.abspath(destination)

    def create_dir(self):
        # Create the download directory if it doesn't exist
        if not os.path.exists(self.directory_name):
            os.makedirs(self.directory_name)
            logger.info(f"Directory '{self.directory_name}' created successfully.")
        else:
            logger.debug(f"Directory '{self.directory_name}' already exists.")
|
55 |
+
|
56 |
+
class VideoUnderstandingTool(BaseCustomTool):
    name = "VideoUnderstanding"
    description = "Prompt a YouTube video with questions to understand its content."
    inputs = {
        "youtube_url": {"type": "string", "description": "The URL of the YouTube video"},
        "prompt": {"type": "string", "description": "A question or request regarding the video"},
    }
    output_type = "string"

    def __init__(self, settings, model):
        super().__init__(settings)
        self.model = model  # Gemini model id used for video understanding

    def forward(self, youtube_url: str, prompt: str) -> str:
        """Ask the Gemini model *prompt* about the video at *youtube_url*."""
        client = genai.Client(api_key=self.settings.gemini_api_key.get_secret_value())
        try:
            video_description = client.models.generate_content(
                model=self.model,
                contents=types.Content(
                    parts=[
                        # The video is passed by URI; Gemini fetches it itself.
                        types.Part(
                            file_data=types.FileData(file_uri=youtube_url)
                        ),
                        types.Part(text=prompt)
                    ]
                )
            )
            return video_description.text
        except Exception as e:
            logger.error(f"Error understanding video: {e}")
            # Bug fix: previously returned the bool `False`, violating the
            # declared string output_type; return the error text instead so
            # the agent sees why the tool failed.
            return f"Error understanding video: {e}"
|
87 |
+
|
88 |
+
class AudioUnderstandingTool(BaseCustomTool):
    name = "AudioUnderstanding"
    description = "Prompt a local audio file with questions to understand its content."
    inputs = {
        "file_path": {"type": "string", "description": "The local file of the audio"},
        "prompt": {"type": "string", "description": "A question or request regarding the audio"},
    }
    output_type = "string"

    def __init__(self, settings, model):
        super().__init__(settings)
        self.model = model  # Gemini model id used for audio understanding

    def forward(self, file_path: str, prompt: str) -> str:
        """Upload the audio at *file_path* and ask the Gemini model *prompt* about it."""
        client = genai.Client(api_key=self.settings.gemini_api_key.get_secret_value())
        try:
            # Audio must be uploaded first; the returned handle is then
            # referenced from the generate_content call.
            mp3_file = client.files.upload(file=f"{file_path}")
            audio_description = client.models.generate_content(
                model=self.model,
                contents=[prompt, mp3_file]
            )
            return audio_description.text
        except Exception as e:
            logger.error(f"Error understanding audio: {e}")
            # Bug fix: previously returned the bool `False`, violating the
            # declared string output_type; return the error text instead so
            # the agent sees why the tool failed.
            return f"Error understanding audio: {e}"
|
113 |
+
|
114 |
+
class ConvertChessMoveTool(BaseCustomTool):
    """LLM-backed converter from coordinate to algebraic chess notation."""
    name = "ConvertChessMove"
    description = "Convert a chess move from coordinate notation to algebraic notation."
    inputs = {
        "piece_placement": {"type": "string", "description": "The chess piece placement in plain text"},
        "move": {"type": "string", "description": "The move in coordinate notation (e.g., e2e4)"},
    }
    output_type = "string"

    def __init__(self, settings, model):
        super().__init__(settings)
        # OpenRouter model id passed straight through to litellm.completion.
        self.model = model

    def forward(self, piece_placement: str, move: str) -> str:
        # The prompt forbids commentary, so the raw completion text is
        # expected to be the algebraic move only — no post-processing.
        # NOTE(review): the model output is returned unvalidated; presumably
        # the model complies, but there is no guard if it adds extra text.
        move_message = f"""Convert this chess move from coordinate notation to algebraic
        notation: {move}. Use the following {piece_placement}. Do not provide any additional
        thinking or commentary in the response, the algebraic notation only."""
        messages = [{ "content": move_message,"role": "user"}]
        response = completion(
            model=self.model,
            temperature=0.0,
            messages=messages,
            api_key=self.settings.openrouter_api_key.get_secret_value()
        )
        return response.choices[0].message.content
|
139 |
+
|
140 |
+
class BestChessMoveTool(BaseCustomTool):
    name = "BestChessMove"
    description = "Get best chess move in coordinate notation based on a FEN representation."
    inputs = {
        "fen": {"type": "string", "description": "The FEN (Forsyth-Edwards Notation) \
            representation of the chess position. Example \
            rn1q1rk1/pp2b1pp/2p2n2/3p1pB1/3P4/1QP2N2/PP1N1PPP/R4RK1 b - - 1 11"},
    }
    output_type = "string"

    def forward(self, fen: str) -> str:
        """Query the chess evaluation service and return the best move (e.g. 'e2e4')."""
        try:
            url = f"{self.settings.chess_eval_url}?fen={urllib.parse.quote(fen)}&depth=15"
            response = requests.get(url, timeout=15)
            # Parse the body once via response.json() instead of calling
            # json.loads(response.text) twice.
            payload = response.json()
            if response.status_code == 200 and payload['success'] is True:
                # 'bestmove' looks like "bestmove e2e4 ..."; keep the move token.
                return payload['bestmove'].split()[1]
            raise ValueError(f"Error getting chess evaluation: {response.status_code}")
        except Exception as e:
            logger.error(f"Error getting chess evaluation: {e}")
            # Bug fix: previously fell off the end returning None despite the
            # declared string output_type; return the error text instead.
            return f"Error getting chess evaluation: {e}"
|
160 |
+
|
161 |
+
class ChessGameFenTool(BaseCustomTool):
    """LLM-backed conversion of a plain-text piece placement into a FEN string."""
    name = "ChessGameFen"
    description = "Get a FEN representation given chess piece placement and a move."
    inputs = {
        "piece_placement": {"type": "string", "description": "The chess piece placement in plain text"},
        "player_turn": {"type": "string",
                        "description": "The player with the next turn in the match, black or white"},
    }
    output_type = "string"

    def __init__(self, settings, model):
        super().__init__(settings)
        # OpenRouter model id passed straight through to litellm.completion.
        self.model = model

    def forward(self, piece_placement: str, player_turn: str) -> str:
        """Use the tool."""
        # The prompt forbids commentary, so the raw completion text is
        # expected to be the FEN only — it is returned unvalidated.
        # NOTE(review): presumably the model complies; there is no guard if
        # it adds extra text around the FEN.
        fen_message = f"""Assuming {player_turn} has the next turn, Use the following placement
        {piece_placement} and provide the board state as FEN. Do not provide any
        additional thinking or commentary in the response, the FEN only."""
        messages = [{ "content": fen_message,"role": "user"}]
        response = completion(
            model=self.model,
            temperature=0.0,
            messages=messages,
            api_key=self.settings.openrouter_api_key.get_secret_value()
        )
        return response.choices[0].message.content
|
188 |
+
|
189 |
+
class ChessPiecePlacementTool(Tool):
    """Extract piece placement from a chessboard image via board_to_fen."""
    name = "ChessPiecePlacement"
    description = "Get chess piece placement information from an image of a board."
    inputs = {
        "image_path": {"type": "string", "description": "The local file of the chess board image"},
    }
    output_type = "string"

    def forward(self, image_path: str) -> str:
        # Delegates entirely to the board_to_fen library; presumably returns
        # the FEN piece-placement field inferred from the image — TODO confirm
        # against board_to_fen's documentation.
        return get_fen_from_image_path(image_path)
|
199 |
+
|