Final_Assignment_Template

Sleeping

App Files Files Community

dawid-lorek commited on 8 days ago

Commit

09253eb

verified ·

1 Parent(s): 836efcb

Update agent.py

Browse files

Files changed (1) hide show

agent.py +43 -120

agent.py CHANGED Viewed

@@ -5,111 +5,60 @@ from typing import Any
 from llama_index.llms.openai import OpenAI
 from llama_index.core.agent.react import ReActAgent
-from llama_index.core.agent.workflow import AgentWorkflow
-from llama_index.core.tools import FunctionTool, ToolMetadata
-# Tool: DuckDuckGo Web Search
-from llama_index.tools.duckduckgo import DuckDuckGoSearchTool
-# Tool: Python code eval (for simple code/number/output questions)
 def eval_python_code(code: str) -> str:
-    """
-    Evaluate simple Python code and return result as string.
-    Use for 'What is the output of this code?' or math.
-    """
     try:
-        # Only eval expressions (NOT exec for safety!)
         return str(eval(code, {"__builtins__": {}}))
     except Exception as e:
         return f"ERROR: {e}"
-# Tool: Strict output formatting
 def format_gaia_answer(answer: str, question: str = "") -> str:
-    """Postprocess: GAIA strict answer format enforcement."""
     if not answer:
         return ""
-    # Remove quotes/brackets/periods, apologies, "Final Answer:"
     answer = re.sub(r'(?i)final answer:?\s*', '', answer).strip()
     answer = re.sub(r'(?i)i(\'?m| cannot| can\'t| unable to| apologize| not available|process the file).*', '', answer).strip()
     if answer.startswith('"') and answer.endswith('"'): answer = answer[1:-1]
     if answer.startswith('[') and answer.endswith(']'): answer = answer[1:-1]
     if not re.match(r'^[A-Za-z]+\.$', answer): answer = re.sub(r'\.$', '', answer)
-    # Numeric
-    if re.search(r'how many|number of|at bats|total sales|albums|output.*python|highest number', question, re.I):
-        num = re.search(r'(\$?\d[\d,\.]*)', answer)
-        if num: return num.group(1).replace(',', '')
-    # Surname/first name/code/city
-    if 'first name' in question: return answer.split()[0]
-    if 'surname' in question: return answer.split()[-1]
-    if 'city' in question: return answer.split()[0]
-    if re.search(r'IOC country code|award number|NASA', question, re.I):
-        code = re.search(r'[A-Z0-9]{3,}', answer)
-        if code: return code.group(0)
-    if re.search(r'list|comma.*separated|page numbers', question, re.I):
-        items = [x.strip('",.').lower() for x in re.split(r'[,\n]', answer) if x.strip()]
-        if 'page numbers' in question:
-            nums = [int(x) for x in re.findall(r'\d+', answer)]
-            return ', '.join(str(n) for n in sorted(nums))
-        if 'ingredient' in question or 'vegetable' in question:
-            merged = []
-            skip = False
-            for i, item in enumerate(items):
-                if skip: skip = False; continue
-                if i+1 < len(items) and item in ['sweet', 'green', 'lemon', 'ripe', 'whole', 'fresh']:
-                    merged.append(f"{item} {items[i+1]}")
-                    skip = True
-                else: merged.append(item)
-            merged = sorted(set(merged))
-            return ', '.join(merged)
-        return ', '.join(items)
     return answer.strip().rstrip('.').strip()
-# Tool: OCR for images (incl. chessboards/screenshots)
-def ocr_image(file_path: str) -> str:
-    """Extract text from image file."""
-    from PIL import Image
-    import pytesseract
-    try:
-        img = Image.open(file_path)
-        return pytesseract.image_to_string(img)
-    except Exception as e:
-        return f"ERROR: {e}"
-# Tool: Audio transcription (Whisper)
-def transcribe_audio(file_path: str) -> str:
-    """Transcribe audio file with Whisper."""
-    try:
-        import whisper
-        model = whisper.load_model("base")
-        result = model.transcribe(file_path)
-        return result.get("text", "")
-    except Exception as e:
-        return f"ERROR: {e}"
-# Tool: YouTube video transcription
-def transcribe_youtube(url: str) -> str:
-    """Download and transcribe a YouTube video (audio only)."""
-    import tempfile, os
-    try:
-        import whisper
-        import yt_dlp
-        with tempfile.TemporaryDirectory() as tmpdir:
-            ydl_opts = {'format': 'bestaudio/best', 'outtmpl': os.path.join(tmpdir, 'audio.%(ext)s')}
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                ydl.download([url])
-            audio_path = [os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.startswith("audio")][0]
-            model = whisper.load_model("base")
-            result = model.transcribe(audio_path)
-            return result.get("text", "")
-    except Exception as e:
-        return f"ERROR: {e}"
-# ---- LlamaIndex agent and workflow setup ----
-# 1. Initialize LLM
 llm = OpenAI(model="gpt-4o", api_key=os.environ.get("OPENAI_API_KEY"))
-# 2. Register tools
 tools = [
     DuckDuckGoSearchTool(),
     FunctionTool.from_defaults(
@@ -117,21 +66,6 @@ tools = [
         name="python_eval",
         description="Evaluate simple Python code and return result as string. Use for math or code output."
     ),
-    FunctionTool.from_defaults(
-        ocr_image,
-        name="ocr_image",
-        description="Extract text from an image file (provide file path)."
-    ),
-    FunctionTool.from_defaults(
-        transcribe_audio,
-        name="transcribe_audio",
-        description="Transcribe an audio file using Whisper (provide file path)."
-    ),
-    FunctionTool.from_defaults(
-        transcribe_youtube,
-        name="transcribe_youtube",
-        description="Download a YouTube video, extract and transcribe its audio using Whisper."
-    ),
     FunctionTool.from_defaults(
         format_gaia_answer,
         name="format_gaia_answer",
@@ -139,7 +73,7 @@ tools = [
     ),
 ]
-# 3. Agent setup (ReAct, so can reason with tools)
 agent = ReActAgent.from_tools(
     tools=tools,
     llm=llm,
@@ -147,27 +81,16 @@ agent = ReActAgent.from_tools(
     verbose=False
 )
-# 4. Async entrypoint, suitable for HuggingFace Spaces or Gradio
 async def answer_question(question: str, task_id: str = None, file_path: str = None) -> str:
-    """
-    Main async function for the agent.
-    Passes the question and uses tools as needed.
-    - task_id: for future use, if you want to fetch files from a remote API.
-    - file_path: if a file (image, audio, etc) is present locally, pass it.
-    """
-    # Example: if you want to always try OCR/audio on a file before reasoning, you could do:
-    # If question contains "image" or "chess" and file_path is set, run OCR first
-    if file_path and any(word in question.lower() for word in ["image", "chess", "screenshot"]):
-        ocr_text = ocr_image(file_path)
-        question = f"Extracted text from image: {ocr_text}\n\n{question}"
-    if file_path and any(word in question.lower() for word in ["audio", "mp3", "transcribe"]):
-        audio_text = transcribe_audio(file_path)
-        question = f"Transcribed audio: {audio_text}\n\n{question}"
-    # Run agent
     result = await agent.achat(question)
     return result.response
-# Synchronous wrapper for legacy compat
 def answer_question_sync(question: str, task_id: str = None, file_path: str = None) -> str:
-    return asyncio.run(answer_question(question, task_id, file_path))

 from llama_index.llms.openai import OpenAI
 from llama_index.core.agent.react import ReActAgent
+from llama_index.core.tools import FunctionTool
+# Correct import for LlamaIndex >= 0.10
+from llama_index.tools.duckduckgo_search import DuckDuckGoSearchTool
+# Simple tool: Evaluate Python code for math/code questions
 def eval_python_code(code: str) -> str:
     try:
         return str(eval(code, {"__builtins__": {}}))
     except Exception as e:
         return f"ERROR: {e}"
+# Strict output formatting for GAIA
 def format_gaia_answer(answer: str, question: str = "") -> str:
     if not answer:
         return ""
     answer = re.sub(r'(?i)final answer:?\s*', '', answer).strip()
     answer = re.sub(r'(?i)i(\'?m| cannot| can\'t| unable to| apologize| not available|process the file).*', '', answer).strip()
     if answer.startswith('"') and answer.endswith('"'): answer = answer[1:-1]
     if answer.startswith('[') and answer.endswith(']'): answer = answer[1:-1]
     if not re.match(r'^[A-Za-z]+\.$', answer): answer = re.sub(r'\.$', '', answer)
+    if question:
+        if re.search(r'how many|number of|at bats|total sales|albums|output.*python|highest number', question, re.I):
+            num = re.search(r'(\$?\d[\d,\.]*)', answer)
+            if num: return num.group(1).replace(',', '')
+        if 'first name' in question: return answer.split()[0]
+        if 'surname' in question: return answer.split()[-1]
+        if 'city' in question: return answer.split()[0]
+        if re.search(r'IOC country code|award number|NASA', question, re.I):
+            code = re.search(r'[A-Z0-9]{3,}', answer)
+            if code: return code.group(0)
+        if re.search(r'list|comma.*separated|page numbers', question, re.I):
+            items = [x.strip('",.').lower() for x in re.split(r'[,\n]', answer) if x.strip()]
+            if 'page numbers' in question:
+                nums = [int(x) for x in re.findall(r'\d+', answer)]
+                return ', '.join(str(n) for n in sorted(nums))
+            if 'ingredient' in question or 'vegetable' in question:
+                merged = []
+                skip = False
+                for i, item in enumerate(items):
+                    if skip: skip = False; continue
+                    if i+1 < len(items) and item in ['sweet', 'green', 'lemon', 'ripe', 'whole', 'fresh']:
+                        merged.append(f"{item} {items[i+1]}")
+                        skip = True
+                    else: merged.append(item)
+                merged = sorted(set(merged))
+                return ', '.join(merged)
+            return ', '.join(items)
     return answer.strip().rstrip('.').strip()
+# LLM setup
 llm = OpenAI(model="gpt-4o", api_key=os.environ.get("OPENAI_API_KEY"))
+# Tool registry
 tools = [
     DuckDuckGoSearchTool(),
     FunctionTool.from_defaults(
         name="python_eval",
         description="Evaluate simple Python code and return result as string. Use for math or code output."
     ),
     FunctionTool.from_defaults(
         format_gaia_answer,
         name="format_gaia_answer",
     ),
 ]
+# Main agent
 agent = ReActAgent.from_tools(
     tools=tools,
     llm=llm,
     verbose=False
 )
+# Async entrypoint
 async def answer_question(question: str, task_id: str = None, file_path: str = None) -> str:
     result = await agent.achat(question)
     return result.response
+# Synchronous wrapper
 def answer_question_sync(question: str, task_id: str = None, file_path: str = None) -> str:
+    return asyncio.run(answer_question(question, task_id, file_path))
+# For compatibility with app.py (GAIAAgent class)
+class GaiaAgent:
+    def __call__(self, question: str, task_id: str = None, file_path: str = None) -> str:
+        return answer_question_sync(question, task_id, file_path)