Final_Assignment_Template

Sleeping

App Files Community

dawid-lorek committed 4 days ago

Commit

e225216

verified ·

1 Parent(s): c1a2949

Update agent.py

Browse files

Files changed (1) hide show

agent.py +134 -160

agent.py CHANGED Viewed

@@ -1,177 +1,151 @@
 import os
-import base64
-import requests
 import tempfile
 import re
-from openai import OpenAI
-from duckduckgo_search import DDGS
 import pandas as pd
-class BasicAgent:
-    def __init__(self):
-        self.llm = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-        print("BasicAgent initialized.")
-    def web_search(self, query: str, max_results: int = 5) -> str:
-        try:
-            with DDGS() as ddgs:
-                results = list(ddgs.text(query, max_results=max_results))
-            if not results:
-                return ""
-            formatted_results = ""
-            for i, result in enumerate(results, 1):
-                title = result.get('title', '')
-                body = result.get('body', '')
-                href = result.get('href', '')
-                formatted_results += f"{i}. {title}\n   URL: {href}\n   Description: {body}\n\n"
-            return formatted_results
-        except Exception as e:
-            return ""
-    def fetch_file(self, task_id):
-        DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-        try:
-            url = f"{DEFAULT_API_URL}/files/{task_id}"
-            r = requests.get(url, timeout=10)
-            r.raise_for_status()
-            content_type = r.headers.get("Content-Type", "")
-            return url, r.content, content_type
-        except:
-            return None, None, None
-    def transcribe_audio(self, audio_bytes):
-        try:
-            import openai
-            openai.api_key = os.getenv("OPENAI_API_KEY")
-            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
-                f.write(audio_bytes)
-                f.flush()
-                audio_path = f.name
-            transcript = openai.Audio.transcribe("whisper-1", open(audio_path, "rb"))
-            return transcript.get("text", "")
-        except Exception as e:
-            return ""
-    def analyze_excel(self, file_bytes):
-        try:
-            with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as f:
-                f.write(file_bytes)
-                f.flush()
-                excel_path = f.name
-            df = pd.read_excel(excel_path)
-            # Example: when both a "Type" (food/drink) column and a "Sales" column exist, sum Sales for the food rows
-            if 'Type' in df.columns and 'Sales' in df.columns:
-                total = df[df['Type'].str.lower() == 'food']['Sales'].sum()
-                return str(round(total, 2))
-            # Fallback: sum every numeric column (not robust; improve as needed)
-            total = df.select_dtypes(include='number').sum().sum()
-            return str(round(total, 2))
-        except Exception as e:
-            return ""
-    def execute_python(self, code_bytes):
-        # Caution: this exec()s the fetched code with no sandbox. For real use, sandbox it or disable it entirely.
-        try:
-            code = code_bytes.decode("utf-8")
-            import io, contextlib
-            buf = io.StringIO()
-            with contextlib.redirect_stdout(buf):
-                exec(code, {})
-            output = buf.getvalue().strip().split('\n')[-1]
-            # Keep only the last number on the final output line, if it contains one
-            numbers = re.findall(r'[-+]?\d*\.\d+|\d+', output)
-            return numbers[-1] if numbers else output
-        except Exception as e:
-            return ""
-    def vision_chess_move(self, image_bytes):
-        # This would require GPT-4o vision, which is not wired up yet.
-        # For now, return "" so the LLM still answers from the web search results.
-        return ""
-    def __call__(self, question: str, task_id: str = None) -> str:
-        # 1. Check for file
-        file_url, file_content, file_type = self.fetch_file(task_id) if task_id else (None, None, None)
-        file_result = ""
-        # AUDIO
-        if file_type and ("audio" in file_type or file_url and file_url.lower().endswith(('.mp3', '.wav'))):
-            file_result = self.transcribe_audio(file_content)
-        # EXCEL
-        elif file_type and ("spreadsheet" in file_type or file_url and file_url.lower().endswith(('.xls', '.xlsx'))):
-            file_result = self.analyze_excel(file_content)
-        # PYTHON
-        elif file_type and ("python" in file_type or file_url and file_url.lower().endswith('.py')):
-            file_result = self.execute_python(file_content)
-        # IMAGE (for chess)
-        elif file_type and "image" in file_type:
-            file_result = self.vision_chess_move(file_content)
-        # 2. Web search
-        search_snippet = self.web_search(question)
-        # 3. Build the prompt
-        prompt = (
-            "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: "
-            "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. "
-            "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. "
-            "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
-            "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\n\n"
         )
-        if file_result:
-            prompt += f"File content: {file_result}\n\n"
-        prompt += f"Here are web search results and the question:\n{search_snippet}\n\nQuestion: {question}"
-        # 4. LLM call
-        response = self.llm.chat.completions.create(
-            model="gpt-4o",
-            messages=[{"role": "system", "content": prompt}],
-            temperature=0.0,
-            max_tokens=512,
-        )
-        answer = response.choices[0].message.content.strip()
-        final_line = ""
-        for line in answer.splitlines():
-            if line.strip().lower().startswith("final answer:"):
-                final_line = line.split(":", 1)[-1].strip(" .\"'")
-                break
-        # If the answer is empty or implausible, retry with a stripped-down prompt
-        bads = [
-            "", "unknown", "unable to determine", "unable to provide page numbers",
-            "unable to access video content directly", "unable to analyze video content",
-            "unable to determine without code", "unable to determine without file",
-            "follow the steps to locate the paper and find the nasa award number in the acknowledgment section",
-            "i am unable to view images or access external content directly", "unable to determine without access to the file",
-            "no results found", "n/a", "[your final answer]"
-        ]
-        if final_line.lower() in bads or final_line.lower().startswith("unable") or final_line.lower().startswith("follow the steps") or final_line.lower().startswith("i am unable"):
-            retry_prompt = (
-                "Return only the answer to the following question, in the correct format and with no explanation or apologies. "
-            )
-            if file_result:
-                retry_prompt += f"File content: {file_result}\n\n"
-            retry_prompt += f"Web search: {search_snippet}\n\nQuestion: {question}\nFINAL ANSWER:"
-            response2 = self.llm.chat.completions.create(
-                model="gpt-4o",
-                messages=[{"role": "system", "content": retry_prompt}],
-                temperature=0.0,
-                max_tokens=128,
-            )
-            retry_answer = response2.choices[0].message.content.strip()
-            for line in retry_answer.splitlines():
-                if line.strip().lower().startswith("final answer:"):
-                    final_line = line.split(":", 1)[-1].strip(" .\"'")
-                    break
-                elif retry_answer:
-                    final_line = retry_answer.strip(" .\"'")
-            # Still blank? Fall back to the first number in the search results, then to the first number in the file result
-            if not final_line:
-                numbers = re.findall(r'\b\d+\b', search_snippet)
-                if numbers:
-                    final_line = numbers[0]
-                elif file_result and re.findall(r'\b\d+\b', file_result):
-                    final_line = re.findall(r'\b\d+\b', file_result)[0]
-        if final_line.startswith('"') and final_line.endswith('"'):
-            final_line = final_line[1:-1]
-        return final_line

 import os
 import tempfile
+import requests
 import re
 import pandas as pd
+from langchain_openai import ChatOpenAI
+from langchain.agents import initialize_agent, Tool
+from langchain.agents.agent_types import AgentType
+from langchain_community.tools import DuckDuckGoSearchRun
+# Audio transcription tool (OpenAI Whisper)
+def transcribe_audio_tool(file_url: str) -> str:
+    import openai
+    openai.api_key = os.getenv("OPENAI_API_KEY")
+    try:
+        r = requests.get(file_url, timeout=20)
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
+            f.write(r.content)
+            f.flush()
+            path = f.name
+        transcript = openai.Audio.transcribe("whisper-1", open(path, "rb"))
+        return transcript.get("text", "")
+    except Exception as e:
+        return ""
+# Excel reading tool
+def read_excel_tool(file_url: str) -> str:
+    try:
+        r = requests.get(file_url, timeout=20)
+        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as f:
+            f.write(r.content)
+            f.flush()
+            path = f.name
+        df = pd.read_excel(path)
+        if 'Type' in df.columns and 'Sales' in df.columns:
+            total = df[df['Type'].str.lower() == 'food']['Sales'].sum()
+            return str(round(total, 2))
+        # Fallback: sum every numeric column
+        total = df.select_dtypes(include='number').sum().sum()
+        return str(round(total, 2))
+    except Exception as e:
+        return ""
+# Python code execution tool (CAUTION: runs the fetched code through exec() with no sandbox; sandbox this for production!)
+def execute_python_tool(code_url: str) -> str:
+    try:
+        r = requests.get(code_url, timeout=20)
+        code = r.content.decode("utf-8")
+        import io, contextlib
+        buf = io.StringIO()
+        with contextlib.redirect_stdout(buf):
+            exec(code, {})
+        output = buf.getvalue().strip().split('\n')[-1]
+        # Keep only the last number on the final output line, if it contains one
+        numbers = re.findall(r'[-+]?\d*\.\d+|\d+', output)
+        return numbers[-1] if numbers else output
+    except Exception as e:
+        return ""
+# Number extraction tool (example of "reasoning" tool)
+def extract_numbers(text: str) -> str:
+    nums = re.findall(r'\b\d+\b', text)
+    return ', '.join(nums) if nums else ""
+def extract_names(text: str) -> str:
+    words = re.findall(r'\b[A-Z][a-z]{2,}\b', text)
+    return ', '.join(words) if words else ""
+# Tools list
+tools = [
+    Tool(
+        name="DuckDuckGo Search",
+        func=DuckDuckGoSearchRun().run,
+        description="Use to find factual information or recent events."
+    ),
+    Tool(
+        name="Transcribe Audio",
+        func=transcribe_audio_tool,
+        description="Use to transcribe an audio file from a URL (mp3 or wav)."
+    ),
+    Tool(
+        name="Read Excel File",
+        func=read_excel_tool,
+        description="Use to read an Excel spreadsheet file from a URL (xlsx) and sum food sales or extract tables."
+    ),
+    Tool(
+        name="Execute Python",
+        func=execute_python_tool,
+        description="Use to execute a Python file from a URL and get the final output."
+    ),
+    Tool(
+        name="Extract Numbers",
+        func=extract_numbers,
+        description="Use to extract all numbers from provided text."
+    ),
+    Tool(
+        name="Extract Names",
+        func=extract_names,
+        description="Use to extract capitalized names from provided text."
+    )
+]
+PROMPT = (
+    "You are a general AI assistant. I will ask you a question. "
+    "Reason step by step, and use tools as needed. Only after you are sure, answer with the template: "
+    "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. "
+    "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. "
+    "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
+    "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
+)
+llm = ChatOpenAI(model="gpt-4o", temperature=0)
+class BasicAgent:
+    def __init__(self):
+        self.agent = initialize_agent(
+            tools=tools,
+            llm=llm,
+            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+            verbose=False,
+            handle_parsing_errors=True
         )
+        self.prompt = PROMPT
+    def fetch_file_url(self, task_id):
+        DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+        try:
+            url = f"{DEFAULT_API_URL}/files/{task_id}"
+            r = requests.head(url, timeout=5)
+            if r.status_code == 200:
+                return url
+        except:
+            pass
+        return None
+    def __call__(self, question: str, task_id: str = None) -> str:
+        file_url = self.fetch_file_url(task_id) if task_id else None
+        if file_url:
+            # If a file is attached, append its URL to the question so the agent can pass it to a tool
+            question_aug = f"{question}\nThis task has assigned file at this URL: {file_url}"
+        else:
+            question_aug = question
+        # Add instruction prompt
+        full_prompt = self.prompt + "\n" + question_aug
+        result = self.agent.run(full_prompt)
+        # Return only the text after "FINAL ANSWER:"; if no such line exists, return the raw agent output
+        for line in result.splitlines():
+            if line.strip().lower().startswith("final answer:"):
+                return line.split(":", 1)[-1].strip(" .\"'")
+        return result