dawid-lorek committed on
Commit
e225216
·
verified ·
1 Parent(s): c1a2949

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +134 -160
agent.py CHANGED
@@ -1,177 +1,151 @@
1
  import os
2
- import base64
3
- import requests
4
  import tempfile
 
5
  import re
6
- from openai import OpenAI
7
- from duckduckgo_search import DDGS
8
-
9
  import pandas as pd
10
 
11
- class BasicAgent:
12
- def __init__(self):
13
- self.llm = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
14
- print("BasicAgent initialized.")
15
 
16
- def web_search(self, query: str, max_results: int = 5) -> str:
17
- try:
18
- with DDGS() as ddgs:
19
- results = list(ddgs.text(query, max_results=max_results))
20
- if not results:
21
- return ""
22
- formatted_results = ""
23
- for i, result in enumerate(results, 1):
24
- title = result.get('title', '')
25
- body = result.get('body', '')
26
- href = result.get('href', '')
27
- formatted_results += f"{i}. {title}\n URL: {href}\n Description: {body}\n\n"
28
- return formatted_results
29
- except Exception as e:
30
- return ""
31
 
32
- def fetch_file(self, task_id):
33
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
34
- try:
35
- url = f"{DEFAULT_API_URL}/files/{task_id}"
36
- r = requests.get(url, timeout=10)
37
- r.raise_for_status()
38
- content_type = r.headers.get("Content-Type", "")
39
- return url, r.content, content_type
40
- except:
41
- return None, None, None
 
 
 
 
 
 
 
42
 
43
- def transcribe_audio(self, audio_bytes):
44
- try:
45
- import openai
46
- openai.api_key = os.getenv("OPENAI_API_KEY")
47
- with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
48
- f.write(audio_bytes)
49
- f.flush()
50
- audio_path = f.name
51
- transcript = openai.Audio.transcribe("whisper-1", open(audio_path, "rb"))
52
- return transcript.get("text", "")
53
- except Exception as e:
54
- return ""
 
 
 
55
 
56
- def analyze_excel(self, file_bytes):
57
- try:
58
- with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as f:
59
- f.write(file_bytes)
60
- f.flush()
61
- excel_path = f.name
62
- df = pd.read_excel(excel_path)
63
- # Example: look for a column called "Type" (food/drink) and "Sales"
64
- if 'Type' in df.columns and 'Sales' in df.columns:
65
- total = df[df['Type'].str.lower() == 'food']['Sales'].sum()
66
- return str(round(total, 2))
67
- # Fallback: sum all numbers (not robust, improve as needed)
68
- total = df.select_dtypes(include='number').sum().sum()
69
- return str(round(total, 2))
70
- except Exception as e:
71
- return ""
72
 
73
- def execute_python(self, code_bytes):
74
- # Caution: For real use, sandbox or disable entirely.
75
- try:
76
- code = code_bytes.decode("utf-8")
77
- import io, contextlib
78
- buf = io.StringIO()
79
- with contextlib.redirect_stdout(buf):
80
- exec(code, {})
81
- output = buf.getvalue().strip().split('\n')[-1]
82
- # Extract only the final numeric output if possible
83
- numbers = re.findall(r'[-+]?\d*\.\d+|\d+', output)
84
- return numbers[-1] if numbers else output
85
- except Exception as e:
86
- return ""
87
 
88
- def vision_chess_move(self, image_bytes):
89
- # GPT-4o vision required for this.
90
- # For now, return "" so LLM will still try web search
91
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- def __call__(self, question: str, task_id: str = None) -> str:
94
- # 1. Check for file
95
- file_url, file_content, file_type = self.fetch_file(task_id) if task_id else (None, None, None)
96
- file_result = ""
97
- # AUDIO
98
- if file_type and ("audio" in file_type or file_url and file_url.lower().endswith(('.mp3', '.wav'))):
99
- file_result = self.transcribe_audio(file_content)
100
- # EXCEL
101
- elif file_type and ("spreadsheet" in file_type or file_url and file_url.lower().endswith(('.xls', '.xlsx'))):
102
- file_result = self.analyze_excel(file_content)
103
- # PYTHON
104
- elif file_type and ("python" in file_type or file_url and file_url.lower().endswith('.py')):
105
- file_result = self.execute_python(file_content)
106
- # IMAGE (for chess)
107
- elif file_type and "image" in file_type:
108
- file_result = self.vision_chess_move(file_content)
109
 
110
- # 2. Web search
111
- search_snippet = self.web_search(question)
112
 
113
- # 3. Build the prompt
114
- prompt = (
115
- "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: "
116
- "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. "
117
- "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. "
118
- "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
119
- "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\n\n"
 
120
  )
121
- if file_result:
122
- prompt += f"File content: {file_result}\n\n"
123
- prompt += f"Here are web search results and the question:\n{search_snippet}\n\nQuestion: {question}"
124
 
125
- # 4. LLM call
126
- response = self.llm.chat.completions.create(
127
- model="gpt-4o",
128
- messages=[{"role": "system", "content": prompt}],
129
- temperature=0.0,
130
- max_tokens=512,
131
- )
132
- answer = response.choices[0].message.content.strip()
133
- final_line = ""
134
- for line in answer.splitlines():
135
- if line.strip().lower().startswith("final answer:"):
136
- final_line = line.split(":", 1)[-1].strip(" .\"'")
137
- break
138
 
139
- # If answer is empty or not plausible, try again with a stripped-down prompt
140
- bads = [
141
- "", "unknown", "unable to determine", "unable to provide page numbers",
142
- "unable to access video content directly", "unable to analyze video content",
143
- "unable to determine without code", "unable to determine without file",
144
- "follow the steps to locate the paper and find the nasa award number in the acknowledgment section",
145
- "i am unable to view images or access external content directly", "unable to determine without access to the file",
146
- "no results found", "n/a", "[your final answer]"
147
- ]
148
- if final_line.lower() in bads or final_line.lower().startswith("unable") or final_line.lower().startswith("follow the steps") or final_line.lower().startswith("i am unable"):
149
- retry_prompt = (
150
- "Return only the answer to the following question, in the correct format and with no explanation or apologies. "
151
- )
152
- if file_result:
153
- retry_prompt += f"File content: {file_result}\n\n"
154
- retry_prompt += f"Web search: {search_snippet}\n\nQuestion: {question}\nFINAL ANSWER:"
155
- response2 = self.llm.chat.completions.create(
156
- model="gpt-4o",
157
- messages=[{"role": "system", "content": retry_prompt}],
158
- temperature=0.0,
159
- max_tokens=128,
160
- )
161
- retry_answer = response2.choices[0].message.content.strip()
162
- for line in retry_answer.splitlines():
163
- if line.strip().lower().startswith("final answer:"):
164
- final_line = line.split(":", 1)[-1].strip(" .\"'")
165
- break
166
- elif retry_answer:
167
- final_line = retry_answer.strip(" .\"'")
168
- # Still blank? Fallback to web numbers/words
169
- if not final_line:
170
- numbers = re.findall(r'\b\d+\b', search_snippet)
171
- if numbers:
172
- final_line = numbers[0]
173
- elif file_result and re.findall(r'\b\d+\b', file_result):
174
- final_line = re.findall(r'\b\d+\b', file_result)[0]
175
- if final_line.startswith('"') and final_line.endswith('"'):
176
- final_line = final_line[1:-1]
177
- return final_line
 
1
  import os
 
 
2
  import tempfile
3
+ import requests
4
  import re
 
 
 
5
  import pandas as pd
6
 
7
+ from langchain_openai import ChatOpenAI
8
+ from langchain.agents import initialize_agent, Tool
9
+ from langchain.agents.agent_types import AgentType
10
+ from langchain_community.tools import DuckDuckGoSearchRun
11
 
12
+ # Audio transcription tool (OpenAI Whisper)
13
def transcribe_audio_tool(file_url: str) -> str:
    """Download an audio file and return its Whisper transcription.

    Args:
        file_url: URL of the audio file to fetch.

    Returns:
        The transcribed text, or "" if the download or the transcription
        fails for any reason (best-effort tool: it never raises on
        runtime failures so the agent can carry on).
    """
    # The module-level `openai.Audio` helper was removed in openai>=1.0 (the
    # version langchain_openai depends on); use the client API instead.
    from openai import OpenAI

    path = None
    try:
        r = requests.get(file_url, timeout=20)
        # Do not send an HTTP error page to the transcription endpoint.
        r.raise_for_status()
        # The file is written and closed first, then reopened for upload,
        # so the same code also works on platforms that forbid opening a
        # temp file twice.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(r.content)
            path = f.name
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        with open(path, "rb") as audio:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio,
            )
        return transcript.text or ""
    except Exception:
        return ""
    finally:
        # delete=False means nobody else cleans this up for us.
        if path is not None:
            try:
                os.remove(path)
            except OSError:
                pass
 
26
 
27
+ # Excel reading tool
28
def read_excel_tool(file_url: str) -> str:
    """Download an Excel workbook and return a sales total as a string.

    If the first sheet has both a 'Type' and a 'Sales' column, the result is
    the sum of 'Sales' over the rows whose 'Type' equals "food"
    (case-insensitive). Otherwise it is the sum of every numeric cell.

    Args:
        file_url: URL of the .xlsx file to fetch.

    Returns:
        The total rounded to two decimals, as a string, or "" if the
        download or the parsing fails (best-effort tool: it never raises).
    """
    import io

    try:
        r = requests.get(file_url, timeout=20)
        # Do not try to parse an HTTP error page as a workbook.
        r.raise_for_status()
        # Parse straight from memory: no temp file is created, so none leaks.
        df = pd.read_excel(io.BytesIO(r.content))
        if 'Type' in df.columns and 'Sales' in df.columns:
            total = df[df['Type'].str.lower() == 'food']['Sales'].sum()
        else:
            # Fallback: sum all numeric cells.
            total = df.select_dtypes(include='number').sum().sum()
        return str(round(total, 2))
    except Exception:
        return ""
44
 
45
+ # Python code execution tool (CAUTION: sandbox this for production!)
46
def execute_python_tool(code_url: str) -> str:
    """Download a Python script, run it, and return its final output.

    Only the last line the script prints is considered. If that line
    contains a number, the last number on it is returned (sign included);
    otherwise the whole line is returned.

    Args:
        code_url: URL of the Python source file to fetch and execute.

    Returns:
        The extracted output, or "" if the download or the execution fails
        (best-effort tool: it never raises).
    """
    import contextlib
    import io

    try:
        r = requests.get(code_url, timeout=20)
        # Do not exec an HTTP error page.
        r.raise_for_status()
        code = r.content.decode("utf-8")
        buf = io.StringIO()
        with contextlib.redirect_stdout(buf):
            try:
                # SECURITY: this runs downloaded code with full interpreter
                # privileges. Sandbox or disable it outside trusted setups.
                # __name__ is provided so `if __name__ == "__main__":`
                # guards run instead of raising NameError.
                exec(code, {"__name__": "__main__"})
            except SystemExit:
                # The script ended itself via sys.exit(); keep what it printed.
                pass
        output = buf.getvalue().strip().split('\n')[-1]
        # The sign applies to integers as well as decimals; the lookbehind
        # keeps a binary minus ("3-5") from being read as a sign.
        numbers = re.findall(r'(?<!\d)[-+]?(?:\d*\.\d+|\d+)', output)
        return numbers[-1] if numbers else output
    except Exception:
        return ""
60
 
61
+ # Number extraction tool (example of "reasoning" tool)
62
def extract_numbers(text: str) -> str:
    """Return every standalone number found in *text*, comma-separated.

    Integers and decimals are recognised, with an optional leading minus
    sign. A decimal such as "3.14" is kept as one number rather than being
    split at the dot. A hyphen that directly follows a word character (as in
    "2010-2015" or "covid-19") is treated as a separator, not a sign. Digits
    glued to letters ("abc123", "v2") are ignored.

    Args:
        text: Arbitrary text to scan.

    Returns:
        The numbers in order of appearance joined by ", ", or "" if there
        are none.
    """
    nums = re.findall(r'(?<!\w)-?(?:\d+(?:\.\d+)?|\.\d+)\b', text)
    return ', '.join(nums)
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
def extract_names(text: str) -> str:
    """Return the capitalized words found in *text*, comma-separated.

    A word qualifies when it is one uppercase ASCII letter followed by at
    least two lowercase ASCII letters and nothing else (so "Bob" matches,
    while "Al", "NASA" and "McDonald" do not).

    Args:
        text: Arbitrary text to scan.

    Returns:
        The matching words in order of appearance joined by ", ", or "" if
        there are none.
    """
    capitalized = re.compile(r'\b[A-Z][a-z]{2,}\b')
    # Joining an empty sequence yields "", which is the no-match result.
    return ', '.join(hit.group(0) for hit in capitalized.finditer(text))
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ # Tools list
71
# (name, callable, description) for every tool exposed to the agent. The
# descriptions are what the LLM reads when deciding which tool to call.
_TOOL_SPECS = (
    (
        "DuckDuckGo Search",
        DuckDuckGoSearchRun().run,
        "Use to find factual information or recent events.",
    ),
    (
        "Transcribe Audio",
        transcribe_audio_tool,
        "Use to transcribe an audio file from a URL (mp3 or wav).",
    ),
    (
        "Read Excel File",
        read_excel_tool,
        "Use to read an Excel spreadsheet file from a URL (xlsx) and sum food sales or extract tables.",
    ),
    (
        "Execute Python",
        execute_python_tool,
        "Use to execute a Python file from a URL and get the final output.",
    ),
    (
        "Extract Numbers",
        extract_numbers,
        "Use to extract all numbers from provided text.",
    ),
    (
        "Extract Names",
        extract_names,
        "Use to extract capitalized names from provided text.",
    ),
)

tools = [
    Tool(name=tool_name, func=tool_func, description=tool_description)
    for tool_name, tool_func, tool_description in _TOOL_SPECS
]
103
 
104
# Instruction preamble placed before every question. Its sentences are
# joined with single spaces; the "FINAL ANSWER:" template it describes is
# what the agent's reply is later scanned for.
PROMPT = " ".join((
    "You are a general AI assistant. I will ask you a question.",
    "Reason step by step, and use tools as needed. Only after you are sure, answer with the template:",
    "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.",
    "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.",
    "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
    "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
))
 
 
 
 
 
 
 
 
112
 
113
# Chat model shared by every BasicAgent instance; temperature 0 is used so
# sampling randomness is minimised.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
)
 
114
 
115
class BasicAgent:
    """Question-answering agent backed by a LangChain ReAct tool-using agent.

    Instances are callable: ``agent(question, task_id)`` returns the final
    answer string extracted from the agent's reply.
    """

    def __init__(self):
        """Build the ReAct agent from the module-level ``tools`` and ``llm``."""
        self.agent = initialize_agent(
            tools=tools,
            llm=llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=False,
            handle_parsing_errors=True,
        )
        self.prompt = PROMPT

    def fetch_file_url(self, task_id):
        """Return the URL of the file attached to *task_id*, or None.

        The scoring API is probed without downloading the file body. Any
        network error, or any status other than 200, is treated as
        "no file".

        Args:
            task_id: Identifier of the task whose attachment is looked up.

        Returns:
            The file URL if the server reports it exists, otherwise None.
        """
        DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
        url = f"{DEFAULT_API_URL}/files/{task_id}"
        try:
            probe = requests.head(url, timeout=5)
            if probe.status_code in (405, 501):
                # Some servers do not implement HEAD for GET-only routes.
                # Fall back to a streamed GET so only the headers are read.
                probe = requests.get(url, stream=True, timeout=5)
                probe.close()
        except requests.RequestException:
            # Best-effort lookup: an unreachable server means "no file".
            return None
        return url if probe.status_code == 200 else None

    def __call__(self, question: str, task_id: str = None) -> str:
        """Answer *question*, telling the agent about any attached file.

        Args:
            question: The question text.
            task_id: Optional task identifier; when given, the scoring API
                is checked for an attached file and its URL is appended to
                the question.

        Returns:
            The text after "FINAL ANSWER:" on the first line of the agent's
            reply that starts with that marker (case-insensitive), with
            surrounding spaces, dots and quotes stripped. If no line carries
            the marker, the raw reply is returned.
        """
        file_url = self.fetch_file_url(task_id) if task_id else None
        if file_url:
            # If a file is attached, pass its URL on so the agent can hand
            # it to the file-handling tools.
            question_aug = f"{question}\nThis task has assigned file at this URL: {file_url}"
        else:
            question_aug = question
        full_prompt = self.prompt + "\n" + question_aug
        result = self.agent.run(full_prompt)
        for line in result.splitlines():
            if line.strip().lower().startswith("final answer:"):
                return line.split(":", 1)[-1].strip(" .\"'")
        return result