wt002 commited on
Commit
1613768
·
verified ·
1 Parent(s): 51e50b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +295 -196
app.py CHANGED
@@ -1,221 +1,320 @@
1
  import os
 
2
  import gradio as gr
 
 
 
 
 
3
  import requests
4
- import inspect
5
  import pandas as pd
6
- from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
7
  from dotenv import load_dotenv
8
- import heapq
9
- from collections import Counter
10
- import re
11
- from io import BytesIO
12
- from youtube_transcript_api import YouTubeTranscriptApi
13
- from langchain_community.tools.tavily_search import TavilySearchResults
14
- from langchain_community.document_loaders import WikipediaLoader
15
- from langchain_community.utilities import WikipediaAPIWrapper
16
- from langchain_community.document_loaders import ArxivLoader
17
 
18
- # (Keep Constants as is)
19
- # --- Constants ---
20
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
21
 
22
- #Load environment variables
23
  load_dotenv()
24
 
 
 
 
 
25
 
 
 
 
26
 
 
27
 
28
- from smolagents import Tool
29
- from langchain_community.document_loaders import WikipediaLoader
30
-
31
class WikiSearchTool(Tool):
    """Tool that looks a query up on Wikipedia through ``WikipediaLoader``."""

    name = "wiki_search"
    description = "Search Wikipedia for a query and return up to 2 results."
    inputs = {
        "query": {"type": "string", "description": "The search term for Wikipedia."}
    }
    output_type = "string"

    def forward(self, query: str) -> str:
        """Load at most two Wikipedia documents for *query* and join them into one string.

        Each document is rendered as a ``<Document ...>`` header followed by
        its page content; documents are separated by a ``---`` rule.
        """
        loader = WikipediaLoader(query=query, load_max_docs=2)
        rendered = []
        for page in loader.load():
            meta = page.metadata
            rendered.append(
                f'<Document source="{meta.get("source", "Wikipedia")}" page="{meta.get("page", "")}"/>\n{page.page_content}\n</Document>'
            )
        return "\n\n---\n\n".join(rendered)
49
-
50
-
51
-
52
-
53
class StringReverseTool(Tool):
    """Tool that returns its input message reversed character by character."""

    name = "reverse_message"
    description = "When you received a strange text, try to reverse it and perform action described in reversed message."
    inputs = {
        "message": {
            "type": "string",
            "description": "A message, which looks like strange and can be reversed to get actions to execute."
        }
    }
    output_type = "string"

    def __init__(self):
        # The previous override returned immediately and never ran the base
        # class initialiser, leaving the instance without the state that
        # ``Tool`` sets up for itself.
        super().__init__()

    def forward(self, message: str) -> str:
        """Return *message* with its characters in reverse order."""
        return message[::-1]
69
-
70
class KeywordsExtractorTool(Tool):
    """Extracts top 5 keywords from a given text based on frequency."""

    name = "keywords_extractor"
    description = "This tool returns the 5 most frequent keywords occur in provided block of text."

    inputs = {
        "text": {
            "type": "string",
            "description": "Text to analyze for keywords.",
        }
    }
    output_type = "string"

    def forward(self, text: str) -> str:
        """Return the five most frequent non-stop-words of *text*.

        Args:
            text: Text to analyse; matching is case-insensitive.

        Returns:
            A comma-separated string of ``word (count)`` entries, most
            frequent first (empty when *text* has no countable words), or an
            error message when *text* cannot be processed.
        """
        stop_words = {'a', 'and', 'of', 'is', 'in', 'to', 'the'}
        top_k = 5
        try:
            # The original loop called ``list.push``, which does not exist, so
            # every call ended in the error branch.
            words = [
                w for w in re.findall(r'\b\w+\b', text.lower())
                if w not in stop_words
            ]
        except (AttributeError, TypeError) as e:
            return f"Error during extracting most common words: {e}"
        # ``output_type`` is "string", so the (word, count) pairs are rendered
        # as text instead of being returned as a list of tuples.
        return ", ".join(
            f"{word} ({count})" for word, count in Counter(words).most_common(top_k)
        )
97
-
98
@tool
def parse_excel_to_json(task_id: str) -> dict:
    """
    For a given task_id fetch an Excel file, parse it and return up to the first 20 non-empty rows of every sheet as a JSON-like dict.

    Args:
        task_id: A task ID to fetch.

    Returns:
        {
            "task_id": str,
            "sheets": {
                "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
                ...
            },
            "status": "Success" | "Error"
        }
    """
    url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        response = requests.get(url, timeout=100)
        if response.status_code != 200:
            return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}

        json_sheets = {}
        # ExcelFile holds an open workbook handle; the context manager closes
        # it even when parsing one of the sheets fails.
        with pd.ExcelFile(BytesIO(response.content)) as xls_content:
            for sheet in xls_content.sheet_names:
                df = xls_content.parse(sheet).dropna(how="all")
                # Only a 20-row preview per sheet is returned.
                json_sheets[sheet] = df.head(20).to_dict(orient="records")

        return {
            "task_id": task_id,
            "sheets": json_sheets,
            "status": "Success"
        }

    except Exception as e:
        # Best-effort tool: any download or parsing failure is reported in
        # the status field rather than raised to the agent.
        return {
            "task_id": task_id,
            "sheets": {},
            "status": f"Error in parsing Excel file: {str(e)}"
        }
143
-
144
-
145
-
146
class VideoTranscriptionTool(Tool):
    """Fetch transcripts from YouTube videos"""

    name = "transcript_video"
    description = "Fetch text transcript from YouTube movies with optional timestamps"
    inputs = {
        "url": {"type": "string", "description": "YouTube video URL or ID"},
        "include_timestamps": {"type": "boolean", "description": "If timestamps should be included in output", "nullable": True}
    }
    output_type = "string"

    def forward(self, url: str, include_timestamps: bool = False) -> str:
        """Return the transcript of a YouTube video as text.

        Args:
            url: A ``youtube.com/watch`` URL, a ``youtu.be/`` short URL, or a
                bare 11-character video ID.
            include_timestamps: When truthy, each transcript part goes on its
                own line prefixed with ``[m:ss]``; otherwise the parts are
                joined with spaces.

        Returns:
            The transcript, or an error message when the URL/ID is not
            recognised or the transcript cannot be fetched.
        """
        if "youtube.com/watch" in url:
            # A watch URL without a ``v=`` parameter used to raise IndexError
            # here, outside the try block below.
            _, found, rest = url.partition("v=")
            video_id = rest.split("&")[0] if found else ""
        elif "youtu.be/" in url:
            video_id = url.split("youtu.be/")[1].split("?")[0]
        elif len(url.strip()) == 11:  # Direct ID
            video_id = url.strip()
        else:
            video_id = ""

        if not video_id:
            return f"YouTube URL or ID: {url} is invalid!"

        try:
            transcription = YouTubeTranscriptApi.get_transcript(video_id)

            if include_timestamps:
                formatted_transcription = []
                for part in transcription:
                    timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
                    formatted_transcription.append(f"[{timestamp}] {part['text']}")
                return "\n".join(formatted_transcription)

            return " ".join(part['text'] for part in transcription)

        except Exception as e:
            # Best-effort tool: report the failure to the agent as text.
            return f"Error in extracting YouTube transcript: {str(e)}"
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  class BasicAgent:
183
  def __init__(self):
184
- token = os.environ.get("HF_API_TOKEN")
185
- model = HfApiModel(
186
- temperature=0.1,
187
- token=token
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
- search_tool = DuckDuckGoSearchTool()
191
- wiki_search_tool = WikiSearchTool()
192
- str_reverse_tool = StringReverseTool()
193
- keywords_extract_tool = KeywordsExtractorTool()
194
- speech_to_text_tool = SpeechToTextTool()
195
- visit_webpage_tool = VisitWebpageTool()
196
- final_answer_tool = FinalAnswerTool()
197
- video_transcription_tool = VideoTranscriptionTool()
198
-
199
- system_prompt = f"""
200
- You are my general AI assistant. Your task is to answer the question I asked.
201
- First, provide an explanation of your reasoning, step by step, to arrive at the answer.
202
- Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
203
- [YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
204
- If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
205
- If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
206
- If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
207
- """
208
- self.agent = CodeAgent(
209
- model=model,
210
- tools=[search_tool, wiki_search_tool, str_reverse_tool, keywords_extract_tool, speech_to_text_tool, visit_webpage_tool, final_answer_tool, parse_excel_to_json, video_transcription_tool],
211
- add_base_tools=True
212
- )
213
- self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt
214
-
215
- def __call__(self, question: str) -> str:
216
- print(f"Agent received question (first 50 chars): {question[:50]}...")
217
- answer = self.agent.run(question)
218
- print(f"Agent returning answer: {answer}")
 
 
 
 
 
 
 
 
219
  return answer
220
 
221
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
1
  import os
2
+ from typing import Annotated, Optional, TypedDict
3
  import gradio as gr
4
+ from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
5
+ from langchain_openai import ChatOpenAI
6
+ from langgraph.graph.message import add_messages
7
+ from langgraph.graph import StateGraph, START
8
+ from langgraph.prebuilt import tools_condition, ToolNode
9
  import requests
 
10
  import pandas as pd
11
+ from langchain.tools import Tool
12
  from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
13
 
14
+ from arxiv_searcher import ArxivSearcher
15
+ from chess_algebraic_notation_retriever import ChessAlgebraicNotationMoveRetriever
16
+ from excel_file_reader import ExcelFileReader
17
+ from image_question_answer_tool import ImageQuestionAnswerTool
18
+ from python_code_question_answer_tool import PythonCodeQuestionAnswerTool
19
+ from tavily_searcher import TavilySearcher
20
+ from transcriber import Transcriber
21
+ from wikipedia_searcher import WikipediaSearcher
22
+ from youtube_video_question_answer_tool import YoutubeVideoQuestionAnswerTool
23
 
 
24
  load_dotenv()
25
 
26
+ # (Keep Constants as is)
27
+ # --- Constants ---
28
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
29
+ ASSOCIATED_FILE_ENDPOINT = f"{DEFAULT_API_URL}/files/"
30
 
31
+ # --- Basic Agent Definition ---
32
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
33
+ #search_tool = DuckDuckGoSearchRun()
34
 
35
+ #search_tool = DuckDuckGoSearcherTool()
36
 
37
def retrieve_task_file(task_id: str) -> Optional[bytes]:
    """
    Download the file associated with *task_id* from the scoring API.

    Returns the raw response body when the server answers with HTTP 200.
    On any request failure, unexpected exception or other status code a
    diagnostic is printed and ``None`` is returned.
    """
    try:
        reply = requests.get(ASSOCIATED_FILE_ENDPOINT + task_id, timeout=15)
        reply.raise_for_status()
        # raise_for_status only rejects 4xx/5xx; anything other than a plain
        # 200 is still treated as a failure.
        if reply.status_code == 200:
            return reply.content
        print(f"Error fetching file: {reply.status_code}")
        return None
    except requests.exceptions.RequestException as request_error:
        print(f"Error fetching file: {request_error}")
        return None
    except Exception as unexpected_error:
        print(f"An unexpected error occurred fetching file: {unexpected_error}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
def retrieve_next_chess_move_in_algebraic_notation(task_file_path: str, is_black_turn: bool) -> str:
    """
    Retrieve the next chess move in algebraic notation from an image path.

    Args:
        task_file_path: Path of the board image; ``None`` yields an error string.
        is_black_turn: Forwarded unchanged to the move retriever.

    Returns:
        Whatever ``ChessAlgebraicNotationMoveRetriever.retrieve`` returns, or
        an error message when no file path was supplied.
    """
    if task_file_path is None:
        return "Error: Task file not found."
    return ChessAlgebraicNotationMoveRetriever().retrieve(task_file_path, is_black_turn)


# The function takes two arguments, but langchain's plain ``Tool`` wraps
# single-input callables only, so it is exposed as a ``StructuredTool`` whose
# argument schema is inferred from the signature. The import sits here so
# this block stays self-contained.
from langchain_core.tools import StructuredTool  # noqa: E402

retrieve_next_chess_move_in_algebraic_notation_tool = StructuredTool.from_function(
    func=retrieve_next_chess_move_in_algebraic_notation,
    name="retrieve_next_chess_move_in_algebraic_notation",
    description="Retrieve the next chess move in algebraic notation from an image path.",
)
72
+
73
def transcribe_audio(file_path: str) -> str:
    """Transcribe the audio file at *file_path*; return an error string when no path is given."""
    if file_path is not None:
        return Transcriber().transcribe(file_path)
    return "Error: Audio path not found."


# Single-input langchain tool exposing ``transcribe_audio`` to the agent.
transcribe_audio_tool = Tool(
    func=transcribe_audio,
    name="transcribe_audio",
    description="Transcribe the audio from an audio path.",
)
85
+
86
# Project-defined question-answering tools, instantiated once at import time
# and handed to the agent in ``BasicAgent.__init__``.

# Answers questions about a Python source file.
answer_python_code_tool = PythonCodeQuestionAnswerTool()

# Answers questions about an image.
answer_image_question_tool = ImageQuestionAnswerTool()

# Answers questions about a YouTube video.
answer_youtube_video_question_tool = YoutubeVideoQuestionAnswerTool()
94
+
95
+ '''def answer_youtube_video_question(youtube_video_url: str, question: str) -> str:
96
+ """
97
+ Answer the question based on the youtube video.
98
+ """
99
+ if youtube_video_url is None:
100
+ return "Error: Video not found."
101
+ # Download the video
102
+ video_path = YoutubeVideoDownloader().download_video(youtube_video_url)
103
+ # Answer the question
104
+ return VideoQuestionAnswer().answer(video_path, question)
105
+ # Initialize the tool
106
+ answer_youtube_video_question_tool = Tool(
107
+ name="answer_youtube_video_question",
108
+ func=answer_youtube_video_question,
109
+ description="Answer the question based on the youtube video."
110
+ )'''
111
+
112
def read_excel_file(file_path: str) -> str:
    """Read the Excel file at *file_path* via ``ExcelFileReader``; return an error string when no path is given."""
    if file_path is not None:
        return ExcelFileReader().read_file(file_path)
    return "Error: File not found."


# Single-input langchain tool exposing ``read_excel_file`` to the agent.
read_excel_file_tool = Tool(
    func=read_excel_file,
    name="read_excel_file",
    description="Read the excel file.",
)
123
+
124
# Search tools: each wraps the bound ``search`` method of a project-defined
# searcher instance created once at import time.

# Wikipedia lookup.
wikipedia_search_tool = Tool(
    func=WikipediaSearcher().search,
    name="wikipedia_search",
    description="Search Wikipedia for a given query.",
)

# Arxiv lookup.
arxiv_search_tool = Tool(
    func=ArxivSearcher().search,
    name="arxiv_search",
    description="Search Arxiv for a given query.",
)

# General web search through Tavily.
tavily_search_tool = Tool(
    func=TavilySearcher().search,
    name="tavily_search",
    description="Search the web for a given query.",
)
143
+
144
def format_gaia_answer(answer: str) -> str:
    """
    Ask the o3-mini chat model to rewrite *answer* in the terse GAIA format.

    A new ``ChatOpenAI`` client is created on every call. The model reply is
    stripped of surrounding whitespace and of every double-quote character
    before it is returned.
    """
    formatter = ChatOpenAI(model="o3-mini", openai_api_key=os.getenv("OPENAI_API_KEY"))
    prompt = f"""
    You are formatting answers for the GAIA benchmark, which requires responses to be concise and unambiguous.
    Given the answer: {answer}
    Return the answer in the correct GAIA format:
    - If the answer is a single word or number, return it without any additional text or formatting.
    - If the answer is a list, return a comma-separated list without any additional text or formatting.
    - If the answer is a string, return it without any additional text or formatting.
    Do not include any prefixes, dots, enumerations, explanations, or quotation marks.
    Do not include any additional text or formatting.
    """
    reply_text = formatter.invoke(prompt).content
    # Delete double quotes
    return reply_text.strip().replace('"', '')
159
+
160
class AgentState(TypedDict):
    """State dictionary passed between the nodes of the agent graph."""

    # Conversation so far; the ``add_messages`` annotation is the reducer
    # applied to this field by the graph.
    messages: Annotated[list[AnyMessage], add_messages]
    # Local path of the file attached to the question, or None when there is none.
    file_path: Optional[str]
164
+
165
  class BasicAgent:
166
  def __init__(self):
167
+ tools = [
168
+ tavily_search_tool,
169
+ arxiv_search_tool,
170
+ wikipedia_search_tool,
171
+ transcribe_audio_tool,
172
+ answer_python_code_tool,
173
+ answer_image_question_tool,
174
+ answer_youtube_video_question_tool,
175
+ read_excel_file_tool
176
+ ]
177
+ '''llm = ChatGoogleGenerativeAI(
178
+ model="gemini-2.0-flash",
179
+ temperature=0.2,
180
+ api_key=os.getenv("GEMINI_API_KEY")
181
+ )'''
182
+ llm = ChatOpenAI(model="o3-mini", openai_api_key=os.getenv("OPENAI_API_KEY"))
183
+ self.llm_with_tools = llm.bind_tools(tools)
184
+ builder = StateGraph(AgentState)
185
+
186
+ # Define nodes: these do the work
187
+ builder.add_node("assistant", self.assistant)
188
+ builder.add_node("tools", ToolNode(tools))
189
+
190
+ # Define edges: these determine how the control flow moves
191
+ builder.add_edge(START, "assistant")
192
+ builder.add_conditional_edges(
193
+ "assistant",
194
+ # If the latest message requires a tool, route to tools
195
+ # Otherwise, provide a direct response
196
+ tools_condition,
197
  )
198
+ builder.add_edge("tools", "assistant")
199
+ self.agent = builder.compile()
200
+
201
+ print("BasicAgent initialized.")
202
+
203
+ def assistant(self, state: AgentState):
204
+ # System message
205
+ textual_description_of_tools="""
206
+ tavily_search(query: str) -> str:
207
+ Search the web for a given query.
208
+ Args:
209
+ query: Query to search the web for (string).
210
+ Returns:
211
+ A single string containing the information found on the web.
212
+ arxiv_search(query: str) -> str:
213
+ Search Arxiv, that contains scientific papers, for a given query.
214
+ Args:
215
+ query: Query to search Arxiv for (string).
216
+ Returns:
217
+ A single string containing the answer to the question.
218
+ wikipedia_search(query: str) -> str:
219
+ Search Wikipedia for a given query.
220
+ Args:
221
+ query: Query to search Wikipedia for (string).
222
+ Returns:
223
+ A single string containing the answer to the question.
224
+ transcribe_audio(file_path: str) -> str:
225
+ Transcribe the audio from an audio path.
226
+ Args:
227
+ file_path: File path of the audio file (string).
228
+ Returns:
229
+ A single string containing the transcribed text from the audio.
230
+
231
+ answer_python_code(file_path: str, question: str) -> str:
232
+ Answer the question based on the python code.
233
+ Args:
234
+ file_path: File path of the python file (string).
235
+ question: Question to answer (string).
236
+ Returns:
237
+ A single string containing the answer to the question.
238
+
239
+ answer_image_question(file_path: str, question: str) -> str:
240
+ Answer the question based on the image.
241
+ Args:
242
+ file_path: File path of the image (string).
243
+ question: Question to answer (string).
244
+ Returns:
245
+ A single string containing the answer to the question.
246
+
247
+ download_youtube_video(youtube_video_url: str) -> str:
248
+ Download the Youtube video into a local file based on the URL
249
+ Args:
250
+ youtube_video_url: A youtube video url (string).
251
+ Returns:
252
+ A single string containing the file path of the downloaded youtube video.
253
+ answer_youtube_video_question(file_path: str, question: str) -> str:
254
+ Answer the question based on file path of the downloaded youtube video
255
+ Args:
256
+ file_path: File path of the downloaded youtube video (string).
257
+ question: Question to answer (string).
258
+ Returns:
259
+ A single string containing the answer to the question.
260
+
261
+ read_excel_file(file_path: str) -> str:
262
+ Read the excel file.
263
+ Args:
264
+ file_path: File path of the excel file (string).
265
+ Returns:
266
+ A markdown formatted string containing the contents of the excel file.
267
+ """
268
+ file_path=state["file_path"]
269
+ prompt = f"""
270
+ You are a helpful assistant that can analyse images, videos, excel files and Python scripts and run computations with provided tools:
271
+ {textual_description_of_tools}
272
+ You have access to the file path of the attached file in case it's informed. Currently the file path is: {file_path}
273
+ Be direct and specific. GAIA benchmark requires exact matching answers.
274
+ For example, if asked "What is the capital of France?", respond simply with "Paris".
275
+ Do not include any prefixes, dots, enumerations, explanations, or quotation marks.
276
+ Do not include any additional text or formatting.
277
+ If you are required a number, return a number, not the items.
278
+ """
279
+ sys_msg = SystemMessage(content=prompt)
280
 
281
+ return {
282
+ "messages": [self.llm_with_tools.invoke([sys_msg] + state["messages"], config={"configurable": {"file_path": state["file_path"]}})],
283
+ "file_path": state["file_path"]
284
+ }
285
+ '''return {
286
+ "messages": [self.llm_with_tools.invoke(
287
+ state["messages"],
288
+ config={"configurable": {"file_path": state["file_path"]}} # Aquí pasas el task_id
289
+ )],
290
+ "file_path": state["file_path"]
291
+ }'''
292
+
293
+ def __call__(self, question: str, task_id: str, file_name: str) -> str:
294
+ print(f"######################### Agent received question (first 50 chars): {question[:50]}... with file_name: {file_name}")
295
+
296
+ # Get the file path
297
+ tmp_file_path = None
298
+ if file_name is not None and file_name != "":
299
+ file_content = retrieve_task_file(task_id)
300
+ if file_content is not None:
301
+ print(f"Saving file {file_name} to tmp folder")
302
+ tmp_file_path = f"tmp/{file_name}"
303
+ with open(tmp_file_path, "wb") as f:
304
+ f.write(file_content)
305
+ # Show the file path
306
+ print(f"File path: {tmp_file_path}")
307
+
308
+ messages = self.agent.invoke({"messages": [HumanMessage(question)], "file_path": tmp_file_path})
309
+ # Show the messages
310
+ for m in messages['messages']:
311
+ m.pretty_print()
312
+ answer = messages["messages"][-1].content
313
+ answer = format_gaia_answer(answer)
314
+ print(f"######################### Agent returning answer: {answer}\n")
315
+ # Delete the file
316
+ if tmp_file_path is not None:
317
+ os.remove(tmp_file_path)
318
  return answer
319
 
320
  def run_and_submit_all( profile: gr.OAuthProfile | None):