wt002 committed on
Commit c3c7328 · verified
1 Parent(s): 46328b5

Update app.py

Files changed (1)
  1. app.py +162 -301
app.py CHANGED
@@ -1,361 +1,222 @@
 import os
- import re
 import gradio as gr
 import requests
 import pandas as pd
 import heapq
 from collections import Counter
 from io import BytesIO
 from youtube_transcript_api import YouTubeTranscriptApi
- from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
- from langchain_community.document_loaders import WikipediaLoader, PyPDFLoader, TextLoader
- from dotenv import load_dotenv
- import tempfile
- import mimetypes
-
-
- # --- Load environment variables ---
- load_dotenv()
- HF_API_TOKEN = os.getenv("HF_API_TOKEN")

 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

- from typing import Optional
- from pydantic import BaseModel
-
- class InputSchema(BaseModel):
-     include_timestamps: Optional[bool] = None


- # --- Utility Functions ---
- def extract_youtube_id(url: str) -> str:
-     """Extract YouTube ID from various URL formats"""
-     patterns = [
-         r'(?:https?:\/\/)?(?:www\.)?youtube\.com\/watch\?v=([^&]+)',
-         r'(?:https?:\/\/)?youtu\.be\/([^?]+)',
-         r'([a-zA-Z0-9_-]{11})'  # Catches just the ID if provided directly
-     ]
-     for pattern in patterns:
-         match = re.search(pattern, url)
-         if match:
-             return match.group(1)
-     return ""

- # --- Enhanced Tools ---
 class WikiSearchTool(Tool):
-     """Enhanced Wikipedia search with better formatting and error handling"""
     name = "wiki_search"
-     description = "Search Wikipedia for a query. Returns up to 2 results with metadata."
-     inputs = {"query": {"type": "string", "description": "Search term for Wikipedia"}}
     output_type = "string"

     def forward(self, query: str) -> str:
-         try:
-             logger.info(f"Searching Wikipedia for: {query}")
-             docs = WikipediaLoader(query=query, load_max_docs=2).load()
-             if not docs:
-                 logger.info(f"No Wikipedia articles found for: {query}")
-                 return "No Wikipedia articles found."
-
-             formatted_results = []
-             for i, doc in enumerate(docs):
-                 # Limit page content length to avoid overwhelming the model, but provide enough context
-                 summary = doc.page_content[:1000] + "..." if len(doc.page_content) > 1000 else doc.page_content
-                 formatted_results.append(
-                     f"--- Wikipedia Result {i+1} ---\n"
-                     f"Title: {doc.metadata.get('title', 'N/A')}\n"
-                     f"URL: {doc.metadata.get('source', 'N/A')}\n"
-                     f"Summary: {summary}\n"
-                 )
-             return "\n\n".join(formatted_results)
-         except Exception as e:
-             logger.error(f"Wikipedia search error for '{query}': {e}")
-             return f"Wikipedia search error: {str(e)}"
-
- class FileAnalysisTool(Tool):
-     """Universal file analyzer for text/PDF/Excel files"""
-     name = "file_analysis"
-     description = "Analyze text, PDF, and Excel files. Returns extracted content."
-     inputs = {"file_path": {"type": "string", "description": "Path to the local file"}}
     output_type = "string"

-     def forward(self, file_path: str) -> str:
-         if not os.path.exists(file_path):
-             return f"File not found: {file_path}"

         try:
-             mime_type, _ = mimetypes.guess_type(file_path)
-             logger.info(f"Analyzing file: {file_path} with MIME type: {mime_type}")
-
-             if mime_type == "application/pdf":
-                 return self._process_pdf(file_path)
-             elif mime_type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel"]:
-                 return self._process_excel(file_path)
-             elif mime_type and ("text" in mime_type or "csv" in mime_type):
-                 return self._process_text(file_path)
-             else:
-                 return f"Unsupported file type for analysis: {mime_type}. Only PDF, Excel, and text/CSV files are supported."
         except Exception as e:
-             logger.error(f"File analysis error for '{file_path}': {e}")
-             return f"File analysis error: {str(e)}"
-
-     def _process_pdf(self, path: str) -> str:
-         loader = PyPDFLoader(path)
-         docs = loader.load()
-         content = "\n\n".join([doc.page_content for doc in docs])
-         # Truncate to avoid excessive token usage, provide a warning if truncated
-         if len(content) > 8000:
-             logger.warning(f"PDF content truncated from {len(content)} to 8000 characters for {path}")
-             return content[:8000] + "\n... [Content truncated]"
-         return content
-
-     def _process_excel(self, path: str) -> str:
-         df = pd.read_excel(path)
-         # Provide a sample of the data and its basic info
-         info = BytesIO()
-         df.info(buf=info)
-         info_str = info.getvalue().decode('utf-8')

-         return (f"Excel file loaded. First 10 rows:\n{df.head(10).to_markdown()}\n\n"
-                 f"DataFrame Info:\n{info_str}")

-     def _process_text(self, path: str) -> str:
-         with open(path, 'r', encoding='utf-8') as f:
-             content = f.read()
-         if len(content) > 8000:
-             logger.warning(f"Text file content truncated from {len(content)} to 8000 characters for {path}")
-             return content[:8000] + "\n... [Content truncated]"
-         return content

 class VideoTranscriptionTool(Tool):
-     """Enhanced YouTube transcription with multilingual support and better output"""
     name = "transcript_video"
-     description = "Fetch YouTube video transcripts with optional timestamps. Supports English, French, Spanish, German."
     inputs = {
-         "url": {"type": "string", "description": "YouTube URL or ID"},
-         "include_timestamps": {"type": "boolean", "description": "Include timestamps? (default: False)"}
     }
     output_type = "string"

     def forward(self, url: str, include_timestamps: bool = False) -> str:
         try:
-             video_id = extract_youtube_id(url)
-             if not video_id:
-                 return "Invalid YouTube URL or ID format. Please provide a valid YouTube URL or an 11-character video ID."
-
-             logger.info(f"Attempting to transcribe video ID: {video_id}")
-             transcript = YouTubeTranscriptApi.get_transcript(
-                 video_id,
-                 languages=['en', 'fr', 'es', 'de']  # Prioritize common languages
-             )
-
-             if not transcript:
-                 return f"No transcript found for video ID: {video_id} in supported languages (en, fr, es, de)."

             if include_timestamps:
-                 formatted_transcript = "\n".join(
-                     f"[{int(seg['start']//60):02d}:{int(seg['start']%60):02d}] {seg['text']}"
-                     for seg in transcript
-                 )
             else:
-                 formatted_transcript = " ".join(seg['text'] for seg in transcript)
-
-             return formatted_transcript
-         except Exception as e:
-             logger.error(f"Transcription error for '{url}': {e}")
-             return f"Transcription error: {str(e)}. This might be due to no available transcript or an unsupported video."
-
- class DataAnalysisTool(Tool):
-     """Perform data analysis using pandas on structured data (CSV/Excel)"""
-     name = "data_analysis"
-     description = "Analyze CSV/Excel data using pandas operations. Supported operations: 'describe', 'groupby:column:aggfunc' (e.g., 'groupby:Category:mean')."
-     inputs = {
-         "file_path": {"type": "string", "description": "Path to the local data file (CSV or Excel)"},
-         "operation": {"type": "string", "description": "Pandas operation (e.g., 'describe', 'groupby:column_name:mean')"}
-     }
-     output_type = "string"
-
-     def forward(self, file_path: str, operation: str) -> str:
-         if not os.path.exists(file_path):
-             return f"File not found: {file_path}"

-         try:
-             if file_path.endswith('.csv'):
-                 df = pd.read_csv(file_path)
-             elif file_path.endswith('.xlsx') or file_path.endswith('.xls'):
-                 df = pd.read_excel(file_path)
-             else:
-                 return "Unsupported file format for data analysis. Please provide a .csv or .xlsx file."
-
-             logger.info(f"Performing data analysis operation '{operation}' on {file_path}")
-
-             if operation == "describe":
-                 return "Descriptive Statistics:\n" + str(df.describe())
-             elif operation.startswith("groupby:"):
-                 parts = operation.split(":")
-                 if len(parts) == 3:
-                     _, col, agg = parts
-                     if col not in df.columns:
-                         return f"Column '{col}' not found in the DataFrame."
-                     try:
-                         result = df.groupby(col).agg(agg)
-                         return f"Groupby operation '{agg}' on column '{col}':\n" + str(result)
-                     except Exception as agg_e:
-                         return f"Error performing aggregation '{agg}' on column '{col}': {str(agg_e)}"
-                 else:
-                     return "Invalid 'groupby' operation format. Use 'groupby:column_name:agg_function'."
-             else:
-                 return "Unsupported operation. Try: 'describe' or 'groupby:column_name:agg_function'."
         except Exception as e:
-             logger.error(f"Data analysis error for '{file_path}' with operation '{operation}': {e}")
-             return f"Data analysis error: {str(e)}. Please check file content and operation."
 
- # --- Agent Initialization ---
 class BasicAgent:
     def __init__(self):
-         self.model = HfApiModel(
-             temperature=0.0,  # Slightly increased temperature for more creative responses if appropriate
-             token=HF_API_TOKEN,
-             max_tokens=2000
-         )

-         self.tools = self._initialize_tools()
-         self.agent = self._create_agent()
-
-     def _initialize_tools(self) -> list:
-         """Initialize all tools with enhanced capabilities"""
-         return [
-             DuckDuckGoSearchTool(),
-             WikiSearchTool(),
-             VisitWebpageTool(),
-             SpeechToTextTool(),  # Might be less relevant for a text-based research agent but kept if needed
-             FinalAnswerTool(),
-             VideoTranscriptionTool(),
-             FileAnalysisTool(),
-             DataAnalysisTool(),
-             self._create_excel_download_tool(),  # Renamed for clarity
-             self._create_keywords_tool()
-         ]
-
-     def _create_excel_download_tool(self):
-         """Tool to download and parse Excel files from a specific URL"""
-         @tool
-         def download_and_parse_excel(task_id: str) -> dict:
-             """
-             Downloads an Excel file from a predefined URL using a task_id and parses its content.
-             Returns a dictionary with status and data (first 20 rows).
-             """
-             try:
-                 url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
-                 logger.info(f"Attempting to download Excel from: {url}")
-                 response = requests.get(url, timeout=60)  # Increased timeout for larger files
-                 response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)
-
-                 with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
-                     tmp.write(response.content)
-                     temp_file_path = tmp.name
-
-                 df = pd.read_excel(temp_file_path)
-                 os.unlink(temp_file_path)  # Clean up the temporary file
-
-                 logger.info(f"Successfully downloaded and parsed Excel for task_id: {task_id}")
-                 return {
-                     "task_id": task_id,
-                     "data_sample": df.head(10).to_dict(orient="records"),  # Reduced to 10 for conciseness
-                     "status": "Success",
-                     "columns": df.columns.tolist(),  # Added column names for context
-                     "shape": df.shape  # Added shape for context
-                 }
-             except requests.exceptions.RequestException as req_err:
-                 logger.error(f"Network or HTTP error downloading Excel for task_id '{task_id}': {req_err}")
-                 return {"status": f"Download error: {str(req_err)}"}
-             except Exception as e:
-                 logger.error(f"Error parsing Excel for task_id '{task_id}': {e}")
-                 return {"status": f"Parsing error: {str(e)}"}
-         return download_and_parse_excel
-
-     def _create_keywords_tool(self):
-         """Keywords extractor with TF-IDF like scoring (basic frequency for now)"""
-         @tool
-         def extract_keywords(text: str, top_n: int = 5) -> list:
-             """
-             Extracts the most frequent keywords from a given text, excluding common stopwords.
-             Args:
-                 text (str): The input text to extract keywords from.
-                 top_n (int): The number of top keywords to return.
-             Returns:
-                 list: A list of the most frequent keywords.
-             """
-             if not text:
-                 return []
-
-             # Use a more comprehensive list of English stopwords
-             stopwords = set([
-                 "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it",
-                 "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these",
-                 "they", "this", "to", "was", "will", "with", "he", "she", "it's", "i", "we", "you", "my",
-                 "your", "our", "us", "him", "her", "his", "hers", "its", "them", "their", "what", "when",
-                 "where", "why", "how", "which", "who", "whom", "can", "could", "would", "should", "may",
-                 "might", "must", "have", "has", "had", "do", "does", "did", "am", "are", "is", "were", "been",
-                 "being", "from", "up", "down", "out", "off", "over", "under", "again", "further", "then",
-                 "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few",
-                 "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so",
-                 "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"
-             ])
-
-             words = re.findall(r'\b\w+\b', text.lower())  # Relaxed regex to capture all words
-             filtered = [w for w in words if w not in stopwords and len(w) > 2]  # Filter words less than 3 chars
-             counter = Counter(filtered)
-             return [word for word, _ in counter.most_common(top_n)]
-         return extract_keywords
-
-     def _create_agent(self) -> CodeAgent:
-         """Create agent with improved system prompt"""
-         system_prompt = """
- You are an advanced, helpful, and highly analytical research assistant. Your goal is to provide accurate, comprehensive, and well-structured answers to user queries, leveraging all available tools efficiently.
-
- **Follow this robust process:**
-
- 1. **Understand the User's Need:** Carefully analyze the user's question, including any attached files or specific requests (e.g., "summarize," "analyze data," "find facts").
- 2. **Formulate a Detailed Plan:** Before acting, create a clear, step-by-step plan. This plan should outline:
-     * What information needs to be gathered.
-     * Which tools are most appropriate for each step (e.g., `duckduckgo_search` for general web search, `wiki_search` for encyclopedic facts, `transcript_video` for YouTube, `file_analysis` or `data_analysis` for local files).
-     * How you will combine information from different sources.
-     * How you will verify or synthesize the findings.
- 3. **Execute the Plan Using Tools:** Call the necessary tools, providing clear and correct arguments. If a tool fails, try to understand why and adapt your plan (e.g., try a different search query or tool).
- 4. **Synthesize and Verify Information:** Once you have gathered sufficient information, synthesize it into a coherent answer. Do not just list facts; explain their significance and how they relate to the original question. If there are contradictions or uncertainties, mention them.
- 5. **Formulate the Final Answer:**
-     * Present your answer clearly and concisely.
-     * Always begin your ultimate response with "FINAL ANSWER:".
-     * If the answer is a single number, provide only the number.
-     * If the answer is a list, provide comma-separated values.
-     * For complex answers, use structured formats like bullet points or JSON where appropriate to enhance readability.
-     * **Crucially, always include sources or references (e.g., URLs, Wikipedia titles, file names) where you obtained the information.** This builds trust and allows for verification.
-     * If you used `file_analysis` or `data_analysis` tools on an uploaded file, explicitly state that you analyzed the provided file.
-
- **Important Considerations:**
-     * **Prioritize:** If the query involves a specific file, start by analyzing that file if appropriate.
-     * **Limitations:** If you cannot answer a question with the available tools, state that clearly.
-     * **Conciseness:** Be as concise as possible while providing a complete and accurate answer.
 """
-         agent = CodeAgent(
-             model=self.model,
-             tools=self.tools,
             add_base_tools=True
         )
-         agent.prompt_templates["system_prompt"] = system_prompt
-         return agent

     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         answer = self.agent.run(question)
         print(f"Agent returning answer: {answer}")
         return answer
-

 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
 
 import os
 import gradio as gr
 import requests
+ import inspect
 import pandas as pd
+ from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
+ from dotenv import load_dotenv
 import heapq
 from collections import Counter
+ import re
 from io import BytesIO
 from youtube_transcript_api import YouTubeTranscriptApi
+ from langchain_community.tools.tavily_search import TavilySearchResults
+ from langchain_community.document_loaders import WikipediaLoader
+ from langchain_community.utilities import WikipediaAPIWrapper
+ from langchain_community.document_loaders import ArxivLoader

 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

+ # Load environment variables
+ load_dotenv()
 
+ from smolagents import Tool
+ from langchain_community.document_loaders import WikipediaLoader

 class WikiSearchTool(Tool):
     name = "wiki_search"
+     description = "Search Wikipedia for a query and return up to 2 results."
+     inputs = {
+         "query": {"type": "string", "description": "The search term for Wikipedia."}
+     }
     output_type = "string"

     def forward(self, query: str) -> str:
+         search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+
+         formatted_search_docs = "\n\n---\n\n".join(
+             [
+                 f'<Document source="{doc.metadata.get("source", "Wikipedia")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+                 for doc in search_docs
+             ]
+         )
+         return formatted_search_docs
+
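+ # Illustrative note (assumed example, not from a real run): wiki_search("Alan Turing") returns each hit
+ # wrapped as <Document source="..." page="">...</Document>, with results separated by "---".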
+ class StringReverseTool(Tool):
+     name = "reverse_message"
+     description = "When you receive a strange text, try to reverse it and perform the action described in the reversed message."
+     inputs = {
+         "message": {
+             "type": "string",
+             "description": "A message which looks strange and can be reversed to get actions to execute."
+         }
+     }
     output_type = "string"

+     def __init__(self):
+         super().__init__()  # keep the base Tool initialization
+
+     def forward(self, message: str):
+         return message[::-1]
+
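+ # Illustrative (assumed) example: reverse_message("olleh") returns "hello".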
+ class KeywordsExtractorTool(Tool):
+     """Extracts top 5 keywords from a given text based on frequency."""
+
+     name = "keywords_extractor"
+     description = "This tool returns the 5 most frequent keywords occurring in the provided block of text."
+
+     inputs = {
+         "text": {
+             "type": "string",
+             "description": "Text to analyze for keywords.",
+         }
+     }
+     output_type = "string"

+     def forward(self, text: str) -> str:
         try:
+             all_words = re.findall(r'\b\w+\b', text.lower())
+             conjunctions = {'a', 'and', 'of', 'is', 'in', 'to', 'the'}
+             filtered_words = []
+             for w in all_words:
+                 if w not in conjunctions:
+                     filtered_words.append(w)
+             word_counts = Counter(filtered_words)
+             k = 5
+             return str(heapq.nlargest(k, word_counts.items(), key=lambda x: x[1]))  # stringify to match output_type
         except Exception as e:
+             return f"Error during extracting most common words: {e}"
+
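+ # Illustrative (assumed) example: for "the cat saw the cat and the dog" the tool
+ # returns something like "[('cat', 2), ('saw', 1), ('dog', 1)]".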
+ @tool
+ def parse_excel_to_json(task_id: str) -> dict:
+     """
+     For a given task_id, fetch and parse an Excel file and return the parsed data as structured JSON.
+     Args:
+         task_id: A task ID to fetch.
+
+     Returns:
+         {
+             "task_id": str,
+             "sheets": {
+                 "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
+                 ...
+             },
+             "status": "Success" | "Error"
+         }
+     """
+     url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+
+     try:
+         response = requests.get(url, timeout=100)
+         if response.status_code != 200:
+             return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}
+
+         xls_content = pd.ExcelFile(BytesIO(response.content))
+         json_sheets = {}
+
+         for sheet in xls_content.sheet_names:
+             df = xls_content.parse(sheet)
+             df = df.dropna(how="all")
+             rows = df.head(20).to_dict(orient="records")
+             json_sheets[sheet] = rows
+
+         return {
+             "task_id": task_id,
+             "sheets": json_sheets,
+             "status": "Success"
+         }
+
+     except Exception as e:
+         return {
+             "task_id": task_id,
+             "sheets": {},
+             "status": f"Error in parsing Excel file: {str(e)}"
+         }
+

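+ # Illustrative (assumed) call: parse_excel_to_json("some-task-id") returns
+ # {"task_id": "some-task-id", "sheets": {"Sheet1": [{...}, ...]}, "status": "Success"}.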
 class VideoTranscriptionTool(Tool):
+     """Fetch transcripts from YouTube videos"""
     name = "transcript_video"
+     description = "Fetch text transcript from YouTube videos with optional timestamps"
     inputs = {
+         "url": {"type": "string", "description": "YouTube video URL or ID"},
+         "include_timestamps": {"type": "boolean", "description": "If timestamps should be included in output", "nullable": True}
     }
     output_type = "string"

     def forward(self, url: str, include_timestamps: bool = False) -> str:
+
+         if "youtube.com/watch" in url:
+             video_id = url.split("v=")[1].split("&")[0]
+         elif "youtu.be/" in url:
+             video_id = url.split("youtu.be/")[1].split("?")[0]
+         elif len(url.strip()) == 11:  # Direct ID
+             video_id = url.strip()
+         else:
+             return f"YouTube URL or ID: {url} is invalid!"
+
         try:
+             transcription = YouTubeTranscriptApi.get_transcript(video_id)

             if include_timestamps:
+                 formatted_transcription = []
+                 for part in transcription:
+                     timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
+                     formatted_transcription.append(f"[{timestamp}] {part['text']}")
+                 return "\n".join(formatted_transcription)
             else:
+                 return " ".join([part['text'] for part in transcription])

         except Exception as e:
+             return f"Error in extracting YouTube transcript: {str(e)}"

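+ # Illustrative (assumed) inputs accepted by transcript_video: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+ # "https://youtu.be/dQw4w9WgXcQ", or the bare 11-character ID "dQw4w9WgXcQ".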
 
 class BasicAgent:
     def __init__(self):
+         token = os.environ.get("HF_API_TOKEN")
+         self.api_token = os.environ.get("HF_API_TOKEN")
+         self.api_url = "https://api-inference.huggingface.co/models/"
+         self.model_id = "mistralai/Mistral-7B-Instruct-v0.3"
+         model = HfApiModel(model_id=self.model_id, token=self.api_token)  # assumed: instantiate the model referenced by CodeAgent below
+
+         search_tool = DuckDuckGoSearchTool()
+         wiki_search_tool = WikiSearchTool()
+         str_reverse_tool = StringReverseTool()
+         keywords_extract_tool = KeywordsExtractorTool()
+         speech_to_text_tool = SpeechToTextTool()
+         visit_webpage_tool = VisitWebpageTool()
+         final_answer_tool = FinalAnswerTool()
+         video_transcription_tool = VideoTranscriptionTool()
+
+         system_prompt = f"""
+ You are my general AI assistant. Your task is to answer the question I asked.
+ First, provide an explanation of your reasoning, step by step, to arrive at the answer.
+ Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
+ [YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
+ If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
+ If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
+ If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
 """
+         self.agent = CodeAgent(
+             model=model,
+             tools=[search_tool, wiki_search_tool, str_reverse_tool, keywords_extract_tool, speech_to_text_tool, visit_webpage_tool, final_answer_tool, parse_excel_to_json, video_transcription_tool],
             add_base_tools=True
         )
+         self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt
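+         # With the prompt above, replies are expected to end in a single line such as
+         # "FINAL ANSWER: 42" or "FINAL ANSWER: Paris, Berlin" (illustrative examples).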
 

     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         answer = self.agent.run(question)
         print(f"Agent returning answer: {answer}")
         return answer


 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """