wt002 committed
Commit 51e50b5 · verified · 1 Parent(s): a6c4989

Update app.py

Files changed (1)
  1. app.py +169 -313
app.py CHANGED
@@ -1,366 +1,222 @@
  import os
- import re
  import gradio as gr
  import requests
  import pandas as pd
  import heapq
  from collections import Counter
  from io import BytesIO
  from youtube_transcript_api import YouTubeTranscriptApi
- from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
- from langchain_community.document_loaders import WikipediaLoader, PyPDFLoader, TextLoader
- from dotenv import load_dotenv
- import tempfile
- import mimetypes
- import logging
 
- # --- Initialize logging ---
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
- logger = logging.getLogger(__name__)
 
- # --- Load environment variables ---
  load_dotenv()
- HF_API_TOKEN = os.getenv("HF_API_TOKEN")
- if not HF_API_TOKEN:
-     logger.error("HF_API_TOKEN not found in environment variables! Please set it to use the HfApiModel.")
-     # Exit or raise an error if the token is critical for functionality
-     # sys.exit(1) # Uncomment if you want to exit the script if token is missing
-
- # --- Utility Functions ---
- def extract_youtube_id(url: str) -> str:
-     """Extract YouTube ID from various URL formats"""
-     patterns = [
-         r'(?:https?:\/\/)?(?:www\.)?youtube\.com\/watch\?v=([^&]+)',
-         r'(?:https?:\/\/)?youtu\.be\/([^?]+)',
-         r'([a-zA-Z0-9_-]{11})' # Catches just the ID if provided directly
-     ]
-     for pattern in patterns:
-         match = re.search(pattern, url)
-         if match:
-             return match.group(1)
-     return ""
-
- # --- Enhanced Tools ---
  class WikiSearchTool(Tool):
-     """Enhanced Wikipedia search with better formatting and error handling"""
      name = "wiki_search"
-     description = "Search Wikipedia for a query. Returns up to 2 results with metadata."
-     inputs = {"query": {"type": "string", "description": "Search term for Wikipedia"}}
      output_type = "string"
 
      def forward(self, query: str) -> str:
-         try:
-             logger.info(f"Searching Wikipedia for: {query}")
-             docs = WikipediaLoader(query=query, load_max_docs=2).load()
-             if not docs:
-                 logger.info(f"No Wikipedia articles found for: {query}")
-                 return "No Wikipedia articles found."
-
-             formatted_results = []
-             for i, doc in enumerate(docs):
-                 # Limit page content length to avoid overwhelming the model, but provide enough context
-                 summary = doc.page_content[:1000] + "..." if len(doc.page_content) > 1000 else doc.page_content
-                 formatted_results.append(
-                     f"--- Wikipedia Result {i+1} ---\n"
-                     f"Title: {doc.metadata.get('title', 'N/A')}\n"
-                     f"URL: {doc.metadata.get('source', 'N/A')}\n"
-                     f"Summary: {summary}\n"
-                 )
-             return "\n\n".join(formatted_results)
-         except Exception as e:
-             logger.error(f"Wikipedia search error for '{query}': {e}")
-             return f"Wikipedia search error: {str(e)}"
-
- class FileAnalysisTool(Tool):
-     """Universal file analyzer for text/PDF/Excel files"""
-     name = "file_analysis"
-     description = "Analyze text, PDF, and Excel files. Returns extracted content."
-     inputs = {"file_path": {"type": "string", "description": "Path to the local file"}}
      output_type = "string"
 
-     def forward(self, file_path: str) -> str:
-         if not os.path.exists(file_path):
-             return f"File not found: {file_path}"
 
          try:
-             mime_type, _ = mimetypes.guess_type(file_path)
-             logger.info(f"Analyzing file: {file_path} with MIME type: {mime_type}")
-
-             if mime_type == "application/pdf":
-                 return self._process_pdf(file_path)
-             elif mime_type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel"]:
-                 return self._process_excel(file_path)
-             elif mime_type and ("text" in mime_type or "csv" in mime_type):
-                 return self._process_text(file_path)
-             else:
-                 return f"Unsupported file type for analysis: {mime_type}. Only PDF, Excel, and text/CSV files are supported."
          except Exception as e:
-             logger.error(f"File analysis error for '{file_path}': {e}")
-             return f"File analysis error: {str(e)}"
-
-     def _process_pdf(self, path: str) -> str:
-         loader = PyPDFLoader(path)
-         docs = loader.load()
-         content = "\n\n".join([doc.page_content for doc in docs])
-         # Truncate to avoid excessive token usage, provide a warning if truncated
-         if len(content) > 8000:
-             logger.warning(f"PDF content truncated from {len(content)} to 8000 characters for {path}")
-             return content[:8000] + "\n... [Content truncated]"
-         return content
-
-     def _process_excel(self, path: str) -> str:
-         df = pd.read_excel(path)
-         # Provide a sample of the data and its basic info
-         info = BytesIO()
-         df.info(buf=info)
-         info_str = info.getvalue().decode('utf-8')
 
-         return (f"Excel file loaded. First 10 rows:\n{df.head(10).to_markdown()}\n\n"
-                 f"DataFrame Info:\n{info_str}")
 
-     def _process_text(self, path: str) -> str:
-         with open(path, 'r', encoding='utf-8') as f:
-             content = f.read()
-         if len(content) > 8000:
-             logger.warning(f"Text file content truncated from {len(content)} to 8000 characters for {path}")
-             return content[:8000] + "\n... [Content truncated]"
-         return content
 
  class VideoTranscriptionTool(Tool):
-     """Enhanced YouTube transcription with multilingual support and better output"""
      name = "transcript_video"
-     description = "Fetch YouTube video transcripts with optional timestamps. Supports English, French, Spanish, German."
      inputs = {
-         "url": {"type": "string", "description": "YouTube URL or ID"},
-         "include_timestamps": {"type": "boolean", "description": "Include timestamps? (default: False)"}
      }
      output_type = "string"
 
      def forward(self, url: str, include_timestamps: bool = False) -> str:
          try:
-             video_id = extract_youtube_id(url)
-             if not video_id:
-                 return "Invalid YouTube URL or ID format. Please provide a valid YouTube URL or an 11-character video ID."
-
-             logger.info(f"Attempting to transcribe video ID: {video_id}")
-             transcript = YouTubeTranscriptApi.get_transcript(
-                 video_id,
-                 languages=['en', 'fr', 'es', 'de'] # Prioritize common languages
-             )
-
-             if not transcript:
-                 return f"No transcript found for video ID: {video_id} in supported languages (en, fr, es, de)."
 
              if include_timestamps:
-                 formatted_transcript = "\n".join(
-                     f"[{int(seg['start']//60):02d}:{int(seg['start']%60):02d}] {seg['text']}"
-                     for seg in transcript
-                 )
              else:
-                 formatted_transcript = " ".join(seg['text'] for seg in transcript)
-
-             return formatted_transcript
-         except Exception as e:
-             logger.error(f"Transcription error for '{url}': {e}")
-             return f"Transcription error: {str(e)}. This might be due to no available transcript or an unsupported video."
 
- class DataAnalysisTool(Tool):
-     """Perform data analysis using pandas on structured data (CSV/Excel)"""
-     name = "data_analysis"
-     description = "Analyze CSV/Excel data using pandas operations. Supported operations: 'describe', 'groupby:column:aggfunc' (e.g., 'groupby:Category:mean')."
-     inputs = {
-         "file_path": {"type": "string", "description": "Path to the local data file (CSV or Excel)"},
-         "operation": {"type": "string", "description": "Pandas operation (e.g., 'describe', 'groupby:column_name:mean')"}
-     }
-     output_type = "string"
-
-     def forward(self, file_path: str, operation: str) -> str:
-         if not os.path.exists(file_path):
-             return f"File not found: {file_path}"
-
-         try:
-             if file_path.endswith('.csv'):
-                 df = pd.read_csv(file_path)
-             elif file_path.endswith('.xlsx') or file_path.endswith('.xls'):
-                 df = pd.read_excel(file_path)
-             else:
-                 return "Unsupported file format for data analysis. Please provide a .csv or .xlsx file."
-
-             logger.info(f"Performing data analysis operation '{operation}' on {file_path}")
-
-             if operation == "describe":
-                 return "Descriptive Statistics:\n" + str(df.describe())
-             elif operation.startswith("groupby:"):
-                 parts = operation.split(":")
-                 if len(parts) == 3:
-                     _, col, agg = parts
-                     if col not in df.columns:
-                         return f"Column '{col}' not found in the DataFrame."
-                     try:
-                         result = df.groupby(col).agg(agg)
-                         return f"Groupby operation '{agg}' on column '{col}':\n" + str(result)
-                     except Exception as agg_e:
-                         return f"Error performing aggregation '{agg}' on column '{col}': {str(agg_e)}"
-                 else:
-                     return "Invalid 'groupby' operation format. Use 'groupby:column_name:agg_function'."
-             else:
-                 return "Unsupported operation. Try: 'describe' or 'groupby:column_name:agg_function'."
          except Exception as e:
-             logger.error(f"Data analysis error for '{file_path}' with operation '{operation}': {e}")
-             return f"Data analysis error: {str(e)}. Please check file content and operation."
 
- # --- Agent Initialization ---
  class BasicAgent:
      def __init__(self):
-         self.model = HfApiModel(
-             temperature=0.1, # Slightly increased temperature for more creative responses if appropriate
-             token=HF_API_TOKEN,
-             max_tokens=2000
          )
-
-         self.tools = self._initialize_tools()
-         self.agent = self._create_agent()
-
-     def _initialize_tools(self) -> list:
-         """Initialize all tools with enhanced capabilities"""
-         return [
-             DuckDuckGoSearchTool(),
-             WikiSearchTool(),
-             VisitWebpageTool(),
-             SpeechToTextTool(), # Might be less relevant for a text-based research agent but kept if needed
-             FinalAnswerTool(),
-             VideoTranscriptionTool(),
-             FileAnalysisTool(),
-             DataAnalysisTool(),
-             self._create_excel_download_tool(), # Renamed for clarity
-             self._create_keywords_tool()
-         ]
-
-     def _create_excel_download_tool(self):
-         """Tool to download and parse Excel files from a specific URL"""
-         @tool
-         def download_and_parse_excel(task_id: str) -> dict:
-             """
-             Downloads an Excel file from a predefined URL using a task_id and parses its content.
-             Returns a dictionary with status and data (first 20 rows).
-             """
-             try:
-                 url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
-                 logger.info(f"Attempting to download Excel from: {url}")
-                 response = requests.get(url, timeout=60) # Increased timeout for larger files
-                 response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)
-
-                 with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
-                     tmp.write(response.content)
-                     temp_file_path = tmp.name
-
-                 df = pd.read_excel(temp_file_path)
-                 os.unlink(temp_file_path) # Clean up the temporary file
-
-                 logger.info(f"Successfully downloaded and parsed Excel for task_id: {task_id}")
-                 return {
-                     "task_id": task_id,
-                     "data_sample": df.head(10).to_dict(orient="records"), # Reduced to 10 for conciseness
-                     "status": "Success",
-                     "columns": df.columns.tolist(), # Added column names for context
-                     "shape": df.shape # Added shape for context
-                 }
-             except requests.exceptions.RequestException as req_err:
-                 logger.error(f"Network or HTTP error downloading Excel for task_id '{task_id}': {req_err}")
-                 return {"status": f"Download error: {str(req_err)}"}
-             except Exception as e:
-                 logger.error(f"Error parsing Excel for task_id '{task_id}': {e}")
-                 return {"status": f"Parsing error: {str(e)}"}
-         return download_and_parse_excel
-
-     def _create_keywords_tool(self):
-         """Keywords extractor with TF-IDF like scoring (basic frequency for now)"""
-         @tool
-         def extract_keywords(text: str, top_n: int = 5) -> list:
-             """
-             Extracts the most frequent keywords from a given text, excluding common stopwords.
-             Args:
-                 text (str): The input text to extract keywords from.
-                 top_n (int): The number of top keywords to return.
-             Returns:
-                 list: A list of the most frequent keywords.
-             """
-             if not text:
-                 return []
-
-             # Use a more comprehensive list of English stopwords
-             stopwords = set([
-                 "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it",
-                 "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these",
-                 "they", "this", "to", "was", "will", "with", "he", "she", "it's", "i", "we", "you", "my",
-                 "your", "our", "us", "him", "her", "his", "hers", "its", "them", "their", "what", "when",
-                 "where", "why", "how", "which", "who", "whom", "can", "could", "would", "should", "may",
-                 "might", "must", "have", "has", "had", "do", "does", "did", "am", "are", "is", "were", "been",
-                 "being", "from", "up", "down", "out", "off", "over", "under", "again", "further", "then",
-                 "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few",
-                 "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so",
-                 "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"
-             ])
-
-             words = re.findall(r'\b\w+\b', text.lower()) # Relaxed regex to capture all words
-             filtered = [w for w in words if w not in stopwords and len(w) > 2] # Filter words less than 3 chars
-             counter = Counter(filtered)
-             return [word for word, _ in counter.most_common(top_n)]
-         return extract_keywords
-
-     def _create_agent(self) -> CodeAgent:
-         """Create agent with improved system prompt"""
-         system_prompt = """
-         You are an advanced, helpful, and highly analytical research assistant. Your goal is to provide accurate, comprehensive, and well-structured answers to user queries, leveraging all available tools efficiently.
-
-         **Follow this robust process:**
-
-         1. **Understand the User's Need:** Carefully analyze the user's question, including any attached files or specific requests (e.g., "summarize," "analyze data," "find facts").
-         2. **Formulate a Detailed Plan:** Before acting, create a clear, step-by-step plan. This plan should outline:
-             * What information needs to be gathered.
-             * Which tools are most appropriate for each step (e.g., `duckduckgo_search` for general web search, `wiki_search` for encyclopedic facts, `transcript_video` for YouTube, `file_analysis` or `data_analysis` for local files).
-             * How you will combine information from different sources.
-             * How you will verify or synthesize the findings.
-         3. **Execute the Plan Using Tools:** Call the necessary tools, providing clear and correct arguments. If a tool fails, try to understand why and adapt your plan (e.g., try a different search query or tool).
-         4. **Synthesize and Verify Information:** Once you have gathered sufficient information, synthesize it into a coherent answer. Do not just list facts; explain their significance and how they relate to the original question. If there are contradictions or uncertainties, mention them.
-         5. **Formulate the Final Answer:**
-             * Present your answer clearly and concisely.
-             * Always begin your ultimate response with "FINAL ANSWER:".
-             * If the answer is a single number, provide only the number.
-             * If the answer is a list, provide comma-separated values.
-             * For complex answers, use structured formats like bullet points or JSON where appropriate to enhance readability.
-             #* **Crucially, always include sources or references (e.g., URLs, Wikipedia titles, file names) where you obtained the information.** This builds trust and allows for verification.
-             * If you used `file_analysis` or `data_analysis` tools on an uploaded file, explicitly state that you analyzed the provided file.
-
-         **Important Considerations:**
-             * **Prioritize:** If the query involves a specific file, start by analyzing that file if appropriate.
-             * **Ambiguity:** If the question is ambiguous, ask for clarification.
-             * **Limitations:** If you cannot answer a question with the available tools, state that clearly.
-             * **Conciseness:** Be as concise as possible while providing a complete and accurate answer.
          """
-         agent = CodeAgent(
-             model=self.model,
-             tools=self.tools,
              add_base_tools=True
          )
-         agent.prompt_templates["system_prompt"] = system_prompt
-         return agent
 
      def __call__(self, question: str) -> str:
          print(f"Agent received question (first 50 chars): {question[:50]}...")
          answer = self.agent.run(question)
         print(f"Agent returning answer: {answer}")
          return answer
-
-         logger.info(f"Received question: {question[:200]}...") # Log more of the question
-         try:
-             response = self.agent.run(question)
-             logger.info(f"Response generated successfully for question: {question[:200]}")
-             return response
-         except Exception as e:
-             logger.error(f"Agent execution failed for question '{question[:100]}': {str(e)}", exc_info=True) # Log full traceback
-             return f"Error processing your request: {str(e)}. Please try again or rephrase your question."
-
 
  def run_and_submit_all( profile: gr.OAuthProfile | None):
      """

  import os
  import gradio as gr
  import requests
+ import inspect
  import pandas as pd
+ from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
+ from dotenv import load_dotenv
  import heapq
  from collections import Counter
+ import re
  from io import BytesIO
  from youtube_transcript_api import YouTubeTranscriptApi
+ from langchain_community.tools.tavily_search import TavilySearchResults
+ from langchain_community.document_loaders import WikipediaLoader
+ from langchain_community.utilities import WikipediaAPIWrapper
+ from langchain_community.document_loaders import ArxivLoader
 
+ # (Keep Constants as is)
+ # --- Constants ---
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
+ # Load environment variables
  load_dotenv()
+
+
+ from smolagents import Tool
+ from langchain_community.document_loaders import WikipediaLoader
+
  class WikiSearchTool(Tool):
      name = "wiki_search"
+     description = "Search Wikipedia for a query and return up to 2 results."
+     inputs = {
+         "query": {"type": "string", "description": "The search term for Wikipedia."}
+     }
      output_type = "string"
 
      def forward(self, query: str) -> str:
+         search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+
+         formatted_search_docs = "\n\n---\n\n".join(
+             [
+                 f'<Document source="{doc.metadata.get("source", "Wikipedia")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+                 for doc in search_docs
+             ]
+         )
+         return formatted_search_docs
+
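A quick sanity check of the new output format, as a sketch (it assumes langchain_community and the wikipedia package are installed; the query string is arbitrary):

wiki_tool = WikiSearchTool()
print(wiki_tool.forward("Python (programming language)"))
# Expected shape: one or two <Document source="..." page=""/> ... </Document> blocks joined by "---"
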
+ class StringReverseTool(Tool):
+     name = "reverse_message"
+     description = "When you receive a strange-looking text, try to reverse it and perform the action described in the reversed message."
+     inputs = {
+         "message": {
+             "type": "string",
+             "description": "A message that looks strange and can be reversed to get the actions to execute."
+         }
+     }
      output_type = "string"
 
+     def __init__(self):
+         return
+
+     def forward(self, message: str):
+         return message[::-1]
+
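The reversal is a plain string slice, so a direct call behaves as expected (a sketch; the input is an arbitrary example):

reverse_tool = StringReverseTool()
print(reverse_tool.forward("olleh"))  # -> "hello"
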
+ class KeywordsExtractorTool(Tool):
+     """Extracts the top 5 keywords from a given text based on frequency."""
+
+     name = "keywords_extractor"
+     description = "This tool returns the 5 most frequent keywords that occur in the provided block of text."
+
+     inputs = {
+         "text": {
+             "type": "string",
+             "description": "Text to analyze for keywords.",
+         }
+     }
+     output_type = "string"
 
+     def forward(self, text: str) -> str:
          try:
+             all_words = re.findall(r'\b\w+\b', text.lower())
+             conjunctions = {'a', 'and', 'of', 'is', 'in', 'to', 'the'}
+             filtered_words = []
+             for w in all_words:
+                 if w not in conjunctions:
+                     filtered_words.append(w)
+             word_counts = Counter(filtered_words)
+             k = 5
+             return heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])
          except Exception as e:
+             return f"Error during extracting the most common words: {e}"
+
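Note that forward returns the raw heapq.nlargest result, i.e. a list of (word, count) tuples rather than a single string; a sketch of a direct call (the sample sentence is arbitrary):

kw_tool = KeywordsExtractorTool()
print(kw_tool.forward("the cat sat on the mat and the cat slept"))
# -> [('cat', 2), ('sat', 1), ('on', 1), ('mat', 1), ('slept', 1)]  (order among the ties can vary)
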
+ @tool
+ def parse_excel_to_json(task_id: str) -> dict:
+     """
+     For a given task_id, fetch and parse an Excel file and return the parsed data as structured JSON.
+     Args:
+         task_id: A task ID to fetch.
+
+     Returns:
+         {
+             "task_id": str,
+             "sheets": {
+                 "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
+                 ...
+             },
+             "status": "Success" | "Error"
+         }
+     """
+     url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+
+     try:
+         response = requests.get(url, timeout=100)
+         if response.status_code != 200:
+             return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}
+
+         xls_content = pd.ExcelFile(BytesIO(response.content))
+         json_sheets = {}
+
+         for sheet in xls_content.sheet_names:
+             df = xls_content.parse(sheet)
+             df = df.dropna(how="all")
+             rows = df.head(20).to_dict(orient="records")
+             json_sheets[sheet] = rows
+
+         return {
+             "task_id": task_id,
+             "sheets": json_sheets,
+             "status": "Success"
+         }
+
+     except Exception as e:
+         return {
+             "task_id": task_id,
+             "sheets": {},
+             "status": f"Error in parsing Excel file: {str(e)}"
+         }
+
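Because the function is wrapped with @tool, the agent calls it by name, but it can also be exercised directly (a sketch; it assumes smolagents tool objects remain callable, and the task_id below is a placeholder, not a real one):

result = parse_excel_to_json("some-task-id")  # placeholder ID; a 404 comes back as {"status": "404 - Failed", ...}
if result["status"] == "Success":
    for sheet_name, rows in result["sheets"].items():
        print(sheet_name, len(rows), "rows")
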
  class VideoTranscriptionTool(Tool):
+     """Fetch transcripts from YouTube videos"""
      name = "transcript_video"
+     description = "Fetch the text transcript from a YouTube video, with optional timestamps"
      inputs = {
+         "url": {"type": "string", "description": "YouTube video URL or ID"},
+         "include_timestamps": {"type": "boolean", "description": "Whether timestamps should be included in the output", "nullable": True}
      }
      output_type = "string"
 
      def forward(self, url: str, include_timestamps: bool = False) -> str:
+
+         if "youtube.com/watch" in url:
+             video_id = url.split("v=")[1].split("&")[0]
+         elif "youtu.be/" in url:
+             video_id = url.split("youtu.be/")[1].split("?")[0]
+         elif len(url.strip()) == 11: # Direct ID
+             video_id = url.strip()
+         else:
+             return f"YouTube URL or ID: {url} is invalid!"
+
          try:
+             transcription = YouTubeTranscriptApi.get_transcript(video_id)
 
              if include_timestamps:
+                 formatted_transcription = []
+                 for part in transcription:
+                     timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
+                     formatted_transcription.append(f"[{timestamp}] {part['text']}")
+                 return "\n".join(formatted_transcription)
              else:
+                 return " ".join([part['text'] for part in transcription])
 
          except Exception as e:
+             return f"Error in extracting YouTube transcript: {str(e)}"
 
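The URL handling accepts full watch URLs, youtu.be short links, or a bare 11-character ID, so the calls below all resolve to the same video (a sketch; the ID is a placeholder of the right length, not a real video):

yt_tool = VideoTranscriptionTool()
yt_tool.forward("https://www.youtube.com/watch?v=abcdefghijk")
yt_tool.forward("https://youtu.be/abcdefghijk")
yt_tool.forward("abcdefghijk", include_timestamps=True)  # returns "[m:ss] text" lines, one per caption segment
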
  class BasicAgent:
      def __init__(self):
+         token = os.environ.get("HF_API_TOKEN")
+         model = HfApiModel(
+             temperature=0.1,
+             token=token
          )
+
+         search_tool = DuckDuckGoSearchTool()
+         wiki_search_tool = WikiSearchTool()
+         str_reverse_tool = StringReverseTool()
+         keywords_extract_tool = KeywordsExtractorTool()
+         speech_to_text_tool = SpeechToTextTool()
+         visit_webpage_tool = VisitWebpageTool()
+         final_answer_tool = FinalAnswerTool()
+         video_transcription_tool = VideoTranscriptionTool()
+
+         system_prompt = f"""
+         You are my general AI assistant. Your task is to answer the question I asked.
+         First, provide an explanation of your reasoning, step by step, to arrive at the answer.
+         Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
+         [YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
+         If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
+         If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
+         If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
          """
+         self.agent = CodeAgent(
+             model=model,
+             tools=[search_tool, wiki_search_tool, str_reverse_tool, keywords_extract_tool, speech_to_text_tool, visit_webpage_tool, final_answer_tool, parse_excel_to_json, video_transcription_tool],
              add_base_tools=True
          )
+         self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt
 
      def __call__(self, question: str) -> str:
          print(f"Agent received question (first 50 chars): {question[:50]}...")
          answer = self.agent.run(question)
          print(f"Agent returning answer: {answer}")
          return answer
 
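With the suffix appended to the default prompt template, running the agent end to end looks like this (a sketch; it needs HF_API_TOKEN set in the environment, and the question is just an example):

agent = BasicAgent()
reply = agent("What is the capital of France?")
print(reply)  # CodeAgent.run() output; the prompt asks for a closing line of the form "FINAL ANSWER: ..."
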
  def run_and_submit_all( profile: gr.OAuthProfile | None):
      """