wt002 committed on
Commit
2234779
·
verified ·
1 Parent(s): ac2433e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -24
app.py CHANGED
@@ -15,7 +15,6 @@ from langchain_community.document_loaders import WikipediaLoader
15
  from langchain_community.utilities import WikipediaAPIWrapper
16
  from langchain_community.document_loaders import ArxivLoader
17
 
18
- # (Keep Constants as is)
19
  # --- Constants ---
20
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
@@ -26,8 +25,7 @@ import io
26
  import contextlib
27
  import traceback
28
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
29
- from smolagents import Tool, CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, HfApiModel
30
-
31
 
32
  class CodeLlamaTool(Tool):
33
  name = "code_llama_tool"
@@ -62,6 +60,7 @@ class CodeLlamaTool(Tool):
62
  )
63
 
64
  def forward(self, question: str) -> str:
 
65
  self.prompt = f"""You are an AI that uses Python code to answer questions.
66
  Question: {question}
67
  Instructions:
@@ -77,7 +76,7 @@ print(5 * math.sqrt(36))
77
  <final>30.0</final>
78
  Answer:"""
79
 
80
- response = self.pipeline(prompt)[0]["generated_text"]
81
  return self.parse_and_execute(response)
82
 
83
  def parse_and_execute(self, response: str) -> str:
@@ -107,11 +106,6 @@ Answer:"""
107
  except Exception:
108
  return f"Error executing code:\n{traceback.format_exc()}"
109
 
110
-
111
-
112
- #from smolagents import Tool
113
- #from langchain_community.document_loaders import WikipediaLoader
114
-
115
  class WikiSearchTool(Tool):
116
  name = "wiki_search"
117
  description = "Search Wikipedia for a query and return up to 2 results."
@@ -131,9 +125,6 @@ class WikiSearchTool(Tool):
131
  )
132
  return formatted_search_docs
133
 
134
-
135
-
136
-
137
  class StringReverseTool(Tool):
138
  name = "reverse_message"
139
  description = "When you received a strange text, try to reverse it and perform action described in reversed message."
@@ -172,10 +163,10 @@ class KeywordsExtractorTool(Tool):
172
  filtered_words = []
173
  for w in all_words:
174
  if w not in conjunctions:
175
- filtered_words.push(w)
176
  word_counts = Counter(filtered_words)
177
  k = 5
178
- return heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])
179
  except Exception as e:
180
  return f"Error during extracting most common words: {e}"
181
 
@@ -225,8 +216,6 @@ def parse_excel_to_json(task_id: str) -> dict:
225
  "status": f"Error in parsing Excel file: {str(e)}"
226
  }
227
 
228
-
229
-
230
  class VideoTranscriptionTool(Tool):
231
  """Fetch transcripts from YouTube videos"""
232
  name = "transcript_video"
@@ -239,14 +228,17 @@ class VideoTranscriptionTool(Tool):
239
 
240
  def forward(self, url: str, include_timestamps: bool = False) -> str:
241
 
242
- if "youtube.com/watch" in url:
 
 
243
  video_id = url.split("v=")[1].split("&")[0]
244
  elif "youtu.be/" in url:
245
  video_id = url.split("youtu.be/")[1].split("?")[0]
246
- elif len(url.strip()) == 11: # Direct ID
247
  video_id = url.strip()
248
- else:
249
- return f"YouTube URL or ID: {url} is invalid!"
 
250
 
251
  try:
252
  transcription = YouTubeTranscriptApi.get_transcript(video_id)
@@ -265,8 +257,8 @@ class VideoTranscriptionTool(Tool):
265
 
266
  class BasicAgent:
267
  def __init__(self):
268
- token = os.environ.get("HF_API_TOKEN")
269
-
270
  # Initialize tokenizer
271
  self.model_id = "codellama/CodeLlama-7b-Instruct-hf"
272
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=token)
@@ -288,7 +280,7 @@ class BasicAgent:
288
  final_answer_tool = FinalAnswerTool()
289
  video_transcription_tool = VideoTranscriptionTool()
290
 
291
- # New Llama Tool
292
  code_llama_tool = CodeLlamaTool()
293
 
294
  self.system_prompt = f"""
@@ -304,7 +296,7 @@ Answer should be a short string, number, or comma-separated list. Keep it brief.
304
  keywords_extract_tool, speech_to_text_tool,
305
  visit_webpage_tool, final_answer_tool,
306
  parse_excel_to_json, video_transcription_tool,
307
- code_llama_tool # 🔧 Add here
308
  ],
309
  add_base_tools=True
310
  )
 
15
  from langchain_community.utilities import WikipediaAPIWrapper
16
  from langchain_community.document_loaders import ArxivLoader
17
 
 
18
  # --- Constants ---
19
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
20
 
 
25
  import contextlib
26
  import traceback
27
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
28
+ # from smolagents import Tool, CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, HfApiModel # These are already imported above
 
29
 
30
  class CodeLlamaTool(Tool):
31
  name = "code_llama_tool"
 
60
  )
61
 
62
  def forward(self, question: str) -> str:
63
+ # Corrected: Use self.prompt and then pass it to the pipeline
64
  self.prompt = f"""You are an AI that uses Python code to answer questions.
65
  Question: {question}
66
  Instructions:
 
76
  <final>30.0</final>
77
  Answer:"""
78
 
79
+ response = self.pipeline(self.prompt)[0]["generated_text"] # Pass self.prompt
80
  return self.parse_and_execute(response)
81
 
82
  def parse_and_execute(self, response: str) -> str:
 
106
  except Exception:
107
  return f"Error executing code:\n{traceback.format_exc()}"
108
 
 
 
 
 
 
109
  class WikiSearchTool(Tool):
110
  name = "wiki_search"
111
  description = "Search Wikipedia for a query and return up to 2 results."
 
125
  )
126
  return formatted_search_docs
127
 
 
 
 
128
  class StringReverseTool(Tool):
129
  name = "reverse_message"
130
  description = "When you received a strange text, try to reverse it and perform action described in reversed message."
 
163
  filtered_words = []
164
  for w in all_words:
165
  if w not in conjunctions:
166
+ filtered_words.append(w) # Corrected: Use append instead of push
167
  word_counts = Counter(filtered_words)
168
  k = 5
169
+ return str(heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])) # Corrected: Convert list of tuples to string
170
  except Exception as e:
171
  return f"Error during extracting most common words: {e}"
172
 
 
216
  "status": f"Error in parsing Excel file: {str(e)}"
217
  }
218
 
 
 
219
  class VideoTranscriptionTool(Tool):
220
  """Fetch transcripts from YouTube videos"""
221
  name = "transcript_video"
 
228
 
229
  def forward(self, url: str, include_timestamps: bool = False) -> str:
230
 
231
+ # Corrected: Handle various YouTube URL formats
232
+ video_id = None
233
+ if "youtube.com/watch?v=" in url:
234
  video_id = url.split("v=")[1].split("&")[0]
235
  elif "youtu.be/" in url:
236
  video_id = url.split("youtu.be/")[1].split("?")[0]
237
+ elif len(url.strip()) == 11 and not ("http://" in url or "https://" in url): # Direct ID
238
  video_id = url.strip()
239
+
240
+ if not video_id:
241
+ return f"YouTube URL or ID: {url} is invalid or not supported!"
242
 
243
  try:
244
  transcription = YouTubeTranscriptApi.get_transcript(video_id)
 
257
 
258
  class BasicAgent:
259
  def __init__(self):
260
+ token = os.environ.get("HF_TOKEN") # Corrected: Use HF_TOKEN
261
+
262
  # Initialize tokenizer
263
  self.model_id = "codellama/CodeLlama-7b-Instruct-hf"
264
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=token)
 
280
  final_answer_tool = FinalAnswerTool()
281
  video_transcription_tool = VideoTranscriptionTool()
282
 
283
+ # New Llama Tool
284
  code_llama_tool = CodeLlamaTool()
285
 
286
  self.system_prompt = f"""
 
296
  keywords_extract_tool, speech_to_text_tool,
297
  visit_webpage_tool, final_answer_tool,
298
  parse_excel_to_json, video_transcription_tool,
299
+ code_llama_tool
300
  ],
301
  add_base_tools=True
302
  )