Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,6 @@ from langchain_community.document_loaders import WikipediaLoader
|
|
15 |
from langchain_community.utilities import WikipediaAPIWrapper
|
16 |
from langchain_community.document_loaders import ArxivLoader
|
17 |
|
18 |
-
# (Keep Constants as is)
|
19 |
# --- Constants ---
|
20 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
21 |
|
@@ -26,8 +25,7 @@ import io
|
|
26 |
import contextlib
|
27 |
import traceback
|
28 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
29 |
-
from smolagents import Tool, CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, HfApiModel
|
30 |
-
|
31 |
|
32 |
class CodeLlamaTool(Tool):
|
33 |
name = "code_llama_tool"
|
@@ -62,6 +60,7 @@ class CodeLlamaTool(Tool):
|
|
62 |
)
|
63 |
|
64 |
def forward(self, question: str) -> str:
|
|
|
65 |
self.prompt = f"""You are an AI that uses Python code to answer questions.
|
66 |
Question: {question}
|
67 |
Instructions:
|
@@ -77,7 +76,7 @@ print(5 * math.sqrt(36))
|
|
77 |
<final>30.0</final>
|
78 |
Answer:"""
|
79 |
|
80 |
-
response = self.pipeline(prompt)[0]["generated_text"]
|
81 |
return self.parse_and_execute(response)
|
82 |
|
83 |
def parse_and_execute(self, response: str) -> str:
|
@@ -107,11 +106,6 @@ Answer:"""
|
|
107 |
except Exception:
|
108 |
return f"Error executing code:\n{traceback.format_exc()}"
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
#from smolagents import Tool
|
113 |
-
#from langchain_community.document_loaders import WikipediaLoader
|
114 |
-
|
115 |
class WikiSearchTool(Tool):
|
116 |
name = "wiki_search"
|
117 |
description = "Search Wikipedia for a query and return up to 2 results."
|
@@ -131,9 +125,6 @@ class WikiSearchTool(Tool):
|
|
131 |
)
|
132 |
return formatted_search_docs
|
133 |
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
class StringReverseTool(Tool):
|
138 |
name = "reverse_message"
|
139 |
description = "When you received a strange text, try to reverse it and perform action described in reversed message."
|
@@ -172,10 +163,10 @@ class KeywordsExtractorTool(Tool):
|
|
172 |
filtered_words = []
|
173 |
for w in all_words:
|
174 |
if w not in conjunctions:
|
175 |
-
filtered_words.
|
176 |
word_counts = Counter(filtered_words)
|
177 |
k = 5
|
178 |
-
return heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])
|
179 |
except Exception as e:
|
180 |
return f"Error during extracting most common words: {e}"
|
181 |
|
@@ -225,8 +216,6 @@ def parse_excel_to_json(task_id: str) -> dict:
|
|
225 |
"status": f"Error in parsing Excel file: {str(e)}"
|
226 |
}
|
227 |
|
228 |
-
|
229 |
-
|
230 |
class VideoTranscriptionTool(Tool):
|
231 |
"""Fetch transcripts from YouTube videos"""
|
232 |
name = "transcript_video"
|
@@ -239,14 +228,17 @@ class VideoTranscriptionTool(Tool):
|
|
239 |
|
240 |
def forward(self, url: str, include_timestamps: bool = False) -> str:
|
241 |
|
242 |
-
|
|
|
|
|
243 |
video_id = url.split("v=")[1].split("&")[0]
|
244 |
elif "youtu.be/" in url:
|
245 |
video_id = url.split("youtu.be/")[1].split("?")[0]
|
246 |
-
elif len(url.strip()) == 11: # Direct ID
|
247 |
video_id = url.strip()
|
248 |
-
|
249 |
-
|
|
|
250 |
|
251 |
try:
|
252 |
transcription = YouTubeTranscriptApi.get_transcript(video_id)
|
@@ -265,8 +257,8 @@ class VideoTranscriptionTool(Tool):
|
|
265 |
|
266 |
class BasicAgent:
|
267 |
def __init__(self):
|
268 |
-
token = os.environ.get("
|
269 |
-
|
270 |
# Initialize tokenizer
|
271 |
self.model_id = "codellama/CodeLlama-7b-Instruct-hf"
|
272 |
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=token)
|
@@ -288,7 +280,7 @@ class BasicAgent:
|
|
288 |
final_answer_tool = FinalAnswerTool()
|
289 |
video_transcription_tool = VideoTranscriptionTool()
|
290 |
|
291 |
-
#
|
292 |
code_llama_tool = CodeLlamaTool()
|
293 |
|
294 |
self.system_prompt = f"""
|
@@ -304,7 +296,7 @@ Answer should be a short string, number, or comma-separated list. Keep it brief.
|
|
304 |
keywords_extract_tool, speech_to_text_tool,
|
305 |
visit_webpage_tool, final_answer_tool,
|
306 |
parse_excel_to_json, video_transcription_tool,
|
307 |
-
code_llama_tool
|
308 |
],
|
309 |
add_base_tools=True
|
310 |
)
|
|
|
15 |
from langchain_community.utilities import WikipediaAPIWrapper
|
16 |
from langchain_community.document_loaders import ArxivLoader
|
17 |
|
|
|
18 |
# --- Constants ---
|
19 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
20 |
|
|
|
25 |
import contextlib
|
26 |
import traceback
|
27 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
28 |
+
# from smolagents import Tool, CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, HfApiModel # These are already imported above
|
|
|
29 |
|
30 |
class CodeLlamaTool(Tool):
|
31 |
name = "code_llama_tool"
|
|
|
60 |
)
|
61 |
|
62 |
def forward(self, question: str) -> str:
|
63 |
+
# Corrected: Use self.prompt and then pass it to the pipeline
|
64 |
self.prompt = f"""You are an AI that uses Python code to answer questions.
|
65 |
Question: {question}
|
66 |
Instructions:
|
|
|
76 |
<final>30.0</final>
|
77 |
Answer:"""
|
78 |
|
79 |
+
response = self.pipeline(self.prompt)[0]["generated_text"] # Pass self.prompt
|
80 |
return self.parse_and_execute(response)
|
81 |
|
82 |
def parse_and_execute(self, response: str) -> str:
|
|
|
106 |
except Exception:
|
107 |
return f"Error executing code:\n{traceback.format_exc()}"
|
108 |
|
|
|
|
|
|
|
|
|
|
|
109 |
class WikiSearchTool(Tool):
|
110 |
name = "wiki_search"
|
111 |
description = "Search Wikipedia for a query and return up to 2 results."
|
|
|
125 |
)
|
126 |
return formatted_search_docs
|
127 |
|
|
|
|
|
|
|
128 |
class StringReverseTool(Tool):
|
129 |
name = "reverse_message"
|
130 |
description = "When you received a strange text, try to reverse it and perform action described in reversed message."
|
|
|
163 |
filtered_words = []
|
164 |
for w in all_words:
|
165 |
if w not in conjunctions:
|
166 |
+
filtered_words.append(w) # Corrected: Use append instead of push
|
167 |
word_counts = Counter(filtered_words)
|
168 |
k = 5
|
169 |
+
return str(heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])) # Corrected: Convert list of tuples to string
|
170 |
except Exception as e:
|
171 |
return f"Error during extracting most common words: {e}"
|
172 |
|
|
|
216 |
"status": f"Error in parsing Excel file: {str(e)}"
|
217 |
}
|
218 |
|
|
|
|
|
219 |
class VideoTranscriptionTool(Tool):
|
220 |
"""Fetch transcripts from YouTube videos"""
|
221 |
name = "transcript_video"
|
|
|
228 |
|
229 |
def forward(self, url: str, include_timestamps: bool = False) -> str:
|
230 |
|
231 |
+
# Corrected: Handle various YouTube URL formats
|
232 |
+
video_id = None
|
233 |
+
if "youtube.com/watch?v=" in url:
|
234 |
video_id = url.split("v=")[1].split("&")[0]
|
235 |
elif "youtu.be/" in url:
|
236 |
video_id = url.split("youtu.be/")[1].split("?")[0]
|
237 |
+
elif len(url.strip()) == 11 and not ("http://" in url or "https://" in url): # Direct ID
|
238 |
video_id = url.strip()
|
239 |
+
|
240 |
+
if not video_id:
|
241 |
+
return f"YouTube URL or ID: {url} is invalid or not supported!"
|
242 |
|
243 |
try:
|
244 |
transcription = YouTubeTranscriptApi.get_transcript(video_id)
|
|
|
257 |
|
258 |
class BasicAgent:
|
259 |
def __init__(self):
|
260 |
+
token = os.environ.get("HF_TOKEN") # Corrected: Use HF_TOKEN
|
261 |
+
|
262 |
# Initialize tokenizer
|
263 |
self.model_id = "codellama/CodeLlama-7b-Instruct-hf"
|
264 |
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=token)
|
|
|
280 |
final_answer_tool = FinalAnswerTool()
|
281 |
video_transcription_tool = VideoTranscriptionTool()
|
282 |
|
283 |
+
# New Llama Tool
|
284 |
code_llama_tool = CodeLlamaTool()
|
285 |
|
286 |
self.system_prompt = f"""
|
|
|
296 |
keywords_extract_tool, speech_to_text_tool,
|
297 |
visit_webpage_tool, final_answer_tool,
|
298 |
parse_excel_to_json, video_transcription_tool,
|
299 |
+
code_llama_tool
|
300 |
],
|
301 |
add_base_tools=True
|
302 |
)
|