Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,7 @@ from langchain_community.document_loaders import WikipediaLoader
|
|
15 |
from langchain_community.utilities import WikipediaAPIWrapper
|
16 |
from langchain_community.document_loaders import ArxivLoader
|
17 |
|
|
|
18 |
# --- Constants ---
|
19 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
20 |
|
@@ -25,7 +26,8 @@ import io
|
|
25 |
import contextlib
|
26 |
import traceback
|
27 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
28 |
-
|
|
|
29 |
|
30 |
class CodeLlamaTool(Tool):
|
31 |
name = "code_llama_tool"
|
@@ -54,14 +56,13 @@ class CodeLlamaTool(Tool):
|
|
54 |
"text-generation",
|
55 |
model=self.model,
|
56 |
tokenizer=self.tokenizer,
|
57 |
-
max_new_tokens=
|
58 |
-
temperature=0.
|
59 |
truncation=True
|
60 |
)
|
61 |
|
62 |
def forward(self, question: str) -> str:
|
63 |
-
|
64 |
-
self.prompt = f"""You are an AI that uses Python code to answer questions.
|
65 |
Question: {question}
|
66 |
Instructions:
|
67 |
- If solving requires code, use a block like <tool>code</tool>.
|
@@ -76,7 +77,7 @@ print(5 * math.sqrt(36))
|
|
76 |
<final>30.0</final>
|
77 |
Answer:"""
|
78 |
|
79 |
-
response = self.pipeline(
|
80 |
return self.parse_and_execute(response)
|
81 |
|
82 |
def parse_and_execute(self, response: str) -> str:
|
@@ -106,6 +107,11 @@ Answer:"""
|
|
106 |
except Exception:
|
107 |
return f"Error executing code:\n{traceback.format_exc()}"
|
108 |
|
|
|
|
|
|
|
|
|
|
|
109 |
class WikiSearchTool(Tool):
|
110 |
name = "wiki_search"
|
111 |
description = "Search Wikipedia for a query and return up to 2 results."
|
@@ -125,6 +131,9 @@ class WikiSearchTool(Tool):
|
|
125 |
)
|
126 |
return formatted_search_docs
|
127 |
|
|
|
|
|
|
|
128 |
class StringReverseTool(Tool):
|
129 |
name = "reverse_message"
|
130 |
description = "When you received a strange text, try to reverse it and perform action described in reversed message."
|
@@ -163,10 +172,10 @@ class KeywordsExtractorTool(Tool):
|
|
163 |
filtered_words = []
|
164 |
for w in all_words:
|
165 |
if w not in conjunctions:
|
166 |
-
filtered_words.
|
167 |
word_counts = Counter(filtered_words)
|
168 |
k = 5
|
169 |
-
return
|
170 |
except Exception as e:
|
171 |
return f"Error during extracting most common words: {e}"
|
172 |
|
@@ -216,6 +225,8 @@ def parse_excel_to_json(task_id: str) -> dict:
|
|
216 |
"status": f"Error in parsing Excel file: {str(e)}"
|
217 |
}
|
218 |
|
|
|
|
|
219 |
class VideoTranscriptionTool(Tool):
|
220 |
"""Fetch transcripts from YouTube videos"""
|
221 |
name = "transcript_video"
|
@@ -228,17 +239,14 @@ class VideoTranscriptionTool(Tool):
|
|
228 |
|
229 |
def forward(self, url: str, include_timestamps: bool = False) -> str:
|
230 |
|
231 |
-
|
232 |
-
video_id = None
|
233 |
-
if "youtube.com/watch?v=" in url:
|
234 |
video_id = url.split("v=")[1].split("&")[0]
|
235 |
elif "youtu.be/" in url:
|
236 |
video_id = url.split("youtu.be/")[1].split("?")[0]
|
237 |
-
elif len(url.strip()) == 11
|
238 |
video_id = url.strip()
|
239 |
-
|
240 |
-
|
241 |
-
return f"YouTube URL or ID: {url} is invalid or not supported!"
|
242 |
|
243 |
try:
|
244 |
transcription = YouTubeTranscriptApi.get_transcript(video_id)
|
@@ -257,16 +265,9 @@ class VideoTranscriptionTool(Tool):
|
|
257 |
|
258 |
class BasicAgent:
|
259 |
def __init__(self):
|
260 |
-
token = os.environ.get("
|
261 |
-
|
262 |
-
|
263 |
-
self.model_id = "codellama/CodeLlama-7b-Instruct-hf"
|
264 |
-
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=token)
|
265 |
-
|
266 |
-
# Model (e.g., HfApiModel or other)
|
267 |
-
self.model = HfApiModel(
|
268 |
-
model=self.model_id,
|
269 |
-
temperature=0.0,
|
270 |
token=token
|
271 |
)
|
272 |
|
@@ -280,39 +281,35 @@ class BasicAgent:
|
|
280 |
final_answer_tool = FinalAnswerTool()
|
281 |
video_transcription_tool = VideoTranscriptionTool()
|
282 |
|
283 |
-
# New Llama Tool
|
284 |
code_llama_tool = CodeLlamaTool()
|
285 |
|
286 |
-
|
287 |
You are my general AI assistant. Your task is to answer the question I asked.
|
288 |
-
First, provide reasoning
|
289 |
-
|
|
|
|
|
|
|
|
|
290 |
"""
|
291 |
|
292 |
self.agent = CodeAgent(
|
293 |
-
model=
|
294 |
tools=[
|
295 |
search_tool, wiki_search_tool, str_reverse_tool,
|
296 |
keywords_extract_tool, speech_to_text_tool,
|
297 |
visit_webpage_tool, final_answer_tool,
|
298 |
parse_excel_to_json, video_transcription_tool,
|
299 |
-
code_llama_tool
|
300 |
],
|
301 |
add_base_tools=True
|
302 |
)
|
303 |
-
self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] +
|
304 |
-
|
305 |
-
def _build_safe_prompt(self, history: str, question: str, max_total_tokens=32768, reserve_for_output=2048):
|
306 |
-
max_input_tokens = max_total_tokens - reserve_for_output
|
307 |
-
full_prompt = f"{self.system_prompt}\n{history}\nQuestion: {question}"
|
308 |
-
tokenized = self.tokenizer(full_prompt, truncation=True, max_length=max_input_tokens, return_tensors="pt")
|
309 |
-
return self.tokenizer.decode(tokenized["input_ids"][0])
|
310 |
|
311 |
def __call__(self, question: str) -> str:
|
312 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
313 |
-
|
314 |
-
safe_prompt = self._build_safe_prompt(history, question)
|
315 |
-
answer = self.agent.run(safe_prompt)
|
316 |
print(f"Agent returning answer: {answer}")
|
317 |
return answer
|
318 |
|
|
|
15 |
from langchain_community.utilities import WikipediaAPIWrapper
|
16 |
from langchain_community.document_loaders import ArxivLoader
|
17 |
|
18 |
+
# (Keep Constants as is)
|
19 |
# --- Constants ---
|
20 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
21 |
|
|
|
26 |
import contextlib
|
27 |
import traceback
|
28 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
29 |
+
from smolagents import Tool, CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, HfApiModel
|
30 |
+
|
31 |
|
32 |
class CodeLlamaTool(Tool):
|
33 |
name = "code_llama_tool"
|
|
|
56 |
"text-generation",
|
57 |
model=self.model,
|
58 |
tokenizer=self.tokenizer,
|
59 |
+
max_new_tokens=512,
|
60 |
+
temperature=0.2,
|
61 |
truncation=True
|
62 |
)
|
63 |
|
64 |
def forward(self, question: str) -> str:
|
65 |
+
prompt = f"""You are an AI that uses Python code to answer questions.
|
|
|
66 |
Question: {question}
|
67 |
Instructions:
|
68 |
- If solving requires code, use a block like <tool>code</tool>.
|
|
|
77 |
<final>30.0</final>
|
78 |
Answer:"""
|
79 |
|
80 |
+
response = self.pipeline(prompt)[0]["generated_text"]
|
81 |
return self.parse_and_execute(response)
|
82 |
|
83 |
def parse_and_execute(self, response: str) -> str:
|
|
|
107 |
except Exception:
|
108 |
return f"Error executing code:\n{traceback.format_exc()}"
|
109 |
|
110 |
+
|
111 |
+
|
112 |
+
#from smolagents import Tool
|
113 |
+
#from langchain_community.document_loaders import WikipediaLoader
|
114 |
+
|
115 |
class WikiSearchTool(Tool):
|
116 |
name = "wiki_search"
|
117 |
description = "Search Wikipedia for a query and return up to 2 results."
|
|
|
131 |
)
|
132 |
return formatted_search_docs
|
133 |
|
134 |
+
|
135 |
+
|
136 |
+
|
137 |
class StringReverseTool(Tool):
|
138 |
name = "reverse_message"
|
139 |
description = "When you received a strange text, try to reverse it and perform action described in reversed message."
|
|
|
172 |
filtered_words = []
|
173 |
for w in all_words:
|
174 |
if w not in conjunctions:
|
175 |
+
filtered_words.push(w)
|
176 |
word_counts = Counter(filtered_words)
|
177 |
k = 5
|
178 |
+
return heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])
|
179 |
except Exception as e:
|
180 |
return f"Error during extracting most common words: {e}"
|
181 |
|
|
|
225 |
"status": f"Error in parsing Excel file: {str(e)}"
|
226 |
}
|
227 |
|
228 |
+
|
229 |
+
|
230 |
class VideoTranscriptionTool(Tool):
|
231 |
"""Fetch transcripts from YouTube videos"""
|
232 |
name = "transcript_video"
|
|
|
239 |
|
240 |
def forward(self, url: str, include_timestamps: bool = False) -> str:
|
241 |
|
242 |
+
if "youtube.com/watch" in url:
|
|
|
|
|
243 |
video_id = url.split("v=")[1].split("&")[0]
|
244 |
elif "youtu.be/" in url:
|
245 |
video_id = url.split("youtu.be/")[1].split("?")[0]
|
246 |
+
elif len(url.strip()) == 11: # Direct ID
|
247 |
video_id = url.strip()
|
248 |
+
else:
|
249 |
+
return f"YouTube URL or ID: {url} is invalid!"
|
|
|
250 |
|
251 |
try:
|
252 |
transcription = YouTubeTranscriptApi.get_transcript(video_id)
|
|
|
265 |
|
266 |
class BasicAgent:
|
267 |
def __init__(self):
|
268 |
+
token = os.environ.get("HF_API_TOKEN")
|
269 |
+
model = HfApiModel(
|
270 |
+
temperature=0.1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
token=token
|
272 |
)
|
273 |
|
|
|
281 |
final_answer_tool = FinalAnswerTool()
|
282 |
video_transcription_tool = VideoTranscriptionTool()
|
283 |
|
284 |
+
# ✅ New Llama Tool
|
285 |
code_llama_tool = CodeLlamaTool()
|
286 |
|
287 |
+
system_prompt = f"""
|
288 |
You are my general AI assistant. Your task is to answer the question I asked.
|
289 |
+
First, provide an explanation of your reasoning, step by step, to arrive at the answer.
|
290 |
+
Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
|
291 |
+
[YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
|
292 |
+
If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
|
293 |
+
If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
|
294 |
+
If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
|
295 |
"""
|
296 |
|
297 |
self.agent = CodeAgent(
|
298 |
+
model=model,
|
299 |
tools=[
|
300 |
search_tool, wiki_search_tool, str_reverse_tool,
|
301 |
keywords_extract_tool, speech_to_text_tool,
|
302 |
visit_webpage_tool, final_answer_tool,
|
303 |
parse_excel_to_json, video_transcription_tool,
|
304 |
+
code_llama_tool # 🔧 Add here
|
305 |
],
|
306 |
add_base_tools=True
|
307 |
)
|
308 |
+
self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
|
310 |
def __call__(self, question: str) -> str:
|
311 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
312 |
+
answer = self.agent.run(question)
|
|
|
|
|
313 |
print(f"Agent returning answer: {answer}")
|
314 |
return answer
|
315 |
|