wt002 committed
Commit c93c36d · verified
1 Parent(s): 2234779

Update app.py

Files changed (1)
  1. app.py +38 -41
app.py CHANGED
@@ -15,6 +15,7 @@ from langchain_community.document_loaders import WikipediaLoader
 from langchain_community.utilities import WikipediaAPIWrapper
 from langchain_community.document_loaders import ArxivLoader
 
+# (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
@@ -25,7 +26,8 @@ import io
 import contextlib
 import traceback
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-# from smolagents import Tool, CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, HfApiModel # These are already imported above
+from smolagents import Tool, CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, HfApiModel
+
 
 class CodeLlamaTool(Tool):
     name = "code_llama_tool"
@@ -54,14 +56,13 @@ class CodeLlamaTool(Tool):
             "text-generation",
             model=self.model,
             tokenizer=self.tokenizer,
-            max_new_tokens=256, # 512
-            temperature=0.0,
+            max_new_tokens=512,
+            temperature=0.2,
             truncation=True
         )
 
     def forward(self, question: str) -> str:
-        # Corrected: Use self.prompt and then pass it to the pipeline
-        self.prompt = f"""You are an AI that uses Python code to answer questions.
+        prompt = f"""You are an AI that uses Python code to answer questions.
 Question: {question}
 Instructions:
 - If solving requires code, use a block like <tool>code</tool>.
@@ -76,7 +77,7 @@ print(5 * math.sqrt(36))
 <final>30.0</final>
 Answer:"""
 
-        response = self.pipeline(self.prompt)[0]["generated_text"] # Pass self.prompt
+        response = self.pipeline(prompt)[0]["generated_text"]
         return self.parse_and_execute(response)
 
     def parse_and_execute(self, response: str) -> str:
@@ -106,6 +107,11 @@ Answer:"""
         except Exception:
             return f"Error executing code:\n{traceback.format_exc()}"
 
+
+
+#from smolagents import Tool
+#from langchain_community.document_loaders import WikipediaLoader
+
 class WikiSearchTool(Tool):
     name = "wiki_search"
     description = "Search Wikipedia for a query and return up to 2 results."
@@ -125,6 +131,9 @@ class WikiSearchTool(Tool):
         )
         return formatted_search_docs
 
+
+
+
 class StringReverseTool(Tool):
     name = "reverse_message"
     description = "When you received a strange text, try to reverse it and perform action described in reversed message."
@@ -163,10 +172,10 @@ class KeywordsExtractorTool(Tool):
             filtered_words = []
             for w in all_words:
                 if w not in conjunctions:
-                    filtered_words.append(w) # Corrected: Use append instead of push
+                    filtered_words.push(w)
             word_counts = Counter(filtered_words)
             k = 5
-            return str(heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])) # Corrected: Convert list of tuples to string
+            return heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])
         except Exception as e:
             return f"Error during extracting most common words: {e}"
 
@@ -216,6 +225,8 @@ def parse_excel_to_json(task_id: str) -> dict:
             "status": f"Error in parsing Excel file: {str(e)}"
         }
 
+
+
 class VideoTranscriptionTool(Tool):
     """Fetch transcripts from YouTube videos"""
     name = "transcript_video"
@@ -228,17 +239,14 @@ class VideoTranscriptionTool(Tool):
 
     def forward(self, url: str, include_timestamps: bool = False) -> str:
 
-        # Corrected: Handle various YouTube URL formats
-        video_id = None
-        if "youtube.com/watch?v=" in url:
+        if "youtube.com/watch" in url:
            video_id = url.split("v=")[1].split("&")[0]
         elif "youtu.be/" in url:
            video_id = url.split("youtu.be/")[1].split("?")[0]
-        elif len(url.strip()) == 11 and not ("http://" in url or "https://" in url): # Direct ID
+        elif len(url.strip()) == 11: # Direct ID
            video_id = url.strip()
-
-        if not video_id:
-            return f"YouTube URL or ID: {url} is invalid or not supported!"
+        else:
+            return f"YouTube URL or ID: {url} is invalid!"
 
         try:
            transcription = YouTubeTranscriptApi.get_transcript(video_id)
@@ -257,16 +265,9 @@ class VideoTranscriptionTool(Tool):
 
 class BasicAgent:
     def __init__(self):
-        token = os.environ.get("HF_TOKEN") # Corrected: Use HF_TOKEN
-
-        # Initialize tokenizer
-        self.model_id = "codellama/CodeLlama-7b-Instruct-hf"
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=token)
-
-        # Model (e.g., HfApiModel or other)
-        self.model = HfApiModel(
-            model=self.model_id,
-            temperature=0.0,
+        token = os.environ.get("HF_API_TOKEN")
+        model = HfApiModel(
+            temperature=0.1,
            token=token
         )
 
@@ -280,39 +281,35 @@ class BasicAgent:
         final_answer_tool = FinalAnswerTool()
         video_transcription_tool = VideoTranscriptionTool()
 
-        # New Llama Tool
+        # New Llama Tool
         code_llama_tool = CodeLlamaTool()
 
-        self.system_prompt = f"""
+        system_prompt = f"""
 You are my general AI assistant. Your task is to answer the question I asked.
-First, provide reasoning. Then return: FINAL ANSWER: [your answer].
-Answer should be a short string, number, or comma-separated list. Keep it brief.
+First, provide an explanation of your reasoning, step by step, to arrive at the answer.
+Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
+[YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
+If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
+If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
+If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
 """
 
         self.agent = CodeAgent(
-            model=self.model,
+            model=model,
            tools=[
                search_tool, wiki_search_tool, str_reverse_tool,
                keywords_extract_tool, speech_to_text_tool,
                visit_webpage_tool, final_answer_tool,
                parse_excel_to_json, video_transcription_tool,
-                code_llama_tool
+                code_llama_tool # 🔧 Add here
            ],
            add_base_tools=True
         )
-        self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + self.system_prompt
-
-    def _build_safe_prompt(self, history: str, question: str, max_total_tokens=32768, reserve_for_output=2048):
-        max_input_tokens = max_total_tokens - reserve_for_output
-        full_prompt = f"{self.system_prompt}\n{history}\nQuestion: {question}"
-        tokenized = self.tokenizer(full_prompt, truncation=True, max_length=max_input_tokens, return_tensors="pt")
-        return self.tokenizer.decode(tokenized["input_ids"][0])
+        self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt
 
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        history = "" # could be conversation history, if available
-        safe_prompt = self._build_safe_prompt(history, question)
-        answer = self.agent.run(safe_prompt)
+        answer = self.agent.run(question)
         print(f"Agent returning answer: {answer}")
         return answer
 
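
For context, the updated BasicAgent is constructed with no arguments (it reads HF_API_TOKEN from the environment) and is called directly with a question string, which it forwards to self.agent.run(). A minimal usage sketch, assuming this file is saved as app.py, that its remaining imports (smolagents, transformers, etc.) resolve, and that a real token is available; the token value and the example question below are placeholders, not part of the commit:

import os

# Assumption: supply a real Hugging Face token; "hf_xxx" is a placeholder.
os.environ.setdefault("HF_API_TOKEN", "hf_xxx")

from app import BasicAgent  # assumes this module is saved as app.py

agent = BasicAgent()  # builds the CodeAgent with all tools and the extended system prompt
answer = agent("What is 5 multiplied by the square root of 36?")
print(answer)  # the system prompt asks the agent to end with a line like "FINAL ANSWER: 30"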